Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#fromArgs()
The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#fromArgs().
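Before the individual examples, here is a minimal, hypothetical sketch (not taken from any of the projects below) of what ParameterTool#fromArgs() does: it parses --key value pairs from the command-line arguments and exposes them through accessors such as getRequired, getInt with a default, and has. The parameter names used here (--input, --parallelism, --verbose) are made up purely for illustration.

import org.apache.flink.api.java.utils.ParameterTool;

public class FromArgsSketch {

    // Hypothetical invocation: java FromArgsSketch --input /tmp/data.txt --parallelism 4 --verbose
    public static void main(String[] args) {
        ParameterTool params = ParameterTool.fromArgs(args);

        String input = params.getRequired("input");         // throws if --input is missing
        int parallelism = params.getInt("parallelism", 1);   // falls back to 1 if absent
        boolean verbose = params.has("verbose");             // flag-style presence check

        System.out.println("input=" + input
                + ", parallelism=" + parallelism
                + ", verbose=" + verbose);
    }
}

Many of the examples below additionally call env.getConfig().setGlobalJobParameters(params) so that the parsed parameters show up in the Flink web interface.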
Example 1
Source File: ProduceIntoKinesis.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    DataStream<String> simpleStringStream = see.addSource(new EventsGenerator());

    Properties kinesisProducerConfig = new Properties();
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));

    FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(
            new SimpleStringSchema(), kinesisProducerConfig);

    kinesis.setFailOnError(true);
    kinesis.setDefaultStream("flink-test");
    kinesis.setDefaultPartition("0");

    simpleStringStream.addSink(kinesis);

    see.execute();
}
Example 2
Source File: Kafka010Example.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
            .addSource(
                new FlinkKafkaConsumer010<>(
                    parameterTool.getRequired("input-topic"),
                    new KafkaEventSchema(),
                    parameterTool.getProperties())
                .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    input.addSink(
            new FlinkKafkaProducer010<>(
                parameterTool.getRequired("output-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties()));

    env.execute("Kafka 0.10 Example");
}
Example 3
Source File: LongRidesExercise.java From flink-training-exercises with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;          // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600;    // events of 10 minutes are served in 1 second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    DataStream<TaxiRide> longRides = rides
            .keyBy(ride -> ride.rideId)
            .process(new MatchFunction());

    printOrTest(longRides);

    env.execute("Long Taxi Rides");
}
Example 4
Source File: Kafka010Example.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
            .addSource(
                new FlinkKafkaConsumer010<>(
                    parameterTool.getRequired("input-topic"),
                    new KafkaEventSchema(),
                    parameterTool.getProperties())
                .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    input.addSink(
            new FlinkKafkaProducer010<>(
                parameterTool.getRequired("output-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties()));

    env.execute("Kafka 0.10 Example");
}
Example 5
Source File: ConsumeFromKinesis.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
            "flink-test",
            new SimpleStringSchema(),
            kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
Example 6
Source File: AbstractTaskManagerProcessFailureRecoveryTest.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) {
    try {
        final ParameterTool parameterTool = ParameterTool.fromArgs(args);
        Configuration cfg = parameterTool.getConfiguration();

        TaskManagerRunner.runTaskManager(cfg, ResourceID.generate());
    } catch (Throwable t) {
        LOG.error("Failed to start TaskManager process", t);
        System.exit(1);
    }
}
Example 7
Source File: PeriodicStreamingJob.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    int recordsPerSecond = params.getInt("recordsPerSecond", 10);
    int duration = params.getInt("durationInSecond", 60);
    int offset = params.getInt("offsetInSecond", 0);

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    // execute a simple pass through program.
    PeriodicSourceGenerator generator = new PeriodicSourceGenerator(
        recordsPerSecond, duration, offset);
    DataStream<Tuple> rows = sEnv.addSource(generator);

    DataStream<Tuple> result = rows
        .keyBy(1)
        .timeWindow(Time.seconds(5))
        .sum(0);

    result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE)
        .setParallelism(1);

    sEnv.execute();
}
Example 8
Source File: KafkaTopicValidator.java From yahoo-streaming-benchmark with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(parameterTool);

    DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
            parameterTool.getRequired("kafka.topic"),
            new SimpleStringSchema(),
            parameterTool.getProperties()));

    rawMessageStream.print();

    env.execute();
}
Example 9
Source File: WindowJoin.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    // parse the parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    final long windowSize = params.getLong("windowSize", 2000);
    final long rate = params.getLong("rate", 3L);

    System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate);
    System.out.println("To customize example, use: WindowJoin [--windowSize <window-size-in-millis>] [--rate <elements-per-second>]");

    // obtain execution environment, run this example in "ingestion time"
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create the data sources for both grades and salaries
    DataStream<Tuple2<String, Integer>> grades = GradeSource.getSource(env, rate);
    DataStream<Tuple2<String, Integer>> salaries = SalarySource.getSource(env, rate);

    // run the actual window join program
    // for testability, this functionality is in a separate method.
    DataStream<Tuple3<String, Integer, Integer>> joinedStream = runWindowJoin(grades, salaries, windowSize);

    // print the results with a single thread, rather than in parallel
    joinedStream.print().setParallelism(1);

    // execute program
    env.execute("Windowed Join Example");
}
Example 10
Source File: KafkaThroughput.java From flink-perf with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
    final ParameterTool pt = ParameterTool.fromArgs(args);

    TopologyBuilder builder = new TopologyBuilder();
    BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
    SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));
    builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

    Config conf = new Config();
    conf.setDebug(false);

    if (!pt.has("local")) {
        conf.setNumWorkers(pt.getInt("par", 2));

        StormSubmitter.submitTopologyWithProgressBar("kafka-spout-" + pt.get("name", "no_name"), conf, builder.createTopology());
    } else {
        conf.setMaxTaskParallelism(pt.getInt("par", 2));

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("kafka-spout", conf, builder.createTopology());

        Thread.sleep(300000);

        cluster.shutdown();
    }
}
Example 11
Source File: IterateExample.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
            .setBufferTimeout(1);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
            .iterate(5000L);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
            .split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
            .map(new OutputMap());

    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
}
Example 12
Source File: KinesisExampleTest.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    LOG.info("System properties: {}", System.getProperties());
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);

    String inputStream = parameterTool.getRequired("input-stream");
    String outputStream = parameterTool.getRequired("output-stream");

    KinesisPubsubClient pubsub = new KinesisPubsubClient(parameterTool.getProperties());
    pubsub.createTopic(inputStream, 2, parameterTool.getProperties());
    pubsub.createTopic(outputStream, 2, parameterTool.getProperties());

    // The example job needs to start after streams are created and run in parallel to the validation logic.
    // The thread that runs the job won't terminate, we don't have a job reference to cancel it.
    // Once results are validated, the driver main thread will exit; job/cluster will be terminated from script.
    final AtomicReference<Exception> executeException = new AtomicReference<>();
    Thread executeThread =
        new Thread(
            () -> {
                try {
                    KinesisExample.main(args);
                    // this message won't appear in the log,
                    // job is terminated when shutting down cluster
                    LOG.info("executed program");
                } catch (Exception e) {
                    executeException.set(e);
                }
            });
    executeThread.start();

    // generate input
    String[] messages = {
        "elephant,5,45218",
        "squirrel,12,46213",
        "bee,3,51348",
        "squirrel,22,52444",
        "bee,10,53412",
        "elephant,9,54867"
    };
    for (String msg : messages) {
        pubsub.sendMessage(inputStream, msg);
    }
    LOG.info("generated records");

    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(60));
    List<String> results = pubsub.readAllMessages(outputStream);
    while (deadline.hasTimeLeft() && executeException.get() == null && results.size() < messages.length) {
        LOG.info("waiting for results..");
        Thread.sleep(1000);
        results = pubsub.readAllMessages(outputStream);
    }

    if (executeException.get() != null) {
        throw executeException.get();
    }

    LOG.info("results: {}", results);
    Assert.assertEquals("Results received from '" + outputStream + "': " + results,
        messages.length, results.size());

    String[] expectedResults = {
        "elephant,5,45218",
        "elephant,14,54867",
        "squirrel,12,46213",
        "squirrel,34,52444",
        "bee,3,51348",
        "bee,13,53412"
    };

    for (String expectedResult : expectedResults) {
        Assert.assertTrue(expectedResult, results.contains(expectedResult));
    }

    // TODO: main thread needs to create job or CLI fails with:
    // "The program didn't contain a Flink job. Perhaps you forgot to call execute() on the execution environment."
    System.out.println("test finished");
    System.exit(0);
}
Example 13
Source File: NearestTaxiSolution.java From flink-training-exercises with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;          // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 600;    // 10 minutes worth of events are served every second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));

    // add a socket source
    BroadcastStream<Query> queryStream = env.socketTextStream("localhost", 9999)
            .map(new MapFunction<String, Query>() {
                @Override
                public Query map(String msg) throws Exception {
                    String[] parts = msg.split(",\\s*");
                    return new Query(
                            Float.valueOf(parts[0]),    // longitude
                            Float.valueOf(parts[1]));   // latitude
                }
            })
            .broadcast(queryDescriptor);

    DataStream<Tuple3<Long, Long, Float>> reports = rides
            .keyBy((TaxiRide ride) -> ride.taxiId)
            .connect(queryStream)
            .process(new QueryFunction());

    DataStream<Tuple3<Long, Long, Float>> nearest = reports
            // key by the queryId
            .keyBy(x -> x.f0)
            // the minimum, for each query, by distance
            .minBy(2);

    nearest.print();

    env.execute("Nearest Available Taxi");
}
Example 14
Source File: KinesisExample.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    String inputStream = parameterTool.getRequired("input-stream");
    String outputStream = parameterTool.getRequired("output-stream");

    FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
        inputStream,
        new KafkaEventSchema(),
        parameterTool.getProperties());
    consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

    Properties producerProperties = new Properties(parameterTool.getProperties());
    // producer needs region even when URL is specified
    producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    // test driver does not deaggregate
    producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

    // KPL does not recognize endpoint URL..
    String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
    if (kinesisUrl != null) {
        URL url = new URL(kinesisUrl);
        producerProperties.put("KinesisEndpoint", url.getHost());
        producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
        producerProperties.put("VerifyCertificate", "false");
    }

    FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
        new KafkaEventSchema(),
        producerProperties);
    producer.setDefaultStream(outputStream);
    producer.setDefaultPartition("fakePartition");

    DataStream<KafkaEvent> input = env
        .addSource(consumer)
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(producer);
    env.execute();
}
Example 15
Source File: KinesisExample.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    String inputStream = parameterTool.getRequired("input-stream");
    String outputStream = parameterTool.getRequired("output-stream");

    FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
        inputStream,
        new KafkaEventSchema(),
        parameterTool.getProperties());
    consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

    Properties producerProperties = new Properties(parameterTool.getProperties());
    // producer needs region even when URL is specified
    producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    // test driver does not deaggregate
    producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

    // KPL does not recognize endpoint URL..
    String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
    if (kinesisUrl != null) {
        URL url = new URL(kinesisUrl);
        producerProperties.put("KinesisEndpoint", url.getHost());
        producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
        producerProperties.put("VerifyCertificate", "false");
    }

    FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
        new KafkaEventSchema(),
        producerProperties);
    producer.setDefaultStream(outputStream);
    producer.setDefaultPartition("fakePartition");

    DataStream<KafkaEvent> input = env
        .addSource(consumer)
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(producer);
    env.execute();
}
Example 16
Source File: WebLogAnalysis.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents
            .filter(new FilterDocByKeyWords())
            .project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
            .filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits
            .filter(new FilterVisitsByDate())
            .project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
            filterDocs.join(filterRanks)
                    .where(0).equalTo(1)
                    .projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
            joinDocsRanks.coGroup(filterVisits)
                    .where(1).equalTo(0)
                    .with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 17
Source File: StreamSQLTestProgram.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    String planner = params.get("planner", "old");

    final EnvironmentSettings.Builder builder = EnvironmentSettings.newInstance();
    builder.inStreamingMode();

    if (planner.equals("old")) {
        builder.useOldPlanner();
    } else if (planner.equals("blink")) {
        builder.useBlinkPlanner();
    }

    final EnvironmentSettings settings = builder.build();

    final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
        3,
        Time.of(10, TimeUnit.SECONDS)
    ));
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings);

    tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
    tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

    int overWindowSizeSeconds = 1;
    int tumbleWindowSizeSeconds = 10;

    String overQuery = String.format(
        "SELECT " +
        "  key, " +
        "  rowtime, " +
        "  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
        "FROM table1",
        overWindowSizeSeconds);

    String tumbleQuery = String.format(
        "SELECT " +
        "  key, " +
        "  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
        "  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
        "  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
        "FROM (%s) " +
        "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
        "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
        tumbleWindowSizeSeconds,
        tumbleWindowSizeSeconds,
        overQuery,
        tumbleWindowSizeSeconds);

    String joinQuery = String.format(
        "SELECT " +
        "  t1.key, " +
        "  t2.rowtime AS rowtime, " +
        "  t2.correct," +
        "  t2.wStart " +
        "FROM table2 t1, (%s) t2 " +
        "WHERE " +
        "  t1.key = t2.key AND " +
        "  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
        tumbleQuery,
        tumbleWindowSizeSeconds);

    String finalAgg = String.format(
        "SELECT " +
        "  SUM(correct) AS correct, " +
        "  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
        "FROM (%s) " +
        "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
        joinQuery);

    // get Table for SQL query
    Table result = tEnv.sqlQuery(finalAgg);
    // convert Table into append-only DataStream
    DataStream<Row> resultStream =
        tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

    final StreamingFileSink<Row> sink = StreamingFileSink
        .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
            PrintStream out = new PrintStream(stream);
            out.println(element.toString());
        })
        .withBucketAssigner(new KeyBucketAssigner())
        .withRollingPolicy(OnCheckpointRollingPolicy.build())
        .build();

    resultStream
        // inject a KillMapper that forwards all records but terminates the first execution attempt
        .map(new KillMapper()).setParallelism(1)
        // add sink function
        .addSink(sink).setParallelism(1);

    sEnv.execute();
}
Example 18
Source File: QsStateClient.java From flink with Apache License 2.0 | 4 votes |
public static void main(final String[] args) throws Exception {

    ParameterTool parameters = ParameterTool.fromArgs(args);

    // setup values
    String jobId = parameters.getRequired("job-id");
    String host = parameters.get("host", "localhost");
    int port = parameters.getInt("port", 9069);
    int numIterations = parameters.getInt("iterations", 1500);

    QueryableStateClient client = new QueryableStateClient(host, port);
    client.setExecutionConfig(new ExecutionConfig());

    MapStateDescriptor<EmailId, EmailInformation> stateDescriptor = new MapStateDescriptor<>(
            QsConstants.STATE_NAME,
            TypeInformation.of(new TypeHint<EmailId>() {}),
            TypeInformation.of(new TypeHint<EmailInformation>() {})
    );

    // wait for state to exist
    for (int i = 0; i < BOOTSTRAP_RETRIES; i++) { // ~120s
        try {
            getMapState(jobId, client, stateDescriptor);
            break;
        } catch (ExecutionException e) {
            if (e.getCause() instanceof UnknownKeyOrNamespaceException) {
                System.err.println("State does not exist yet; sleeping 500ms");
                Thread.sleep(500L);
            } else {
                throw e;
            }
        }

        if (i == (BOOTSTRAP_RETRIES - 1)) {
            throw new RuntimeException("Timeout: state doesn't exist after 120s");
        }
    }

    // query state
    for (int iterations = 0; iterations < numIterations; iterations++) {

        MapState<EmailId, EmailInformation> mapState =
            getMapState(jobId, client, stateDescriptor);

        int counter = 0;
        for (Map.Entry<EmailId, EmailInformation> entry: mapState.entries()) {
            // this is to force deserialization
            entry.getKey();
            entry.getValue();
            counter++;
        }
        System.out.println("MapState has " + counter + " entries"); // we look for it in the test

        Thread.sleep(100L);
    }
}
Example 19
Source File: StatefulFunctionsJob.java From stateful-functions with Apache License 2.0 | 4 votes |
public static void main(String... args) throws Exception {
    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    Configuration configuration = parameterTool.getConfiguration();

    main(configuration);
}
Example 20
Source File: KeyedJob.java From Flink-CEPplus with Apache License 2.0 | 3 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    String savepointsPath = pt.getRequired("savepoint-path");

    Configuration config = new Configuration();
    config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointsPath);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(config);

    env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.setStateBackend(new MemoryStateBackend());

    /**
     * Source -> keyBy -> C(Window -> StatefulMap1 -> StatefulMap2)
     */
    SingleOutputStreamOperator<Tuple2<Integer, Integer>> source = createIntegerTupleSource(env, ExecutionMode.GENERATE);

    SingleOutputStreamOperator<Integer> window = createWindowFunction(ExecutionMode.GENERATE, source);

    SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.GENERATE, window);

    SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.GENERATE, first);

    env.execute("job");
}