Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#fromArgs()
The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#fromArgs().
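Before the individual examples, here is a minimal, hypothetical sketch (not taken from any of the projects below) of what ParameterTool#fromArgs() does: it parses --key value pairs from the command-line arguments and exposes them through accessors such as getRequired, getInt with a default, and has. The parameter names used here (--input, --parallelism, --verbose) are made up purely for illustration.

import org.apache.flink.api.java.utils.ParameterTool;

public class FromArgsSketch {

    // Hypothetical invocation: java FromArgsSketch --input /tmp/data.txt --parallelism 4 --verbose
    public static void main(String[] args) {
        ParameterTool params = ParameterTool.fromArgs(args);

        String input = params.getRequired("input");         // throws if --input is missing
        int parallelism = params.getInt("parallelism", 1);   // falls back to 1 if absent
        boolean verbose = params.has("verbose");             // flag-style presence check

        System.out.println("input=" + input
                + ", parallelism=" + parallelism
                + ", verbose=" + verbose);
    }
}

Many of the examples below additionally call env.getConfig().setGlobalJobParameters(params) so that the parsed parameters show up in the Flink web interface.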
Example 1
Source File: ProduceIntoKinesis.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    DataStream<String> simpleStringStream = see.addSource(new EventsGenerator());

    Properties kinesisProducerConfig = new Properties();
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));

    FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(
            new SimpleStringSchema(), kinesisProducerConfig);

    kinesis.setFailOnError(true);
    kinesis.setDefaultStream("flink-test");
    kinesis.setDefaultPartition("0");

    simpleStringStream.addSink(kinesis);

    see.execute();
}
Example 2
Source File: Kafka010Example.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
            .addSource(
                new FlinkKafkaConsumer010<>(
                    parameterTool.getRequired("input-topic"),
                    new KafkaEventSchema(),
                    parameterTool.getProperties())
                .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    input.addSink(
            new FlinkKafkaProducer010<>(
                parameterTool.getRequired("output-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties()));

    env.execute("Kafka 0.10 Example");
}
Example 3
Source File: LongRidesExercise.java From flink-training-exercises with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;          // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600;    // events of 10 minutes are served in 1 second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    DataStream<TaxiRide> longRides = rides
            .keyBy(ride -> ride.rideId)
            .process(new MatchFunction());

    printOrTest(longRides);

    env.execute("Long Taxi Rides");
}
Example 4
Source File: Kafka010Example.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
            .addSource(
                new FlinkKafkaConsumer010<>(
                    parameterTool.getRequired("input-topic"),
                    new KafkaEventSchema(),
                    parameterTool.getProperties())
                .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    input.addSink(
            new FlinkKafkaProducer010<>(
                parameterTool.getRequired("output-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties()));

    env.execute("Kafka 0.10 Example");
}
Example 5
Source File: ConsumeFromKinesis.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
            "flink-test",
            new SimpleStringSchema(),
            kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
Example 6
Source File: AbstractTaskManagerProcessFailureRecoveryTest.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) {
    try {
        final ParameterTool parameterTool = ParameterTool.fromArgs(args);
        Configuration cfg = parameterTool.getConfiguration();

        TaskManagerRunner.runTaskManager(cfg, ResourceID.generate());
    } catch (Throwable t) {
        LOG.error("Failed to start TaskManager process", t);
        System.exit(1);
    }
}
Example 7
Source File: PeriodicStreamingJob.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    int recordsPerSecond = params.getInt("recordsPerSecond", 10);
    int duration = params.getInt("durationInSecond", 60);
    int offset = params.getInt("offsetInSecond", 0);

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    // execute a simple pass through program.
    PeriodicSourceGenerator generator = new PeriodicSourceGenerator(
        recordsPerSecond, duration, offset);
    DataStream<Tuple> rows = sEnv.addSource(generator);

    DataStream<Tuple> result = rows
        .keyBy(1)
        .timeWindow(Time.seconds(5))
        .sum(0);

    result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE)
        .setParallelism(1);

    sEnv.execute();
}
Example 8
Source File: KafkaTopicValidator.java From yahoo-streaming-benchmark with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(parameterTool);

    DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
            parameterTool.getRequired("kafka.topic"),
            new SimpleStringSchema(),
            parameterTool.getProperties()));

    rawMessageStream.print();

    env.execute();
}
Example 9
Source File: WindowJoin.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    // parse the parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    final long windowSize = params.getLong("windowSize", 2000);
    final long rate = params.getLong("rate", 3L);

    System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate);
    System.out.println("To customize example, use: WindowJoin [--windowSize <window-size-in-millis>] [--rate <elements-per-second>]");

    // obtain execution environment, run this example in "ingestion time"
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create the data sources for both grades and salaries
    DataStream<Tuple2<String, Integer>> grades = GradeSource.getSource(env, rate);
    DataStream<Tuple2<String, Integer>> salaries = SalarySource.getSource(env, rate);

    // run the actual window join program
    // for testability, this functionality is in a separate method.
    DataStream<Tuple3<String, Integer, Integer>> joinedStream = runWindowJoin(grades, salaries, windowSize);

    // print the results with a single thread, rather than in parallel
    joinedStream.print().setParallelism(1);

    // execute program
    env.execute("Windowed Join Example");
}
Example 10
Source File: KafkaThroughput.java From flink-perf with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, UnknownHostException, InterruptedException {
    final ParameterTool pt = ParameterTool.fromArgs(args);

    TopologyBuilder builder = new TopologyBuilder();
    BrokerHosts hosts = new ZkHosts(pt.getRequired("zookeeper"));
    SpoutConfig spoutConfig = new SpoutConfig(hosts, pt.getRequired("topic"), "/" + pt.getRequired("topic"), UUID.randomUUID().toString());
    spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
    KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

    builder.setSpout("source", kafkaSpout, pt.getInt("sourceParallelism"));
    builder.setBolt("sink", new Throughput.Sink(pt), pt.getInt("sinkParallelism")).noneGrouping("source");

    Config conf = new Config();
    conf.setDebug(false);

    if (!pt.has("local")) {
        conf.setNumWorkers(pt.getInt("par", 2));

        StormSubmitter.submitTopologyWithProgressBar("kafka-spout-" + pt.get("name", "no_name"), conf, builder.createTopology());
    } else {
        conf.setMaxTaskParallelism(pt.getInt("par", 2));

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("kafka-spout", conf, builder.createTopology());

        Thread.sleep(300000);

        cluster.shutdown();
    }
}
Example 11
Source File: IterateExample.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
            .setBufferTimeout(1);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
            .iterate(5000L);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
            .split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
            .map(new OutputMap());

    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
}
Example 12
Source File: KinesisExampleTest.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    LOG.info("System properties: {}", System.getProperties());
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);

    String inputStream = parameterTool.getRequired("input-stream");
    String outputStream = parameterTool.getRequired("output-stream");

    KinesisPubsubClient pubsub = new KinesisPubsubClient(parameterTool.getProperties());
    pubsub.createTopic(inputStream, 2, parameterTool.getProperties());
    pubsub.createTopic(outputStream, 2, parameterTool.getProperties());

    // The example job needs to start after streams are created and run in parallel to the validation logic.
    // The thread that runs the job won't terminate, we don't have a job reference to cancel it.
    // Once results are validated, the driver main thread will exit; job/cluster will be terminated from script.
    final AtomicReference<Exception> executeException = new AtomicReference<>();
    Thread executeThread =
        new Thread(
            () -> {
                try {
                    KinesisExample.main(args);
                    // this message won't appear in the log,
                    // job is terminated when shutting down cluster
                    LOG.info("executed program");
                } catch (Exception e) {
                    executeException.set(e);
                }
            });
    executeThread.start();

    // generate input
    String[] messages = {
        "elephant,5,45218",
        "squirrel,12,46213",
        "bee,3,51348",
        "squirrel,22,52444",
        "bee,10,53412",
        "elephant,9,54867"
    };
    for (String msg : messages) {
        pubsub.sendMessage(inputStream, msg);
    }
    LOG.info("generated records");

    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(60));
    List<String> results = pubsub.readAllMessages(outputStream);
    while (deadline.hasTimeLeft() && executeException.get() == null && results.size() < messages.length) {
        LOG.info("waiting for results..");
        Thread.sleep(1000);
        results = pubsub.readAllMessages(outputStream);
    }

    if (executeException.get() != null) {
        throw executeException.get();
    }

    LOG.info("results: {}", results);
    Assert.assertEquals("Results received from '" + outputStream + "': " + results,
        messages.length, results.size());

    String[] expectedResults = {
        "elephant,5,45218",
        "elephant,14,54867",
        "squirrel,12,46213",
        "squirrel,34,52444",
        "bee,3,51348",
        "bee,13,53412"
    };

    for (String expectedResult : expectedResults) {
        Assert.assertTrue(expectedResult, results.contains(expectedResult));
    }

    // TODO: main thread needs to create job or CLI fails with:
    // "The program didn't contain a Flink job. Perhaps you forgot to call execute() on the execution environment."
    System.out.println("test finished");
    System.exit(0);
}
Example 13
Source File: NearestTaxiSolution.java From flink-training-exercises with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;          // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 600;    // 10 minutes worth of events are served every second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));

    // add a socket source
    BroadcastStream<Query> queryStream = env.socketTextStream("localhost", 9999)
            .map(new MapFunction<String, Query>() {
                @Override
                public Query map(String msg) throws Exception {
                    String[] parts = msg.split(",\\s*");
                    return new Query(
                            Float.valueOf(parts[0]),    // longitude
                            Float.valueOf(parts[1]));   // latitude
                }
            })
            .broadcast(queryDescriptor);

    DataStream<Tuple3<Long, Long, Float>> reports = rides
            .keyBy((TaxiRide ride) -> ride.taxiId)
            .connect(queryStream)
            .process(new QueryFunction());

    DataStream<Tuple3<Long, Long, Float>> nearest = reports
            // key by the queryId
            .keyBy(x -> x.f0)
            // the minimum, for each query, by distance
            .minBy(2);

    nearest.print();

    env.execute("Nearest Available Taxi");
}
Example 14
Source File: KinesisExample.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    String inputStream = parameterTool.getRequired("input-stream");
    String outputStream = parameterTool.getRequired("output-stream");

    FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
        inputStream,
        new KafkaEventSchema(),
        parameterTool.getProperties());
    consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

    Properties producerProperties = new Properties(parameterTool.getProperties());
    // producer needs region even when URL is specified
    producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    // test driver does not deaggregate
    producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

    // KPL does not recognize endpoint URL..
    String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
    if (kinesisUrl != null) {
        URL url = new URL(kinesisUrl);
        producerProperties.put("KinesisEndpoint", url.getHost());
        producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
        producerProperties.put("VerifyCertificate", "false");
    }

    FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
        new KafkaEventSchema(),
        producerProperties);
    producer.setDefaultStream(outputStream);
    producer.setDefaultPartition("fakePartition");

    DataStream<KafkaEvent> input = env
        .addSource(consumer)
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(producer);
    env.execute();
}
Example 15
Source File: KinesisExample.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    String inputStream = parameterTool.getRequired("input-stream");
    String outputStream = parameterTool.getRequired("output-stream");

    FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
        inputStream,
        new KafkaEventSchema(),
        parameterTool.getProperties());
    consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

    Properties producerProperties = new Properties(parameterTool.getProperties());
    // producer needs region even when URL is specified
    producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    // test driver does not deaggregate
    producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

    // KPL does not recognize endpoint URL..
    String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
    if (kinesisUrl != null) {
        URL url = new URL(kinesisUrl);
        producerProperties.put("KinesisEndpoint", url.getHost());
        producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
        producerProperties.put("VerifyCertificate", "false");
    }

    FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
        new KafkaEventSchema(),
        producerProperties);
    producer.setDefaultStream(outputStream);
    producer.setDefaultPartition("fakePartition");

    DataStream<KafkaEvent> input = env
        .addSource(consumer)
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(producer);
    env.execute();
}
Example 16
Source File: WebLogAnalysis.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents
            .filter(new FilterDocByKeyWords())
            .project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
            .filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits
            .filter(new FilterVisitsByDate())
            .project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
            filterDocs.join(filterRanks)
                    .where(0).equalTo(1)
                    .projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
            joinDocsRanks.coGroup(filterVisits)
                    .where(1).equalTo(0)
                    .with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 17
Source File: StreamSQLTestProgram.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    String planner = params.get("planner", "old");

    final EnvironmentSettings.Builder builder = EnvironmentSettings.newInstance();
    builder.inStreamingMode();

    if (planner.equals("old")) {
        builder.useOldPlanner();
    } else if (planner.equals("blink")) {
        builder.useBlinkPlanner();
    }

    final EnvironmentSettings settings = builder.build();

    final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
        3,
        Time.of(10, TimeUnit.SECONDS)
    ));
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings);

    tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
    tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

    int overWindowSizeSeconds = 1;
    int tumbleWindowSizeSeconds = 10;

    String overQuery = String.format(
        "SELECT " +
        "  key, " +
        "  rowtime, " +
        "  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
        "FROM table1",
        overWindowSizeSeconds);

    String tumbleQuery = String.format(
        "SELECT " +
        "  key, " +
        "  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
        "  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
        "  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
        "FROM (%s) " +
        "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
        "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
        tumbleWindowSizeSeconds,
        tumbleWindowSizeSeconds,
        overQuery,
        tumbleWindowSizeSeconds);

    String joinQuery = String.format(
        "SELECT " +
        "  t1.key, " +
        "  t2.rowtime AS rowtime, " +
        "  t2.correct," +
        "  t2.wStart " +
        "FROM table2 t1, (%s) t2 " +
        "WHERE " +
        "  t1.key = t2.key AND " +
        "  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
        tumbleQuery,
        tumbleWindowSizeSeconds);

    String finalAgg = String.format(
        "SELECT " +
        "  SUM(correct) AS correct, " +
        "  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
        "FROM (%s) " +
        "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
        joinQuery);

    // get Table for SQL query
    Table result = tEnv.sqlQuery(finalAgg);
    // convert Table into append-only DataStream
    DataStream<Row> resultStream =
        tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

    final StreamingFileSink<Row> sink = StreamingFileSink
        .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
            PrintStream out = new PrintStream(stream);
            out.println(element.toString());
        })
        .withBucketAssigner(new KeyBucketAssigner())
        .withRollingPolicy(OnCheckpointRollingPolicy.build())
        .build();

    resultStream
        // inject a KillMapper that forwards all records but terminates the first execution attempt
        .map(new KillMapper()).setParallelism(1)
        // add sink function
        .addSink(sink).setParallelism(1);

    sEnv.execute();
}
Example 18
Source File: QsStateClient.java From flink with Apache License 2.0 | 4 votes |
public static void main(final String[] args) throws Exception {

    ParameterTool parameters = ParameterTool.fromArgs(args);

    // setup values
    String jobId = parameters.getRequired("job-id");
    String host = parameters.get("host", "localhost");
    int port = parameters.getInt("port", 9069);
    int numIterations = parameters.getInt("iterations", 1500);

    QueryableStateClient client = new QueryableStateClient(host, port);
    client.setExecutionConfig(new ExecutionConfig());

    MapStateDescriptor<EmailId, EmailInformation> stateDescriptor = new MapStateDescriptor<>(
            QsConstants.STATE_NAME,
            TypeInformation.of(new TypeHint<EmailId>() {}),
            TypeInformation.of(new TypeHint<EmailInformation>() {})
    );

    // wait for state to exist
    for (int i = 0; i < BOOTSTRAP_RETRIES; i++) { // ~120s
        try {
            getMapState(jobId, client, stateDescriptor);
            break;
        } catch (ExecutionException e) {
            if (e.getCause() instanceof UnknownKeyOrNamespaceException) {
                System.err.println("State does not exist yet; sleeping 500ms");
                Thread.sleep(500L);
            } else {
                throw e;
            }
        }

        if (i == (BOOTSTRAP_RETRIES - 1)) {
            throw new RuntimeException("Timeout: state doesn't exist after 120s");
        }
    }

    // query state
    for (int iterations = 0; iterations < numIterations; iterations++) {

        MapState<EmailId, EmailInformation> mapState =
            getMapState(jobId, client, stateDescriptor);

        int counter = 0;
        for (Map.Entry<EmailId, EmailInformation> entry: mapState.entries()) {
            // this is to force deserialization
            entry.getKey();
            entry.getValue();
            counter++;
        }
        System.out.println("MapState has " + counter + " entries"); // we look for it in the test

        Thread.sleep(100L);
    }
}
Example 19
Source File: StatefulFunctionsJob.java From stateful-functions with Apache License 2.0 | 4 votes |
public static void main(String... args) throws Exception {
    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    Configuration configuration = parameterTool.getConfiguration();

    main(configuration);
}
Example 20
Source File: KeyedJob.java From Flink-CEPplus with Apache License 2.0 | 3 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    String savepointsPath = pt.getRequired("savepoint-path");

    Configuration config = new Configuration();
    config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointsPath);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(config);

    env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.setStateBackend(new MemoryStateBackend());

    /**
     * Source -> keyBy -> C(Window -> StatefulMap1 -> StatefulMap2)
     */
    SingleOutputStreamOperator<Tuple2<Integer, Integer>> source = createIntegerTupleSource(env, ExecutionMode.GENERATE);

    SingleOutputStreamOperator<Integer> window = createWindowFunction(ExecutionMode.GENERATE, source);

    SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.GENERATE, window);

    SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.GENERATE, first);

    env.execute("job");
}