Java Code Examples for org.apache.flink.api.java.utils.ParameterTool#get()
The following examples show how to use org.apache.flink.api.java.utils.ParameterTool#get().
Each example is drawn from an open-source project; the source file, project, license, and community vote count are noted above the code.
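Before the examples, here is a minimal sketch of the typical get() usage pattern seen throughout this page. The class name and the argument keys (--input, --threshold) are placeholders chosen for illustration, not part of any of the projects below.

import org.apache.flink.api.java.utils.ParameterTool;

public class ParameterToolGetSketch {

    public static void main(String[] args) {
        // e.g. run with: --input /tmp/rides.csv --threshold 20
        ParameterTool params = ParameterTool.fromArgs(args);

        // get(key) returns null when the key is absent
        String maybeInput = params.get("input");

        // get(key, defaultValue) falls back to the default when the key is absent
        String input = params.get("input", "/tmp/default-input");

        // getRequired(key) throws a RuntimeException when the key is missing
        String required = params.getRequired("input");

        // typed variants with defaults are also available
        int threshold = params.getInt("threshold", 20);

        System.out.println(maybeInput + " / " + input + " / " + required + " / " + threshold);
    }
}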
Example 1
Source File: Simplify.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override
public void configure(ParameterTool parameterTool) {
    String ordering = parameterTool.get("simplify");

    if (ordering == null) {
        value = Ordering.NONE;
    } else {
        switch (ordering.toLowerCase()) {
            case "directed":
                value = Ordering.DIRECTED;
                break;
            case "undirected":
                value = parameterTool.has("clip_and_flip") ? Ordering.UNDIRECTED_CLIP_AND_FLIP : Ordering.UNDIRECTED;
                break;
            default:
                throw new ProgramParametrizationException(
                    "Expected 'directed' or 'undirected' ordering but received '" + ordering + "'");
        }
    }
}
Example 2
Source File: S3UtilProgram.java From flink with Apache License 2.0 | 6 votes |
private static void numberOfLinesInFilesWithFullAndNamePrefix(ParameterTool params) {
    final String bucket = params.getRequired("bucket");
    final String s3prefix = params.getRequired("s3prefix");
    final String s3filePrefix = params.get("s3filePrefix", "");
    int parallelism = params.getInt("parallelism", 10);

    List<String> files = listByFullPathPrefix(bucket, s3prefix);

    ExecutorService executor = Executors.newFixedThreadPool(parallelism);
    AmazonS3 s3client = AmazonS3ClientBuilder.defaultClient();
    List<CompletableFuture<Integer>> requests =
        submitLineCountingRequestsForFilesAsync(executor, s3client, bucket, files, s3filePrefix);
    int count = waitAndComputeTotalLineCountResult(requests);

    executor.shutdownNow();
    s3client.shutdown();
    System.out.print(count);
}
Example 3
Source File: StreamingNoop.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);

    // define the dataflow
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, 1000));
    env.readFileStream("input/", 60000, FileMonitoringFunction.WatchType.ONLY_NEW_FILES)
        .addSink(new DiscardingSink<String>());

    // generate a job graph
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    File jobGraphFile = new File(params.get("output", "job.graph"));
    try (FileOutputStream output = new FileOutputStream(jobGraphFile);
            ObjectOutputStream obOutput = new ObjectOutputStream(output)) {
        obOutput.writeObject(jobGraph);
    }
}
Example 4
Source File: S3UtilProgram.java From flink with Apache License 2.0 | 6 votes |
private static void downloadByFullPathAndFileNamePrefix(ParameterTool params) {
    final String bucket = params.getRequired("bucket");
    final String s3prefix = params.getRequired("s3prefix");
    final String localFolder = params.getRequired("localFolder");
    final String s3filePrefix = params.get("s3filePrefix", "");
    TransferManager tx = TransferManagerBuilder.defaultTransferManager();

    Predicate<String> keyPredicate = getKeyFilterByFileNamePrefix(s3filePrefix);
    KeyFilter keyFilter = s3filePrefix.isEmpty() ? KeyFilter.INCLUDE_ALL :
        objectSummary -> keyPredicate.test(objectSummary.getKey());

    try {
        tx.downloadDirectory(bucket, s3prefix, new File(localFolder), keyFilter).waitForCompletion();
    } catch (InterruptedException e) {
        System.out.println("Transfer interrupted");
    } finally {
        tx.shutdownNow();
    }
}
Example 5
Source File: S3UtilProgram.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
private static void downloadByFullPathAndFileNamePrefix(ParameterTool params) {
    final String bucket = params.getRequired("bucket");
    final String s3prefix = params.getRequired("s3prefix");
    final String localFolder = params.getRequired("localFolder");
    final String s3filePrefix = params.get("s3filePrefix", "");
    TransferManager tx = TransferManagerBuilder.defaultTransferManager();

    Predicate<String> keyPredicate = getKeyFilterByFileNamePrefix(s3filePrefix);
    KeyFilter keyFilter = s3filePrefix.isEmpty() ? KeyFilter.INCLUDE_ALL :
        objectSummary -> keyPredicate.test(objectSummary.getKey());

    try {
        tx.downloadDirectory(bucket, s3prefix, new File(localFolder), keyFilter).waitForCompletion();
    } catch (InterruptedException e) {
        System.out.println("Transfer interrupted");
    } finally {
        tx.shutdownNow();
    }
}
Example 6
Source File: DataStreamAllroundTestJobFactory.java From flink with Apache License 2.0 | 5 votes |
private static void setupCheckpointing(final StreamExecutionEnvironment env, final ParameterTool pt) {
    String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
    long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
    CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
        ? CheckpointingMode.EXACTLY_ONCE
        : CheckpointingMode.AT_LEAST_ONCE;

    env.enableCheckpointing(checkpointInterval, checkpointingMode);

    boolean enableExternalizedCheckpoints = pt.getBoolean(
        ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
        ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

    if (enableExternalizedCheckpoints) {
        String cleanupModeConfig = pt.get(
            ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
            ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

        CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
        switch (cleanupModeConfig) {
            case "retain":
                cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
                break;
            case "delete":
                cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
                break;
            default:
                throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
        }
        env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);

        final int tolerableDeclinedCheckpointNumber = pt.getInt(
            ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.key(),
            ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.defaultValue());
        env.getCheckpointConfig().setTolerableCheckpointFailureNumber(tolerableDeclinedCheckpointNumber);
    }
}
Example 7
Source File: ClickEventCount.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    configureEnvironment(params, env);

    String inputTopic = params.get("input-topic", "input");
    String outputTopic = params.get("output-topic", "output");
    String brokers = params.get("bootstrap.servers", "localhost:9092");
    Properties kafkaProps = new Properties();
    kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
    kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

    env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
        .name("ClickEvent Source")
        .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
            @Override
            public long extractTimestamp(final ClickEvent element) {
                return element.getTimestamp().getTime();
            }
        })
        .keyBy(ClickEvent::getPage)
        .timeWindow(WINDOW_SIZE)
        .aggregate(new CountingAggregator(), new ClickEventStatisticsCollector())
        .name("ClickEvent Counter")
        .addSink(new FlinkKafkaProducer<>(
            outputTopic,
            new ClickEventStatisticsSerializationSchema(outputTopic),
            kafkaProps,
            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
        .name("ClickEventStatistics Sink");

    env.execute("Click Event Count");
}
Example 8
Source File: PopularPlacesSolution.java From flink-training-exercises with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);
    final int popThreshold = params.getInt("threshold", 20);

    final int maxEventDelay = 60;       // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    // find popular places
    DataStream<Tuple5<Float, Float, Long, Boolean, Integer>> popularSpots = rides
        // remove all rides which are not within NYC
        .filter(new NYCFilter())
        // match ride to grid cell and event type (start or end)
        .map(new GridCellMatcher())
        // partition by cell id and event type
        .<KeyedStream<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>>>keyBy(0, 1)
        // build sliding window
        .timeWindow(Time.minutes(15), Time.minutes(5))
        // count ride events in window
        .apply(new RideCounter())
        // filter by popularity threshold
        .filter((Tuple4<Integer, Long, Boolean, Integer> count) -> (count.f3 >= popThreshold))
        // map grid cell to coordinates
        .map(new GridToCoordinates());

    // print result on stdout
    printOrTest(popularSpots);

    // execute the transformation pipeline
    env.execute("Popular Places");
}
Example 9
Source File: Utils.java From flink-tutorials with Apache License 2.0 | 5 votes |
public static boolean isSensitive(String key, ParameterTool params) {
    Preconditions.checkNotNull(key, "key is null");
    final String value = params.get(SENSITIVE_KEYS_KEY);
    if (value == null) {
        return false;
    }
    String keyInLower = key.toLowerCase();
    String[] sensitiveKeys = value.split(",");

    for (int i = 0; i < sensitiveKeys.length; ++i) {
        String hideKey = sensitiveKeys[i];
        if (keyInLower.length() >= hideKey.length() && keyInLower.contains(hideKey)) {
            return true;
        }
    }
    return false;
}
Example 10
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0 | 5 votes |
private static boolean isOriginalJobVariant(final ParameterTool pt) {
    switch (pt.get(TEST_JOB_VARIANT.key())) {
        case TEST_JOB_VARIANT_ORIGINAL:
            return true;
        case TEST_JOB_VARIANT_UPGRADED:
            return false;
        default:
            throw new IllegalArgumentException(String.format("'--test.job.variant' can be either '%s' or '%s'",
                TEST_JOB_VARIANT_ORIGINAL, TEST_JOB_VARIANT_UPGRADED));
    }
}
Example 11
Source File: LongRidesCEPExercise.java From flink-training-exercises with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // CheckpointedTaxiRideSource delivers events in order
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new CheckpointedTaxiRideSource(input, servingSpeedFactor)));

    DataStream<TaxiRide> keyedRides = rides
        .keyBy("rideId");

    // A complete taxi ride has a START event followed by an END event
    // This pattern is incomplete ...
    Pattern<TaxiRide, TaxiRide> completedRides = Pattern.<TaxiRide>begin("start");

    // We want to find rides that have NOT been completed within 120 minutes.
    // This pattern matches rides that ARE completed.
    // Below we will ignore rides that match this pattern, and emit those that timeout.
    PatternStream<TaxiRide> patternStream = CEP.pattern(keyedRides, completedRides.within(Time.minutes(120)));

    OutputTag<TaxiRide> timedout = new OutputTag<TaxiRide>("timedout"){};

    SingleOutputStreamOperator<TaxiRide> longRides = patternStream.flatSelect(
        timedout,
        new TaxiRideTimedOut<TaxiRide>(),
        new FlatSelectNothing<TaxiRide>()
    );

    printOrTest(longRides.getSideOutput(timedout));

    throw new MissingSolutionException();
    // env.execute("Long Taxi Rides (CEP)");
}
Example 12
Source File: AbstractHandler.java From pravega-samples with Apache License 2.0 | 5 votes |
public AbstractHandler(String ... args) {
    ParameterTool params = ParameterTool.fromArgs(args);
    this.scope = params.get("scope", DEFAULT_SCOPE);
    this.stream = params.get("stream", DEFAULT_STREAM);
    this.controllerUri = params.get("controllerUri", DEFAULT_CONTROLLER_URI);
    this.create = params.getBoolean("create-stream", CREATE_STREAM);
    this.limit = params.getInt("threshold", DEFAULT_POPULAR_DEST_THRESHOLD);
}
Example 13
Source File: TaxiQueryExercise.java From flink-training-exercises with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;        // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 1800; // 30 minutes worth of events are served every second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // setup a stream of taxi rides
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    // add a socket source for the query stream
    BroadcastStream<String> queryStream = env
        .addSource(stringSourceOrTest(new SocketTextStreamFunction("localhost", 9999, "\n", -1)))
        .broadcast(queryDescriptor);

    // connect the two streams and process queries
    DataStream<Tuple2<String, String>> results = rides
        .keyBy((TaxiRide ride) -> ride.taxiId)
        .connect(queryStream)
        .process(new QueryProcessor());

    printOrTest(results);

    env.execute("Taxi Query");
}
Example 14
Source File: ExpiringStateSolution.java From flink-training-exercises with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String ridesFile = params.get("rides", ExerciseBase.pathToRideData);
    final String faresFile = params.get("fares", ExerciseBase.pathToFareData);

    final int maxEventDelay = 60;       // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // 10 minutes worth of events are served every second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    DataStream<TaxiRide> rides = env
        .addSource(rideSourceOrTest(new TaxiRideSource(ridesFile, maxEventDelay, servingSpeedFactor)))
        .filter((TaxiRide ride) -> (ride.isStart && (ride.rideId % 1000 != 0)))
        .keyBy(ride -> ride.rideId);

    DataStream<TaxiFare> fares = env
        .addSource(fareSourceOrTest(new TaxiFareSource(faresFile, maxEventDelay, servingSpeedFactor)))
        .keyBy(fare -> fare.rideId);

    SingleOutputStreamOperator processed = rides
        .connect(fares)
        .process(new EnrichmentFunction());

    printOrTest(processed.getSideOutput(unmatchedFares));

    env.execute("ExpiringStateSolution (java)");
}
Example 15
Source File: QsStateClient.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(final String[] args) throws Exception {

    ParameterTool parameters = ParameterTool.fromArgs(args);

    // setup values
    String jobId = parameters.getRequired("job-id");
    String host = parameters.get("host", "localhost");
    int port = parameters.getInt("port", 9069);
    int numIterations = parameters.getInt("iterations", 1500);

    QueryableStateClient client = new QueryableStateClient(host, port);
    client.setExecutionConfig(new ExecutionConfig());

    MapStateDescriptor<EmailId, EmailInformation> stateDescriptor = new MapStateDescriptor<>(
        QsConstants.STATE_NAME,
        TypeInformation.of(new TypeHint<EmailId>() {}),
        TypeInformation.of(new TypeHint<EmailInformation>() {})
    );

    // wait for state to exist
    for (int i = 0; i < BOOTSTRAP_RETRIES; i++) { // ~120s
        try {
            getMapState(jobId, client, stateDescriptor);
            break;
        } catch (ExecutionException e) {
            if (e.getCause() instanceof UnknownKeyOrNamespaceException) {
                System.err.println("State does not exist yet; sleeping 500ms");
                Thread.sleep(500L);
            } else {
                throw e;
            }
        }

        if (i == (BOOTSTRAP_RETRIES - 1)) {
            throw new RuntimeException("Timeout: state doesn't exist after 120s");
        }
    }

    // query state
    for (int iterations = 0; iterations < numIterations; iterations++) {

        MapState<EmailId, EmailInformation> mapState = getMapState(jobId, client, stateDescriptor);

        int counter = 0;
        for (Map.Entry<EmailId, EmailInformation> entry: mapState.entries()) {
            // this is to force deserialization
            entry.getKey();
            entry.getValue();
            counter++;
        }
        System.out.println("MapState has " + counter + " entries"); // we look for it in the test

        Thread.sleep(100L);
    }
}
Example 16
Source File: NearestTaxiWithCleanupSolution.java From flink-training-exercises with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;       // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 600; // 10 minutes worth of events are served every second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)));

    // add a socket source
    BroadcastStream<Query> queryStream = env.socketTextStream("localhost", 9999)
        .assignTimestampsAndWatermarks(new QueryStreamAssigner())
        .map(new MapFunction<String, Query>() {
            @Override
            public Query map(String msg) throws Exception {
                String[] parts = msg.split(",\\s*");
                return new Query(
                    Float.valueOf(parts[0]),  // longitude
                    Float.valueOf(parts[1])); // latitude
            }
        })
        .broadcast(queryDescriptor);

    DataStream<Tuple3<Long, Long, Float>> reports = rides
        .keyBy((TaxiRide ride) -> ride.taxiId)
        .connect(queryStream)
        .process(new QueryFunction());

    DataStream<Tuple3<Long, Long, Float>> nearest = reports
        // key by the queryId
        .keyBy(new KeySelector<Tuple3<Long, Long, Float>, Long>() {
            @Override
            public Long getKey(Tuple3<Long, Long, Float> value) throws Exception {
                return value.f0;
            }
        })
        .process(new ClosestTaxi());

    printOrTest(nearest);

    env.execute("Nearest Available Taxi");
}
Example 17
Source File: DataStreamAllroundTestJobFactory.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void setupEnvironment(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {

    // set checkpointing semantics
    String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
    long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
    CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
        ? CheckpointingMode.EXACTLY_ONCE
        : CheckpointingMode.AT_LEAST_ONCE;

    env.enableCheckpointing(checkpointInterval, checkpointingMode);

    // use event time
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // parallelism
    env.setParallelism(pt.getInt(ENVIRONMENT_PARALLELISM.key(), ENVIRONMENT_PARALLELISM.defaultValue()));
    env.setMaxParallelism(pt.getInt(ENVIRONMENT_MAX_PARALLELISM.key(), ENVIRONMENT_MAX_PARALLELISM.defaultValue()));

    // restart strategy
    String restartStrategyConfig = pt.get(ENVIRONMENT_RESTART_STRATEGY.key());
    if (restartStrategyConfig != null) {
        RestartStrategies.RestartStrategyConfiguration restartStrategy;
        switch (restartStrategyConfig) {
            case "fixed_delay":
                restartStrategy = RestartStrategies.fixedDelayRestart(
                    pt.getInt(
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.key(),
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.defaultValue()),
                    pt.getLong(
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.key(),
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.defaultValue()));
                break;
            case "no_restart":
                restartStrategy = RestartStrategies.noRestart();
                break;
            default:
                throw new IllegalArgumentException("Unkown restart strategy: " + restartStrategyConfig);
        }
        env.setRestartStrategy(restartStrategy);
    }

    // state backend
    final String stateBackend = pt.get(
        STATE_BACKEND.key(),
        STATE_BACKEND.defaultValue());

    final String checkpointDir = pt.getRequired(STATE_BACKEND_CHECKPOINT_DIR.key());

    if ("file".equalsIgnoreCase(stateBackend)) {
        boolean asyncCheckpoints = pt.getBoolean(
            STATE_BACKEND_FILE_ASYNC.key(),
            STATE_BACKEND_FILE_ASYNC.defaultValue());

        env.setStateBackend((StateBackend) new FsStateBackend(checkpointDir, asyncCheckpoints));
    } else if ("rocks".equalsIgnoreCase(stateBackend)) {
        boolean incrementalCheckpoints = pt.getBoolean(
            STATE_BACKEND_ROCKS_INCREMENTAL.key(),
            STATE_BACKEND_ROCKS_INCREMENTAL.defaultValue());

        env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
    } else {
        throw new IllegalArgumentException("Unknown backend requested: " + stateBackend);
    }

    boolean enableExternalizedCheckpoints = pt.getBoolean(
        ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
        ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

    if (enableExternalizedCheckpoints) {
        String cleanupModeConfig = pt.get(
            ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
            ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

        CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
        switch (cleanupModeConfig) {
            case "retain":
                cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
                break;
            case "delete":
                cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
                break;
            default:
                throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
        }
        env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);
    }

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(pt);
}
Example 18
Source File: QsStateClient.java From flink with Apache License 2.0 | 4 votes |
public static void main(final String[] args) throws Exception {

    ParameterTool parameters = ParameterTool.fromArgs(args);

    // setup values
    String jobId = parameters.getRequired("job-id");
    String host = parameters.get("host", "localhost");
    int port = parameters.getInt("port", 9069);
    int numIterations = parameters.getInt("iterations", 1500);

    QueryableStateClient client = new QueryableStateClient(host, port);
    client.setExecutionConfig(new ExecutionConfig());

    MapStateDescriptor<EmailId, EmailInformation> stateDescriptor = new MapStateDescriptor<>(
        QsConstants.STATE_NAME,
        TypeInformation.of(new TypeHint<EmailId>() {}),
        TypeInformation.of(new TypeHint<EmailInformation>() {})
    );

    // wait for state to exist
    for (int i = 0; i < BOOTSTRAP_RETRIES; i++) { // ~120s
        try {
            getMapState(jobId, client, stateDescriptor);
            break;
        } catch (ExecutionException e) {
            if (e.getCause() instanceof UnknownKeyOrNamespaceException) {
                System.err.println("State does not exist yet; sleeping 500ms");
                Thread.sleep(500L);
            } else {
                throw e;
            }
        }

        if (i == (BOOTSTRAP_RETRIES - 1)) {
            throw new RuntimeException("Timeout: state doesn't exist after 120s");
        }
    }

    // query state
    for (int iterations = 0; iterations < numIterations; iterations++) {

        MapState<EmailId, EmailInformation> mapState = getMapState(jobId, client, stateDescriptor);

        int counter = 0;
        for (Map.Entry<EmailId, EmailInformation> entry: mapState.entries()) {
            // this is to force deserialization
            entry.getKey();
            entry.getValue();
            counter++;
        }
        System.out.println("MapState has " + counter + " entries"); // we look for it in the test

        Thread.sleep(100L);
    }
}
Example 19
Source File: KafkaEventsGeneratorJob.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    double errorRate = params.getDouble("error-rate", 0.0);
    int sleep = params.getInt("sleep", 1);

    String kafkaTopic = params.get("kafka-topic");
    String brokers = params.get("brokers", "localhost:9092");

    System.out.printf("Generating events to Kafka with standalone source with error rate %f and sleep delay %s millis\n", errorRate, sleep);
    System.out.println();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env
        .addSource(new EventsGeneratorSource(errorRate, sleep))
        .addSink(new FlinkKafkaProducer<>(brokers, kafkaTopic, new EventDeSerializer()));

    // trigger program execution
    env.execute("State machine example Kafka events generator job");
}
Example 20
Source File: StreamingETL.java From flink-streaming-etl with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    // parse arguments
    ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

    // create streaming environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // enable event time processing
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // enable fault-tolerance
    env.enableCheckpointing(1000);

    // enable restarts
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));

    env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));

    // run each operator separately
    env.disableOperatorChaining();

    // get data from Kafka
    Properties kParams = params.getProperties();
    kParams.setProperty("group.id", UUID.randomUUID().toString());
    DataStream<ObjectNode> inputStream = env
        .addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
        .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
            @Override
            public long extractTimestamp(ObjectNode jsonNodes) {
                return jsonNodes.get("timestamp_ms").asLong();
            }
        }).name("Timestamp extractor");

    // filter out records without lang field
    DataStream<ObjectNode> tweetsWithLang = inputStream
        .filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang"))
        .name("Filter records without 'lang' field");

    // select only lang = "en" tweets
    DataStream<ObjectNode> englishTweets = tweetsWithLang
        .filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en"))
        .name("Select 'lang'=en tweets");

    // write to file system
    RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
    rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
    englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");

    // build aggregates (count per language) using window (10 seconds tumbling):
    DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang
        .keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
        .timeWindow(Time.seconds(10))
        .apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter())
        .name("Count per Langauage (10 seconds tumbling)");

    // write window aggregate to ElasticSearch
    List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
    ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());
    languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");

    // word-count on the tweet stream
    DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
        // get text from tweets
        .map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
        // split text into (word, 1) tuples
        .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
                String[] splits = s.split(" ");
                for (String sp : splits) {
                    collector.collect(new Tuple2<>(sp, 1L));
                }
            }
        }).name("Tokenize words")
        // group by word
        .keyBy(0)
        // build 1 min windows, compute every 10 seconds --> count word frequency
        .timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
        // build top n every 10 seconds
        .timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");

    // write top Ns to Kafka topic
    topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");

    env.execute("Streaming ETL");
}