org.apache.flink.streaming.api.datastream.KeyedStream Java Examples
The following examples show how to use
org.apache.flink.streaming.api.datastream.KeyedStream.
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
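Before the examples, a quick refresher: calling keyBy on a DataStream yields a KeyedStream, on which per-key aggregations, windows, and keyed state become available. A minimal, self-contained sketch (the class name and sample data are illustrative, not taken from any project below):

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyedStreamSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // a small in-memory stream of (word, count) pairs
        DataStream<Tuple2<String, Long>> pairs = env.fromElements(
                Tuple2.of("flink", 1L),
                Tuple2.of("kafka", 1L),
                Tuple2.of("flink", 1L));

        // keyBy partitions the stream by key and yields a KeyedStream;
        // all records with the same key go to the same parallel subtask
        KeyedStream<Tuple2<String, Long>, String> keyed = pairs.keyBy(t -> t.f0);

        // rolling aggregation per key: emits an updated sum for every input record
        keyed.sum(1).print();

        env.execute("KeyedStream sketch");
    }
}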
Example #1
Source File: FlinkGroupByKeyWrapper.java From flink-dataflow with Apache License 2.0

public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(
        DataStream<WindowedValue<KV<K, V>>> inputDataStream,
        KvCoder<K, V> inputKvCoder) {
    final Coder<K> keyCoder = inputKvCoder.getKeyCoder();
    final TypeInformation<K> keyTypeInfo = new CoderTypeInformation<>(keyCoder);
    final boolean isKeyVoid = keyCoder instanceof VoidCoder;

    return inputDataStream.keyBy(
            new KeySelectorWithQueryableResultType<K, V>() {

                @Override
                public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
                    return isKeyVoid
                            ? (K) VoidCoderTypeSerializer.VoidValue.INSTANCE
                            : value.getValue().getKey();
                }

                @Override
                public TypeInformation<K> getProducedType() {
                    return keyTypeInfo;
                }
            });
}
Example #2
Source File: DataStreamAllroundTestJobFactory.java From flink with Apache License 2.0

static WindowedStream<Event, Integer, TimeWindow> applyTumblingWindows(
        KeyedStream<Event, Integer> keyedStream,
        ParameterTool pt) {
    long eventTimeProgressPerEvent = pt.getLong(
            SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.key(),
            SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.defaultValue());

    return keyedStream.timeWindow(
            Time.milliseconds(
                    pt.getLong(
                            TUMBLING_WINDOW_OPERATOR_NUM_EVENTS.key(),
                            TUMBLING_WINDOW_OPERATOR_NUM_EVENTS.defaultValue()
                    ) * eventTimeProgressPerEvent
            )
    );
}
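A side note on the shorthand above: KeyedStream#timeWindow derives the window type from the environment's TimeCharacteristic and was deprecated in later Flink releases in favor of explicit window assigners. A hedged sketch of the explicit event-time equivalent (the class and method names are mine, not part of this test job):

import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;

class TumblingWindowSketch {

    // Explicit-assigner equivalent of keyedStream.timeWindow(size) under event time.
    // timeWindow() picked event or processing time from the environment's
    // TimeCharacteristic; the explicit assigner makes that choice visible in code.
    static <T, K> WindowedStream<T, K, TimeWindow> applyTumbling(
            KeyedStream<T, K> keyedStream, Time size) {
        return keyedStream.window(TumblingEventTimeWindows.of(size));
    }
}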
Example #3
Source File: SerialStreamingLedgerRuntimeProvider.java From da-streamingledger with Apache License 2.0

@Override
public ResultStreams translate(String name, List<InputAndSpec<?, ?>> streamLedgerSpecs) {
    List<OutputTag<?>> sideOutputTags = createSideOutputTags(streamLedgerSpecs);

    // the input stream is a union of different streams
    KeyedStream<TaggedElement, Boolean> input = union(streamLedgerSpecs)
            .keyBy(unused -> true);

    // main pipeline
    String serialTransactorName = "SerialTransactor(" + name + ")";
    SingleOutputStreamOperator<Void> resultStream = input
            .process(new SerialTransactor(specs(streamLedgerSpecs), sideOutputTags))
            .name(serialTransactorName)
            .uid(serialTransactorName + "___SERIAL_TX")
            .forceNonParallel()
            .returns(Void.class);

    // gather the side outputs
    Map<String, DataStream<?>> output = new HashMap<>();
    for (OutputTag<?> outputTag : sideOutputTags) {
        DataStream<?> rs = resultStream.getSideOutput(outputTag);
        output.put(outputTag.getId(), rs);
    }
    return new ResultStreams(output);
}
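A usage note on the pattern above: keying every element by the constant true funnels the entire union through a single key, and together with forceNonParallel() this ensures one SerialTransactor instance sees all transactions in a serial order, which is what a serializable ledger requires.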
Example #4
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static void executeOriginalVariant(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {
    KeyedStream<Event, Integer> source = env.addSource(createEventSource(pt))
            .name("EventSource")
            .uid("EventSource")
            .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
            .keyBy(Event::getKey);

    List<TypeSerializer<ComplexPayload>> stateSer =
            Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

    KeyedStream<Event, Integer> afterStatefulOperations =
            applyOriginalStatefulOperations(source, stateSer, Collections.emptyList());

    afterStatefulOperations
            .flatMap(createSemanticsCheckMapper(pt))
            .name("SemanticsCheckMapper")
            .addSink(new PrintSinkFunction<>());

    env.execute("General purpose test job");
}
Example #5
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static void executeUpgradedVariant(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {
    KeyedStream<UpgradedEvent, Integer> source = env.addSource(createEventSource(pt))
            .name("EventSource")
            .uid("EventSource")
            .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
            .map(new UpgradeEvent())
            .keyBy(UpgradedEvent::getKey);

    List<TypeSerializer<ComplexPayload>> stateSer =
            Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

    KeyedStream<UpgradedEvent, Integer> afterStatefulOperations =
            applyUpgradedStatefulOperations(source, stateSer, Collections.emptyList());

    afterStatefulOperations
            .map(new DowngradeEvent())
            .keyBy(Event::getKey)
            .flatMap(createSemanticsCheckMapper(pt))
            .name("SemanticsCheckMapper")
            .addSink(new PrintSinkFunction<>());

    env.execute("General purpose test job");
}
Example #6
Source File: KafkaShuffleTestBase.java From flink with Apache License 2.0

static KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> createKafkaShuffle(
        StreamExecutionEnvironment env,
        String topic,
        int numElementsPerProducer,
        int producerParallelism,
        TimeCharacteristic timeCharacteristic,
        int numberOfPartitions,
        boolean randomness) {
    DataStream<Tuple3<Integer, Long, Integer>> source =
            env.addSource(new KafkaSourceFunction(numElementsPerProducer)).setParallelism(producerParallelism);
    DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime)
            ? source.assignTimestampsAndWatermarks(new PunctuatedExtractor(randomness)).setParallelism(producerParallelism)
            : source;

    return FlinkKafkaShuffle.persistentKeyBy(
            input,
            topic,
            producerParallelism,
            numberOfPartitions,
            kafkaServer.getStandardProperties(),
            0);
}
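Unlike a plain keyBy, FlinkKafkaShuffle.persistentKeyBy (an experimental helper in the Flink Kafka connector) materializes the keyed exchange through the given Kafka topic, so the partitioned data is persisted and can be re-read independently of the producing job; the trailing 0 appears to be the tuple field used as the key.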
Example #7
Source File: KafkaShuffleTestBase.java From flink with Apache License 2.0

static KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> createKafkaShuffle(
        StreamExecutionEnvironment env,
        String topic,
        int numElementsPerProducer,
        int producerParallelism,
        TimeCharacteristic timeCharacteristic,
        int numberOfPartitions) {
    return createKafkaShuffle(
            env,
            topic,
            numElementsPerProducer,
            producerParallelism,
            timeCharacteristic,
            numberOfPartitions,
            false);
}
Example #8
Source File: UdfStreamOperatorCheckpointingITCase.java From flink with Apache License 2.0

/**
 * Assembles a stream of a grouping field and some long data. Applies reduce functions
 * on this stream.
 */
@Override
public void testProgram(StreamExecutionEnvironment env) {

    // base stream
    KeyedStream<Tuple2<Integer, Long>, Tuple> stream = env.addSource(new StatefulMultipleSequence())
            .keyBy(0);

    stream
            // testing built-in aggregate
            .min(1)
            // failure generation
            .map(new OnceFailingIdentityMapFunction(NUM_INPUT))
            .keyBy(0)
            .addSink(new MinEvictingQueueSink());

    stream
            // testing UDF reducer
            .reduce(new ReduceFunction<Tuple2<Integer, Long>>() {
                @Override
                public Tuple2<Integer, Long> reduce(
                        Tuple2<Integer, Long> value1, Tuple2<Integer, Long> value2) throws Exception {
                    return Tuple2.of(value1.f0, value1.f1 + value2.f1);
                }
            })
            .keyBy(0)
            .addSink(new SumEvictingQueueSink());

    stream
            // testing UDF folder
            .fold(Tuple2.of(0, 0L), new FoldFunction<Tuple2<Integer, Long>, Tuple2<Integer, Long>>() {
                @Override
                public Tuple2<Integer, Long> fold(
                        Tuple2<Integer, Long> accumulator, Tuple2<Integer, Long> value) throws Exception {
                    return Tuple2.of(value.f0, accumulator.f1 + value.f1);
                }
            })
            .keyBy(0)
            .addSink(new FoldEvictingQueueSink());
}
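Note that the last branch exercises KeyedStream#fold and FoldFunction, which were deprecated in later Flink releases and eventually removed; a rolling reduce or explicit keyed state is the usual replacement, so this test targets an older DataStream API.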
Example #9
Source File: RideCount.java From flink-training-exercises with Apache License 2.0

public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;       // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served every second

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));

    // map each ride to a tuple of (driverId, 1)
    DataStream<Tuple2<Long, Long>> tuples = rides.map(new MapFunction<TaxiRide, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(TaxiRide ride) throws Exception {
            return new Tuple2<Long, Long>(ride.driverId, 1L);
        }
    });

    // partition the stream by the driverId
    KeyedStream<Tuple2<Long, Long>, Tuple> keyedByDriverId = tuples.keyBy(0);

    // count the rides for each driver
    DataStream<Tuple2<Long, Long>> rideCounts = keyedByDriverId.sum(1);

    // we could, in fact, print out any or all of these streams
    rideCounts.print();

    // run the pipeline
    env.execute("Ride Count");
}
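Keying by tuple-field index, as in keyBy(0) above, was deprecated in later Flink releases in favor of KeySelector functions, which keep the key type explicit instead of the generic Tuple. A hedged sketch of the same partitioning with a selector (the class and method names are illustrative):

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;

class KeyByDriverIdSketch {

    // Same partitioning as tuples.keyBy(0), but with an explicit key selector:
    // the key type becomes Long (the driverId) instead of the untyped Tuple.
    static KeyedStream<Tuple2<Long, Long>, Long> keyByDriverId(DataStream<Tuple2<Long, Long>> tuples) {
        return tuples.keyBy(t -> t.f0);
    }
}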
Example #10
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0

/**
 * Creates a DataStream where elements are grouped in windows based on the specified windowing strategy.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #create(PipelineOptions, PCollection, KeyedStream, Combine.KeyedCombineFn, KvCoder)}
 * is that this method assumes no combiner function
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 *
 * @param options            the general job configuration options.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream, assumed to already be grouped by key.
 */
public static <K, VIN> DataStream<WindowedValue<KV<K, Iterable<VIN>>>> createForIterable(
        PipelineOptions options,
        PCollection input,
        KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey) {
    Preconditions.checkNotNull(options);

    KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
    Coder<K> keyCoder = inputKvCoder.getKeyCoder();
    Coder<VIN> inputValueCoder = inputKvCoder.getValueCoder();

    FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper(options,
            input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, null);

    Coder<Iterable<VIN>> valueIterCoder = IterableCoder.of(inputValueCoder);
    KvCoder<K, Iterable<VIN>> outputElemCoder = KvCoder.of(keyCoder, valueIterCoder);

    Coder<WindowedValue<KV<K, Iterable<VIN>>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
            outputElemCoder,
            input.getWindowingStrategy().getWindowFn().windowCoder());

    CoderTypeInformation<WindowedValue<KV<K, Iterable<VIN>>>> outputTypeInfo =
            new CoderTypeInformation<>(windowedOutputElemCoder);

    DataStream<WindowedValue<KV<K, Iterable<VIN>>>> groupedByKeyAndWindow = groupedStreamByKey
            .transform("GroupByWindow",
                    new CoderTypeInformation<>(windowedOutputElemCoder),
                    windower)
            .returns(outputTypeInfo);

    return groupedByKeyAndWindow;
}
Example #11
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0

/**
 * Creates a DataStream where elements are grouped in windows based on the specified windowing strategy.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}
 * is that this method assumes a combiner function is provided
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 * A combiner helps increase speed and, in most cases, reduces the per-window state.
 *
 * @param options            the general job configuration options.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream, assumed to already be grouped by key.
 * @param combiner           the combiner to be used.
 * @param outputKvCoder      the type of the output values.
 */
public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
        PipelineOptions options,
        PCollection input,
        KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
        Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
        KvCoder<K, VOUT> outputKvCoder) {
    Preconditions.checkNotNull(options);

    KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();

    FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper<>(options,
            input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, combiner);

    Coder<WindowedValue<KV<K, VOUT>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
            outputKvCoder,
            input.getWindowingStrategy().getWindowFn().windowCoder());

    CoderTypeInformation<WindowedValue<KV<K, VOUT>>> outputTypeInfo =
            new CoderTypeInformation<>(windowedOutputElemCoder);

    DataStream<WindowedValue<KV<K, VOUT>>> groupedByKeyAndWindow = groupedStreamByKey
            .transform("GroupByWindowWithCombiner",
                    new CoderTypeInformation<>(outputKvCoder),
                    windower)
            .returns(outputTypeInfo);

    return groupedByKeyAndWindow;
}
Example #12
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static KeyedStream<Event, Integer> applyTestStatefulOperator(
        String name,
        JoinFunction<Event, ComplexPayload, ComplexPayload> stateFunc,
        KeyedStream<Event, Integer> source,
        List<TypeSerializer<ComplexPayload>> stateSer,
        List<Class<ComplexPayload>> stateClass) {
    return source
            .map(createArtificialKeyedStateMapper(e -> e, stateFunc, stateSer, stateClass))
            .name(name)
            .uid(name)
            .returns(Event.class)
            .keyBy(Event::getKey);
}
Example #13
Source File: SummaryTreeReduce.java From gelly-streaming with Apache License 2.0

private DataStream<Tuple2<Integer, S>> enhance(DataStream<Tuple2<Integer, S>> input, TypeInformation<Tuple2<Integer, S>> aggType) {
    if (input.getParallelism() <= 2) {
        return input;
    }

    int nextParal = input.getParallelism() / 2;
    DataStream<Tuple2<Integer, S>> unpartitionedStream =
            input.keyBy(new KeySelector<Tuple2<Integer, S>, Integer>() {
                // collapse two partitions into one
                @Override
                public Integer getKey(Tuple2<Integer, S> record) throws Exception {
                    return record.f0 / 2;
                }
            });

    // repartition stream to p / 2 aggregators
    KeyedStream<Tuple2<Integer, S>, Integer> repartitionedStream =
            unpartitionedStream.map(new PartitionReMapper()).returns(aggType)
                    .setParallelism(nextParal)
                    .keyBy(0);

    // window again on event time and aggregate
    DataStream<Tuple2<Integer, S>> aggregatedStream =
            repartitionedStream.timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS))
                    .reduce(new AggregationWrapper<>(getCombineFun()))
                    .setParallelism(nextParal);
    return enhance(aggregatedStream, aggType);
}
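The recursion is the point here: each pass maps partition i to i / 2 and halves the parallelism, so the aggregators form a binary reduction tree that terminates once at most two partitions remain.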
Example #14
Source File: KafkaShuffleExactlyOnceITCase.java From flink with Apache License 2.0

/**
 * To test failure recovery with partition assignment after processing 2/3 of the data.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testAssignedToPartitionFailureRecovery(
        int numElementsPerProducer,
        TimeCharacteristic timeCharacteristic) throws Exception {
    String topic = topic("partition_failure_recovery", timeCharacteristic);

    final int numberOfPartitions = 3;
    final int producerParallelism = 2;
    final int failAfterElements = numElementsPerProducer * producerParallelism * 2 / 3;

    createTestTopic(topic, numberOfPartitions, 1);

    final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);
    KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
            env,
            topic,
            numElementsPerProducer,
            producerParallelism,
            timeCharacteristic,
            numberOfPartitions);
    keyedStream
            .process(new PartitionValidator(keyedStream.getKeySelector(), numberOfPartitions, topic))
            .setParallelism(numberOfPartitions)
            .map(new ToInteger(producerParallelism)).setParallelism(numberOfPartitions)
            .map(new FailingIdentityMapper<>(failAfterElements)).setParallelism(1)
            .addSink(new ValidatingExactlyOnceSink(numElementsPerProducer * producerParallelism)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, topic);

    deleteTestTopic(topic);
}
Example #15
Source File: FlinkStreamingTransformTranslators.java From beam with Apache License 2.0

@Override
public void translateNode(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KeyedWorkItem<K, InputT>>> transform,
        FlinkStreamingTranslationContext context) {

    PCollection<KV<K, InputT>> input = context.getInput(transform);

    KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();

    SingletonKeyedWorkItemCoder<K, InputT> workItemCoder =
            SingletonKeyedWorkItemCoder.of(
                    inputKvCoder.getKeyCoder(),
                    inputKvCoder.getValueCoder(),
                    input.getWindowingStrategy().getWindowFn().windowCoder());

    WindowedValue.ValueOnlyWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder =
            WindowedValue.getValueOnlyCoder(workItemCoder);

    CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
            new CoderTypeInformation<>(windowedWorkItemCoder);

    DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);

    DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
            inputDataStream
                    .flatMap(new ToKeyedWorkItemInGlobalWindow<>(context.getPipelineOptions()))
                    .returns(workItemTypeInfo)
                    .name("ToKeyedWorkItem");

    KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream =
            workItemStream.keyBy(new WorkItemKeySelector<>(inputKvCoder.getKeyCoder()));

    context.setOutputDataStream(context.getOutput(transform), keyedWorkItemStream);
}
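Because Beam keys are defined by coders rather than by Flink's own type system, the translator keys the work-item stream by the coder-serialized form of the key, which is why the resulting KeyedStream uses ByteBuffer as its key type.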
Example #16
Source File: KeyedTransformations.java From examples-java with Apache License 2.0

public static void main(String[] args) throws Exception {

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // use event time for the application
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // configure watermark interval
    env.getConfig().setAutoWatermarkInterval(1000L);

    // ingest sensor stream
    DataStream<SensorReading> readings = env
            // SensorSource generates random temperature readings
            .addSource(new SensorSource())
            // assign timestamps and watermarks which are required for event time
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

    // group sensor readings by sensor id
    KeyedStream<SensorReading, String> keyed = readings
            .keyBy(r -> r.id);

    // a rolling reduce that computes the highest temperature of each sensor and
    // the corresponding timestamp
    DataStream<SensorReading> maxTempPerSensor = keyed
            .reduce((r1, r2) -> {
                if (r1.temperature > r2.temperature) {
                    return r1;
                } else {
                    return r2;
                }
            });

    maxTempPerSensor.print();

    // execute application
    env.execute("Keyed Transformations Example");
}
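Since reduce on a KeyedStream is a rolling aggregation, maxTempPerSensor emits an updated per-sensor maximum for every incoming reading rather than one final value per sensor.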
Example #17
Source File: MultiStreamTransformations.java From examples-java with Apache License 2.0

public static void main(String[] args) throws Exception {

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // use event time for the application
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // configure watermark interval
    env.getConfig().setAutoWatermarkInterval(1000L);

    // ingest sensor stream
    DataStream<SensorReading> tempReadings = env
            // SensorSource generates random temperature readings
            .addSource(new SensorSource())
            // assign timestamps and watermarks which are required for event time
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

    // ingest smoke level stream
    DataStream<SmokeLevel> smokeReadings = env
            .addSource(new SmokeLevelSource())
            .setParallelism(1);

    // group sensor readings by sensor id
    KeyedStream<SensorReading, String> keyedTempReadings = tempReadings
            .keyBy(r -> r.id);

    // connect the two streams and raise an alert if the temperature and
    // smoke levels are high
    DataStream<Alert> alerts = keyedTempReadings
            .connect(smokeReadings.broadcast())
            .flatMap(new RaiseAlertFlatMap());

    alerts.print();

    // execute the application
    env.execute("Multi-Stream Transformations Example");
}
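The broadcast() on the smoke stream replicates every SmokeLevel event to all parallel instances of the co-flat-map, so each instance can combine its share of keyed temperature readings with the full smoke-level signal.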
Example #18
Source File: KafkaShuffleITCase.java From flink with Apache License 2.0

/**
 * To test data is partitioned to the right partition.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testAssignedToPartition(
        int numElementsPerProducer,
        TimeCharacteristic timeCharacteristic) throws Exception {
    String topic = topic("test_assigned_to_partition", timeCharacteristic);

    final int numberOfPartitions = 3;
    final int producerParallelism = 2;

    createTestTopic(topic, numberOfPartitions, 1);

    final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);
    KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
            env,
            topic,
            numElementsPerProducer,
            producerParallelism,
            timeCharacteristic,
            numberOfPartitions);
    keyedStream
            .process(new PartitionValidator(keyedStream.getKeySelector(), numberOfPartitions, topic))
            .setParallelism(numberOfPartitions)
            .map(new ElementCountNoMoreThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1)
            .map(new ElementCountNoLessThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1);

    tryExecute(env, topic);

    deleteTestTopic(topic);
}
Example #19
Source File: KafkaShuffleITCase.java From flink with Apache License 2.0

/**
 * Tests that watermarks from the consumer side always increase.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testWatermarkIncremental(int numElementsPerProducer) throws Exception {
    TimeCharacteristic timeCharacteristic = EventTime;
    String topic = topic("test_watermark_incremental", timeCharacteristic);

    final int numberOfPartitions = 3;
    final int producerParallelism = 2;

    createTestTopic(topic, numberOfPartitions, 1);

    final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);
    KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
            env,
            topic,
            numElementsPerProducer,
            producerParallelism,
            timeCharacteristic,
            numberOfPartitions,
            true);
    keyedStream
            .process(new WatermarkValidator())
            .setParallelism(numberOfPartitions)
            .map(new ElementCountNoMoreThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1)
            .map(new ElementCountNoLessThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1);

    tryExecute(env, topic);

    deleteTestTopic(topic);
}
Example #20
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static KeyedStream<Event, Integer> applyUpgradedStatefulOperations(
        KeyedStream<Event, Integer> source,
        List<TypeSerializer<ComplexPayload>> stateSer,
        List<Class<ComplexPayload>> stateClass) {
    source = applyTestStatefulOperator("stateMap2", simpleStateUpdate("stateMap2"), source, stateSer, stateClass);
    source = applyTestStatefulOperator("stateMap1", lastStateUpdate("stateMap1"), source, stateSer, stateClass);
    return applyTestStatefulOperator("stateMap3", simpleStateUpdate("stateMap3"), source, stateSer, stateClass);
}
Example #21
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static KeyedStream<Event, Integer> applyOriginalStatefulOperations(
        KeyedStream<Event, Integer> source,
        List<TypeSerializer<ComplexPayload>> stateSer,
        List<Class<ComplexPayload>> stateClass) {
    source = applyTestStatefulOperator("stateMap1", simpleStateUpdate("stateMap1"), source, stateSer, stateClass);
    return applyTestStatefulOperator("stateMap2", lastStateUpdate("stateMap2"), source, stateSer, stateClass);
}
Example #22
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    setupEnvironment(env, pt);

    KeyedStream<Event, Integer> source = env.addSource(createEventSource(pt))
            .name("EventSource")
            .uid("EventSource")
            .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
            .keyBy(Event::getKey);

    List<TypeSerializer<ComplexPayload>> stateSer =
            Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

    KeyedStream<Event, Integer> afterStatefulOperations = isOriginalJobVariant(pt)
            ? applyOriginalStatefulOperations(source, stateSer, Collections.emptyList())
            : applyUpgradedStatefulOperations(source, stateSer, Collections.emptyList());

    afterStatefulOperations
            .flatMap(createSemanticsCheckMapper(pt))
            .name("SemanticsCheckMapper")
            .addSink(new PrintSinkFunction<>());

    env.execute("General purpose test job");
}
Example #23
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static KeyedStream<UpgradedEvent, Integer> applyUpgradedStatefulOperations(
        KeyedStream<UpgradedEvent, Integer> source,
        List<TypeSerializer<ComplexPayload>> stateSer,
        List<Class<ComplexPayload>> stateClass) {
    source = applyUpgradedTestStatefulOperator("stateMap2", simpleUpgradedStateUpdate("stateMap2"), source, stateSer, stateClass);
    source = applyUpgradedTestStatefulOperator("stateMap1", lastUpgradedStateUpdate("stateMap1"), source, stateSer, stateClass);
    return applyUpgradedTestStatefulOperator("stateMap3", simpleUpgradedStateUpdate("stateMap3"), source, stateSer, stateClass);
}