org.apache.flink.streaming.api.datastream.KeyedStream Java Examples
The following examples show how to use
org.apache.flink.streaming.api.datastream.KeyedStream.
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
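Before the examples, a quick refresher: calling keyBy on a DataStream yields a KeyedStream, on which per-key aggregations, windows, and keyed state become available. A minimal, self-contained sketch (the class name and sample data are illustrative, not taken from any project below):

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyedStreamSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // a small in-memory stream of (word, count) pairs
        DataStream<Tuple2<String, Long>> pairs = env.fromElements(
                Tuple2.of("flink", 1L),
                Tuple2.of("kafka", 1L),
                Tuple2.of("flink", 1L));

        // keyBy partitions the stream by key and yields a KeyedStream;
        // all records with the same key go to the same parallel subtask
        KeyedStream<Tuple2<String, Long>, String> keyed = pairs.keyBy(t -> t.f0);

        // rolling aggregation per key: emits an updated sum for every input record
        keyed.sum(1).print();

        env.execute("KeyedStream sketch");
    }
}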
Example #1
Source File: FlinkGroupByKeyWrapper.java From flink-dataflow with Apache License 2.0

public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(
        DataStream<WindowedValue<KV<K, V>>> inputDataStream,
        KvCoder<K, V> inputKvCoder) {
    final Coder<K> keyCoder = inputKvCoder.getKeyCoder();
    final TypeInformation<K> keyTypeInfo = new CoderTypeInformation<>(keyCoder);
    final boolean isKeyVoid = keyCoder instanceof VoidCoder;

    return inputDataStream.keyBy(
            new KeySelectorWithQueryableResultType<K, V>() {

                @Override
                public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
                    return isKeyVoid
                            ? (K) VoidCoderTypeSerializer.VoidValue.INSTANCE
                            : value.getValue().getKey();
                }

                @Override
                public TypeInformation<K> getProducedType() {
                    return keyTypeInfo;
                }
            });
}
Example #2
Source File: DataStreamAllroundTestJobFactory.java From flink with Apache License 2.0

static WindowedStream<Event, Integer, TimeWindow> applyTumblingWindows(
        KeyedStream<Event, Integer> keyedStream,
        ParameterTool pt) {
    long eventTimeProgressPerEvent = pt.getLong(
            SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.key(),
            SEQUENCE_GENERATOR_SRC_EVENT_TIME_CLOCK_PROGRESS.defaultValue());

    return keyedStream.timeWindow(
            Time.milliseconds(
                    pt.getLong(
                            TUMBLING_WINDOW_OPERATOR_NUM_EVENTS.key(),
                            TUMBLING_WINDOW_OPERATOR_NUM_EVENTS.defaultValue()
                    ) * eventTimeProgressPerEvent
            )
    );
}
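A side note on the shorthand above: KeyedStream#timeWindow derives the window type from the environment's TimeCharacteristic and was deprecated in later Flink releases in favor of explicit window assigners. A hedged sketch of the explicit event-time equivalent (the class and method names are mine, not part of this test job):

import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;

class TumblingWindowSketch {

    // Explicit-assigner equivalent of keyedStream.timeWindow(size) under event time.
    // timeWindow() picked event or processing time from the environment's
    // TimeCharacteristic; the explicit assigner makes that choice visible in code.
    static <T, K> WindowedStream<T, K, TimeWindow> applyTumbling(
            KeyedStream<T, K> keyedStream, Time size) {
        return keyedStream.window(TumblingEventTimeWindows.of(size));
    }
}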
Example #3
Source File: SerialStreamingLedgerRuntimeProvider.java From da-streamingledger with Apache License 2.0

@Override
public ResultStreams translate(String name, List<InputAndSpec<?, ?>> streamLedgerSpecs) {
    List<OutputTag<?>> sideOutputTags = createSideOutputTags(streamLedgerSpecs);

    // the input stream is a union of different streams
    KeyedStream<TaggedElement, Boolean> input = union(streamLedgerSpecs)
            .keyBy(unused -> true);

    // main pipeline
    String serialTransactorName = "SerialTransactor(" + name + ")";
    SingleOutputStreamOperator<Void> resultStream = input
            .process(new SerialTransactor(specs(streamLedgerSpecs), sideOutputTags))
            .name(serialTransactorName)
            .uid(serialTransactorName + "___SERIAL_TX")
            .forceNonParallel()
            .returns(Void.class);

    // gather the side outputs
    Map<String, DataStream<?>> output = new HashMap<>();
    for (OutputTag<?> outputTag : sideOutputTags) {
        DataStream<?> rs = resultStream.getSideOutput(outputTag);
        output.put(outputTag.getId(), rs);
    }
    return new ResultStreams(output);
}
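A usage note on the pattern above: keying every element by the constant true funnels the entire union through a single key, and together with forceNonParallel() this ensures one SerialTransactor instance sees all transactions in a serial order, which is what a serializable ledger requires.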
Example #4
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static void executeOriginalVariant(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {
    KeyedStream<Event, Integer> source = env.addSource(createEventSource(pt))
            .name("EventSource")
            .uid("EventSource")
            .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
            .keyBy(Event::getKey);

    List<TypeSerializer<ComplexPayload>> stateSer =
            Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

    KeyedStream<Event, Integer> afterStatefulOperations =
            applyOriginalStatefulOperations(source, stateSer, Collections.emptyList());

    afterStatefulOperations
            .flatMap(createSemanticsCheckMapper(pt))
            .name("SemanticsCheckMapper")
            .addSink(new PrintSinkFunction<>());

    env.execute("General purpose test job");
}
Example #5
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static void executeUpgradedVariant(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {
    KeyedStream<UpgradedEvent, Integer> source = env.addSource(createEventSource(pt))
            .name("EventSource")
            .uid("EventSource")
            .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
            .map(new UpgradeEvent())
            .keyBy(UpgradedEvent::getKey);

    List<TypeSerializer<ComplexPayload>> stateSer =
            Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

    KeyedStream<UpgradedEvent, Integer> afterStatefulOperations =
            applyUpgradedStatefulOperations(source, stateSer, Collections.emptyList());

    afterStatefulOperations
            .map(new DowngradeEvent())
            .keyBy(Event::getKey)
            .flatMap(createSemanticsCheckMapper(pt))
            .name("SemanticsCheckMapper")
            .addSink(new PrintSinkFunction<>());

    env.execute("General purpose test job");
}
Example #6
Source File: KafkaShuffleTestBase.java From flink with Apache License 2.0

static KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> createKafkaShuffle(
        StreamExecutionEnvironment env,
        String topic,
        int numElementsPerProducer,
        int producerParallelism,
        TimeCharacteristic timeCharacteristic,
        int numberOfPartitions,
        boolean randomness) {
    DataStream<Tuple3<Integer, Long, Integer>> source =
            env.addSource(new KafkaSourceFunction(numElementsPerProducer)).setParallelism(producerParallelism);
    DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime)
            ? source.assignTimestampsAndWatermarks(new PunctuatedExtractor(randomness)).setParallelism(producerParallelism)
            : source;

    return FlinkKafkaShuffle.persistentKeyBy(
            input,
            topic,
            producerParallelism,
            numberOfPartitions,
            kafkaServer.getStandardProperties(),
            0);
}
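Unlike a plain keyBy, FlinkKafkaShuffle.persistentKeyBy (an experimental helper in the Flink Kafka connector) materializes the keyed exchange through the given Kafka topic, so the partitioned data is persisted and can be re-read independently of the producing job; the trailing 0 appears to be the tuple field used as the key.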
Example #7
Source File: KafkaShuffleTestBase.java From flink with Apache License 2.0

static KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> createKafkaShuffle(
        StreamExecutionEnvironment env,
        String topic,
        int numElementsPerProducer,
        int producerParallelism,
        TimeCharacteristic timeCharacteristic,
        int numberOfPartitions) {
    return createKafkaShuffle(
            env,
            topic,
            numElementsPerProducer,
            producerParallelism,
            timeCharacteristic,
            numberOfPartitions,
            false);
}
Example #8
Source File: UdfStreamOperatorCheckpointingITCase.java From flink with Apache License 2.0

/**
 * Assembles a stream of a grouping field and some long data. Applies reduce functions
 * on this stream.
 */
@Override
public void testProgram(StreamExecutionEnvironment env) {

    // base stream
    KeyedStream<Tuple2<Integer, Long>, Tuple> stream = env.addSource(new StatefulMultipleSequence())
            .keyBy(0);

    stream
            // testing built-in aggregate
            .min(1)
            // failure generation
            .map(new OnceFailingIdentityMapFunction(NUM_INPUT))
            .keyBy(0)
            .addSink(new MinEvictingQueueSink());

    stream
            // testing UDF reducer
            .reduce(new ReduceFunction<Tuple2<Integer, Long>>() {
                @Override
                public Tuple2<Integer, Long> reduce(
                        Tuple2<Integer, Long> value1, Tuple2<Integer, Long> value2) throws Exception {
                    return Tuple2.of(value1.f0, value1.f1 + value2.f1);
                }
            })
            .keyBy(0)
            .addSink(new SumEvictingQueueSink());

    stream
            // testing UDF folder
            .fold(Tuple2.of(0, 0L), new FoldFunction<Tuple2<Integer, Long>, Tuple2<Integer, Long>>() {
                @Override
                public Tuple2<Integer, Long> fold(
                        Tuple2<Integer, Long> accumulator, Tuple2<Integer, Long> value) throws Exception {
                    return Tuple2.of(value.f0, accumulator.f1 + value.f1);
                }
            })
            .keyBy(0)
            .addSink(new FoldEvictingQueueSink());
}
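Note that the last branch exercises KeyedStream#fold and FoldFunction, which were deprecated in later Flink releases and eventually removed; a rolling reduce or explicit keyed state is the usual replacement, so this test targets an older DataStream API.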
Example #9
Source File: RideCount.java From flink-training-exercises with Apache License 2.0

public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;       // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served every second

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));

    // map each ride to a tuple of (driverId, 1)
    DataStream<Tuple2<Long, Long>> tuples = rides.map(new MapFunction<TaxiRide, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(TaxiRide ride) throws Exception {
            return new Tuple2<Long, Long>(ride.driverId, 1L);
        }
    });

    // partition the stream by the driverId
    KeyedStream<Tuple2<Long, Long>, Tuple> keyedByDriverId = tuples.keyBy(0);

    // count the rides for each driver
    DataStream<Tuple2<Long, Long>> rideCounts = keyedByDriverId.sum(1);

    // we could, in fact, print out any or all of these streams
    rideCounts.print();

    // run the pipeline
    env.execute("Ride Count");
}
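Keying by tuple-field index, as in keyBy(0) above, was deprecated in later Flink releases in favor of KeySelector functions, which keep the key type explicit instead of the generic Tuple. A hedged sketch of the same partitioning with a selector (the class and method names are illustrative):

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;

class KeyByDriverIdSketch {

    // Same partitioning as tuples.keyBy(0), but with an explicit key selector:
    // the key type becomes Long (the driverId) instead of the untyped Tuple.
    static KeyedStream<Tuple2<Long, Long>, Long> keyByDriverId(DataStream<Tuple2<Long, Long>> tuples) {
        return tuples.keyBy(t -> t.f0);
    }
}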
Example #10
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0

/**
 * Creates a DataStream where elements are grouped in windows based on the specified windowing strategy.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #create(PipelineOptions, PCollection, KeyedStream, Combine.KeyedCombineFn, KvCoder)}
 * is that this method assumes no combiner function
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 *
 * @param options            the general job configuration options.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream, assumed to already be grouped by key.
 */
public static <K, VIN> DataStream<WindowedValue<KV<K, Iterable<VIN>>>> createForIterable(
        PipelineOptions options,
        PCollection input,
        KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey) {
    Preconditions.checkNotNull(options);

    KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
    Coder<K> keyCoder = inputKvCoder.getKeyCoder();
    Coder<VIN> inputValueCoder = inputKvCoder.getValueCoder();

    FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper(options,
            input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, null);

    Coder<Iterable<VIN>> valueIterCoder = IterableCoder.of(inputValueCoder);
    KvCoder<K, Iterable<VIN>> outputElemCoder = KvCoder.of(keyCoder, valueIterCoder);

    Coder<WindowedValue<KV<K, Iterable<VIN>>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
            outputElemCoder,
            input.getWindowingStrategy().getWindowFn().windowCoder());

    CoderTypeInformation<WindowedValue<KV<K, Iterable<VIN>>>> outputTypeInfo =
            new CoderTypeInformation<>(windowedOutputElemCoder);

    DataStream<WindowedValue<KV<K, Iterable<VIN>>>> groupedByKeyAndWindow = groupedStreamByKey
            .transform("GroupByWindow",
                    new CoderTypeInformation<>(windowedOutputElemCoder),
                    windower)
            .returns(outputTypeInfo);

    return groupedByKeyAndWindow;
}
Example #11
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0

/**
 * Creates a DataStream where elements are grouped in windows based on the specified windowing strategy.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}
 * is that this method assumes a combiner function is provided
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 * A combiner helps increase speed and, in most cases, reduces the per-window state.
 *
 * @param options            the general job configuration options.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream, assumed to already be grouped by key.
 * @param combiner           the combiner to be used.
 * @param outputKvCoder      the type of the output values.
 */
public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
        PipelineOptions options,
        PCollection input,
        KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
        Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
        KvCoder<K, VOUT> outputKvCoder) {
    Preconditions.checkNotNull(options);

    KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();

    FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper<>(options,
            input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, combiner);

    Coder<WindowedValue<KV<K, VOUT>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
            outputKvCoder,
            input.getWindowingStrategy().getWindowFn().windowCoder());

    CoderTypeInformation<WindowedValue<KV<K, VOUT>>> outputTypeInfo =
            new CoderTypeInformation<>(windowedOutputElemCoder);

    DataStream<WindowedValue<KV<K, VOUT>>> groupedByKeyAndWindow = groupedStreamByKey
            .transform("GroupByWindowWithCombiner",
                    new CoderTypeInformation<>(outputKvCoder),
                    windower)
            .returns(outputTypeInfo);

    return groupedByKeyAndWindow;
}
Example #12
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static KeyedStream<Event, Integer> applyTestStatefulOperator(
        String name,
        JoinFunction<Event, ComplexPayload, ComplexPayload> stateFunc,
        KeyedStream<Event, Integer> source,
        List<TypeSerializer<ComplexPayload>> stateSer,
        List<Class<ComplexPayload>> stateClass) {
    return source
            .map(createArtificialKeyedStateMapper(e -> e, stateFunc, stateSer, stateClass))
            .name(name)
            .uid(name)
            .returns(Event.class)
            .keyBy(Event::getKey);
}
Example #13
Source File: SummaryTreeReduce.java From gelly-streaming with Apache License 2.0

private DataStream<Tuple2<Integer, S>> enhance(DataStream<Tuple2<Integer, S>> input, TypeInformation<Tuple2<Integer, S>> aggType) {
    if (input.getParallelism() <= 2) {
        return input;
    }

    int nextParal = input.getParallelism() / 2;
    DataStream<Tuple2<Integer, S>> unpartitionedStream =
            input.keyBy(new KeySelector<Tuple2<Integer, S>, Integer>() {
                // collapse two partitions into one
                @Override
                public Integer getKey(Tuple2<Integer, S> record) throws Exception {
                    return record.f0 / 2;
                }
            });

    // repartition stream to p / 2 aggregators
    KeyedStream<Tuple2<Integer, S>, Integer> repartitionedStream =
            unpartitionedStream.map(new PartitionReMapper()).returns(aggType)
                    .setParallelism(nextParal)
                    .keyBy(0);

    // window again on event time and aggregate
    DataStream<Tuple2<Integer, S>> aggregatedStream =
            repartitionedStream.timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS))
                    .reduce(new AggregationWrapper<>(getCombineFun()))
                    .setParallelism(nextParal);
    return enhance(aggregatedStream, aggType);
}
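The recursion is the point here: each pass maps partition i to i / 2 and halves the parallelism, so the aggregators form a binary reduction tree that terminates once at most two partitions remain.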
Example #14
Source File: KafkaShuffleExactlyOnceITCase.java From flink with Apache License 2.0

/**
 * To test failure recovery with partition assignment after processing 2/3 of the data.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testAssignedToPartitionFailureRecovery(
        int numElementsPerProducer,
        TimeCharacteristic timeCharacteristic) throws Exception {
    String topic = topic("partition_failure_recovery", timeCharacteristic);

    final int numberOfPartitions = 3;
    final int producerParallelism = 2;
    final int failAfterElements = numElementsPerProducer * producerParallelism * 2 / 3;

    createTestTopic(topic, numberOfPartitions, 1);

    final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);
    KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
            env,
            topic,
            numElementsPerProducer,
            producerParallelism,
            timeCharacteristic,
            numberOfPartitions);
    keyedStream
            .process(new PartitionValidator(keyedStream.getKeySelector(), numberOfPartitions, topic))
            .setParallelism(numberOfPartitions)
            .map(new ToInteger(producerParallelism)).setParallelism(numberOfPartitions)
            .map(new FailingIdentityMapper<>(failAfterElements)).setParallelism(1)
            .addSink(new ValidatingExactlyOnceSink(numElementsPerProducer * producerParallelism)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, topic);

    deleteTestTopic(topic);
}
Example #15
Source File: FlinkStreamingTransformTranslators.java From beam with Apache License 2.0

@Override
public void translateNode(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KeyedWorkItem<K, InputT>>> transform,
        FlinkStreamingTranslationContext context) {

    PCollection<KV<K, InputT>> input = context.getInput(transform);

    KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();

    SingletonKeyedWorkItemCoder<K, InputT> workItemCoder =
            SingletonKeyedWorkItemCoder.of(
                    inputKvCoder.getKeyCoder(),
                    inputKvCoder.getValueCoder(),
                    input.getWindowingStrategy().getWindowFn().windowCoder());

    WindowedValue.ValueOnlyWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder =
            WindowedValue.getValueOnlyCoder(workItemCoder);

    CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
            new CoderTypeInformation<>(windowedWorkItemCoder);

    DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);

    DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
            inputDataStream
                    .flatMap(new ToKeyedWorkItemInGlobalWindow<>(context.getPipelineOptions()))
                    .returns(workItemTypeInfo)
                    .name("ToKeyedWorkItem");

    KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream =
            workItemStream.keyBy(new WorkItemKeySelector<>(inputKvCoder.getKeyCoder()));

    context.setOutputDataStream(context.getOutput(transform), keyedWorkItemStream);
}
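Because Beam keys are defined by coders rather than by Flink's own type system, the translator keys the work-item stream by the coder-serialized form of the key, which is why the resulting KeyedStream uses ByteBuffer as its key type.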
Example #16
Source File: KeyedTransformations.java From examples-java with Apache License 2.0

public static void main(String[] args) throws Exception {

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // use event time for the application
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // configure watermark interval
    env.getConfig().setAutoWatermarkInterval(1000L);

    // ingest sensor stream
    DataStream<SensorReading> readings = env
            // SensorSource generates random temperature readings
            .addSource(new SensorSource())
            // assign timestamps and watermarks which are required for event time
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

    // group sensor readings by sensor id
    KeyedStream<SensorReading, String> keyed = readings
            .keyBy(r -> r.id);

    // a rolling reduce that computes the highest temperature of each sensor and
    // the corresponding timestamp
    DataStream<SensorReading> maxTempPerSensor = keyed
            .reduce((r1, r2) -> {
                if (r1.temperature > r2.temperature) {
                    return r1;
                } else {
                    return r2;
                }
            });

    maxTempPerSensor.print();

    // execute application
    env.execute("Keyed Transformations Example");
}
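Since reduce on a KeyedStream is a rolling aggregation, maxTempPerSensor emits an updated per-sensor maximum for every incoming reading rather than one final value per sensor.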
Example #17
Source File: MultiStreamTransformations.java From examples-java with Apache License 2.0

public static void main(String[] args) throws Exception {

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // use event time for the application
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // configure watermark interval
    env.getConfig().setAutoWatermarkInterval(1000L);

    // ingest sensor stream
    DataStream<SensorReading> tempReadings = env
            // SensorSource generates random temperature readings
            .addSource(new SensorSource())
            // assign timestamps and watermarks which are required for event time
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

    // ingest smoke level stream
    DataStream<SmokeLevel> smokeReadings = env
            .addSource(new SmokeLevelSource())
            .setParallelism(1);

    // group sensor readings by sensor id
    KeyedStream<SensorReading, String> keyedTempReadings = tempReadings
            .keyBy(r -> r.id);

    // connect the two streams and raise an alert if the temperature and
    // smoke levels are high
    DataStream<Alert> alerts = keyedTempReadings
            .connect(smokeReadings.broadcast())
            .flatMap(new RaiseAlertFlatMap());

    alerts.print();

    // execute the application
    env.execute("Multi-Stream Transformations Example");
}
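The broadcast() on the smoke stream replicates every SmokeLevel event to all parallel instances of the co-flat-map, so each instance can combine its share of keyed temperature readings with the full smoke-level signal.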
Example #18
Source File: KafkaShuffleITCase.java From flink with Apache License 2.0

/**
 * To test data is partitioned to the right partition.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testAssignedToPartition(
        int numElementsPerProducer,
        TimeCharacteristic timeCharacteristic) throws Exception {
    String topic = topic("test_assigned_to_partition", timeCharacteristic);

    final int numberOfPartitions = 3;
    final int producerParallelism = 2;

    createTestTopic(topic, numberOfPartitions, 1);

    final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);
    KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
            env,
            topic,
            numElementsPerProducer,
            producerParallelism,
            timeCharacteristic,
            numberOfPartitions);
    keyedStream
            .process(new PartitionValidator(keyedStream.getKeySelector(), numberOfPartitions, topic))
            .setParallelism(numberOfPartitions)
            .map(new ElementCountNoMoreThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1)
            .map(new ElementCountNoLessThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1);

    tryExecute(env, topic);

    deleteTestTopic(topic);
}
Example #19
Source File: KafkaShuffleITCase.java From flink with Apache License 2.0

/**
 * Tests that watermarks from the consumer side always increase.
 *
 * <p>Schema: (key, timestamp, source instance Id).
 * Producer Parallelism = 2; Kafka Partition # = 3; Consumer Parallelism = 3
 */
private void testWatermarkIncremental(int numElementsPerProducer) throws Exception {
    TimeCharacteristic timeCharacteristic = EventTime;
    String topic = topic("test_watermark_incremental", timeCharacteristic);

    final int numberOfPartitions = 3;
    final int producerParallelism = 2;

    createTestTopic(topic, numberOfPartitions, 1);

    final StreamExecutionEnvironment env = createEnvironment(producerParallelism, timeCharacteristic);
    KeyedStream<Tuple3<Integer, Long, Integer>, Tuple> keyedStream = createKafkaShuffle(
            env,
            topic,
            numElementsPerProducer,
            producerParallelism,
            timeCharacteristic,
            numberOfPartitions,
            true);
    keyedStream
            .process(new WatermarkValidator())
            .setParallelism(numberOfPartitions)
            .map(new ElementCountNoMoreThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1)
            .map(new ElementCountNoLessThanValidator(numElementsPerProducer * producerParallelism)).setParallelism(1);

    tryExecute(env, topic);

    deleteTestTopic(topic);
}
Example #20
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static KeyedStream<Event, Integer> applyUpgradedStatefulOperations(
        KeyedStream<Event, Integer> source,
        List<TypeSerializer<ComplexPayload>> stateSer,
        List<Class<ComplexPayload>> stateClass) {
    source = applyTestStatefulOperator("stateMap2", simpleStateUpdate("stateMap2"), source, stateSer, stateClass);
    source = applyTestStatefulOperator("stateMap1", lastStateUpdate("stateMap1"), source, stateSer, stateClass);
    return applyTestStatefulOperator("stateMap3", simpleStateUpdate("stateMap3"), source, stateSer, stateClass);
}
Example #21
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static KeyedStream<Event, Integer> applyOriginalStatefulOperations(
        KeyedStream<Event, Integer> source,
        List<TypeSerializer<ComplexPayload>> stateSer,
        List<Class<ComplexPayload>> stateClass) {
    source = applyTestStatefulOperator("stateMap1", simpleStateUpdate("stateMap1"), source, stateSer, stateClass);
    return applyTestStatefulOperator("stateMap2", lastStateUpdate("stateMap2"), source, stateSer, stateClass);
}
Example #22
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    setupEnvironment(env, pt);

    KeyedStream<Event, Integer> source = env.addSource(createEventSource(pt))
            .name("EventSource")
            .uid("EventSource")
            .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
            .keyBy(Event::getKey);

    List<TypeSerializer<ComplexPayload>> stateSer =
            Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

    KeyedStream<Event, Integer> afterStatefulOperations = isOriginalJobVariant(pt)
            ? applyOriginalStatefulOperations(source, stateSer, Collections.emptyList())
            : applyUpgradedStatefulOperations(source, stateSer, Collections.emptyList());

    afterStatefulOperations
            .flatMap(createSemanticsCheckMapper(pt))
            .name("SemanticsCheckMapper")
            .addSink(new PrintSinkFunction<>());

    env.execute("General purpose test job");
}
Example #23
Source File: StatefulStreamJobUpgradeTestProgram.java From flink with Apache License 2.0

private static KeyedStream<UpgradedEvent, Integer> applyUpgradedStatefulOperations(
        KeyedStream<UpgradedEvent, Integer> source,
        List<TypeSerializer<ComplexPayload>> stateSer,
        List<Class<ComplexPayload>> stateClass) {
    source = applyUpgradedTestStatefulOperator("stateMap2", simpleUpgradedStateUpdate("stateMap2"), source, stateSer, stateClass);
    source = applyUpgradedTestStatefulOperator("stateMap1", lastUpgradedStateUpdate("stateMap1"), source, stateSer, stateClass);
    return applyUpgradedTestStatefulOperator("stateMap3", simpleUpgradedStateUpdate("stateMap3"), source, stateSer, stateClass);
}