Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#keyBy()
The following examples show how to use
org.apache.flink.streaming.api.datastream.DataStream#keyBy().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataStreamTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Verifies that keying a {@code POJOWithHashCode} stream by its {@code "id"} field is
 * rejected with an {@code InvalidProgramException} whose message starts with the
 * "cannot be used as key" text for a {@code Tuple1<int[]>} type.
 */
@Test
public void testPOJOWithNestedArrayNoHashCodeKeyRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<POJOWithHashCode> pojoStream =
            env.fromElements(new POJOWithHashCode(new int[] {1, 2}));

    final TypeInformation<?> rejectedKeyType =
            new TupleTypeInfo<Tuple1<int[]>>(PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO);
    final String expectedPrefix = "Type " + rejectedKeyType + " cannot be used as key.";

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    pojoStream.keyBy("id");
}
Example 2
Source File: FlinkGroupByKeyWrapper.java From flink-dataflow with Apache License 2.0 | 6 votes |
/**
 * Keys a stream of windowed key/value pairs by the key of each pair.
 *
 * <p>When the key coder is a {@code VoidCoder}, every element is keyed by the shared
 * {@code VoidCoderTypeSerializer.VoidValue.INSTANCE} instead of the element's own key.
 *
 * @param inputDataStream the stream to key
 * @param inputKvCoder coder of the key/value pairs; its key coder supplies the key type information
 * @return the stream keyed by {@code K}
 */
public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(DataStream<WindowedValue<KV<K, V>>> inputDataStream, KvCoder<K, V> inputKvCoder) {
    final Coder<K> coderForKey = inputKvCoder.getKeyCoder();
    final boolean voidKey = coderForKey instanceof VoidCoder;
    final TypeInformation<K> producedKeyType = new CoderTypeInformation<>(coderForKey);

    final KeySelectorWithQueryableResultType<K, V> selector =
            new KeySelectorWithQueryableResultType<K, V>() {
                @Override
                public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
                    if (voidKey) {
                        // All elements share one placeholder key when the key coder is void.
                        return (K) VoidCoderTypeSerializer.VoidValue.INSTANCE;
                    }
                    return value.getValue().getKey();
                }

                @Override
                public TypeInformation<K> getProducedType() {
                    return producedKeyType;
                }
            };

    return inputDataStream.keyBy(selector);
}
Example 3
Source File: Utils.java From incubator-samoa with Apache License 2.0 | 6 votes |
/**
 * Applies the requested partitioning to the given stream.
 *
 * <ul>
 *   <li>{@code BROADCAST}: {@code stream.broadcast()}</li>
 *   <li>{@code GROUP_BY_KEY}: keyed by the {@code f0} field of each element</li>
 *   <li>{@code SHUFFLE} and any other value: {@code stream.shuffle()}</li>
 * </ul>
 *
 * NOTE(review): the return type is the raw {@code DataStream}; it is left as-is so
 * existing callers keep compiling.
 *
 * @param stream the stream to partition
 * @param partitioning the partitioning scheme to apply
 * @return the partitioned stream
 */
public static DataStream subscribe(DataStream<SamoaType> stream, PartitioningScheme partitioning) {
    switch (partitioning) {
        case GROUP_BY_KEY: {
            final KeySelector<SamoaType, String> byFirstField = new KeySelector<SamoaType, String>() {
                @Override
                public String getKey(SamoaType element) throws Exception {
                    return element.f0;
                }
            };
            return stream.keyBy(byFirstField);
        }
        case BROADCAST:
            return stream.broadcast();
        case SHUFFLE:
        default:
            // Shuffle is both the explicit SHUFFLE choice and the fallback.
            return stream.shuffle();
    }
}
Example 4
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that keying a {@code POJOWithHashCode} stream by its {@code "id"} field is
 * rejected with an {@code InvalidProgramException} whose message starts with the
 * "cannot be used as key" text for a {@code Tuple1<int[]>} type.
 */
@Test
public void testPOJOWithNestedArrayNoHashCodeKeyRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<POJOWithHashCode> pojoStream =
            env.fromElements(new POJOWithHashCode(new int[] {1, 2}));

    final TypeInformation<?> rejectedKeyType =
            new TupleTypeInfo<Tuple1<int[]>>(PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO);
    final String expectedPrefix = "Type " + rejectedKeyType + " cannot be used as key.";

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    pojoStream.keyBy("id");
}
Example 5
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that using a whole {@code Tuple2<Integer[], String>} element as the key is
 * rejected with an {@code InvalidProgramException} naming the tuple type.
 */
@Test
public void testTupleNestedArrayKeyRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Tuple2<Integer[], String>> tupleStream =
            env.fromElements(new Tuple2<>(new Integer[] {1, 2}, "test-test"));

    final TypeInformation<?> rejectedKeyType = new TupleTypeInfo<Tuple2<Integer[], String>>(
            BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    final String expectedPrefix = "Type " + rejectedKeyType + " cannot be used as key.";

    // Identity selector: the entire tuple, array field included, becomes the key.
    final KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>> wholeTupleAsKey =
            new KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>>() {
                @Override
                public Tuple2<Integer[], String> getKey(Tuple2<Integer[], String> element) throws Exception {
                    return element;
                }
            };

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    tupleStream.keyBy(wholeTupleAsKey);
}
Example 6
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that using a whole {@code Tuple2<Integer[], String>} element as the key is
 * rejected with an {@code InvalidProgramException} naming the tuple type.
 */
@Test
public void testTupleNestedArrayKeyRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Tuple2<Integer[], String>> tupleStream =
            env.fromElements(new Tuple2<>(new Integer[] {1, 2}, "test-test"));

    final TypeInformation<?> rejectedKeyType = new TupleTypeInfo<Tuple2<Integer[], String>>(
            BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    final String expectedPrefix = "Type " + rejectedKeyType + " cannot be used as key.";

    // Identity selector: the entire tuple, array field included, becomes the key.
    final KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>> wholeTupleAsKey =
            new KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>>() {
                @Override
                public Tuple2<Integer[], String> getKey(Tuple2<Integer[], String> element) throws Exception {
                    return element;
                }
            };

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    tupleStream.keyBy(wholeTupleAsKey);
}
Example 7
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that keying a stream by the {@code POJOWithoutHashCode} element itself is
 * rejected with an {@code InvalidProgramException}. No message check is made.
 */
@Test
public void testPOJOnoHashCodeKeyRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<POJOWithoutHashCode> pojoStream =
            env.fromElements(new POJOWithoutHashCode(new int[] {1, 2}));

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);

    // Identity selector: the POJO itself is used as the key.
    pojoStream.keyBy(new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {
        @Override
        public POJOWithoutHashCode getKey(POJOWithoutHashCode element) throws Exception {
            return element;
        }
    });
}
Example 8
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that keying a {@code POJOWithHashCode} stream by its {@code "id"} field is
 * rejected with an {@code InvalidProgramException} whose message starts with the
 * "cannot be used as key" text for a {@code Tuple1<int[]>} type.
 */
@Test
public void testPOJOWithNestedArrayNoHashCodeKeyRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<POJOWithHashCode> pojoStream =
            env.fromElements(new POJOWithHashCode(new int[] {1, 2}));

    final TypeInformation<?> rejectedKeyType =
            new TupleTypeInfo<Tuple1<int[]>>(PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO);
    final String expectedPrefix = "Type " + rejectedKeyType + " cannot be used as key.";

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    pojoStream.keyBy("id");
}
Example 9
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that keying a stream by the {@code POJOWithoutHashCode} element itself is
 * rejected with an {@code InvalidProgramException}. No message check is made.
 */
@Test
public void testPOJOnoHashCodeKeyRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<POJOWithoutHashCode> pojoStream =
            env.fromElements(new POJOWithoutHashCode(new int[] {1, 2}));

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);

    // Identity selector: the POJO itself is used as the key.
    pojoStream.keyBy(new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {
        @Override
        public POJOWithoutHashCode getKey(POJOWithoutHashCode element) throws Exception {
            return element;
        }
    });
}
Example 10
Source File: DataStreamTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Verifies that keying a stream by the {@code POJOWithoutHashCode} element itself is
 * rejected with an {@code InvalidProgramException}. No message check is made.
 */
@Test
public void testPOJOnoHashCodeKeyRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<POJOWithoutHashCode> pojoStream =
            env.fromElements(new POJOWithoutHashCode(new int[] {1, 2}));

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);

    // Identity selector: the POJO itself is used as the key.
    pojoStream.keyBy(new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {
        @Override
        public POJOWithoutHashCode getKey(POJOWithoutHashCode element) throws Exception {
            return element;
        }
    });
}
Example 11
Source File: DataStreamTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies that keying a {@code Tuple2<TestEnum, String>} stream by its enum field is
 * rejected with an {@code InvalidProgramException} naming the enum type information.
 */
@Test
public void testEnumKeyRejection() {
    final KeySelector<Tuple2<TestEnum, String>, TestEnum> enumFieldAsKey = tuple -> tuple.f0;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Tuple2<TestEnum, String>> taggedStream = env.fromElements(
            Tuple2.of(TestEnum.FOO, "Foo"),
            Tuple2.of(TestEnum.BAR, "Bar"));

    // Register the expected failure before the call that should trigger it.
    expectedException.expect(InvalidProgramException.class);
    final String expectedPrefix =
            "Type " + EnumTypeInfo.of(TestEnum.class) + " cannot be used as key.";
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    taggedStream.keyBy(enumFieldAsKey);
}
Example 12
Source File: MultiStreamTransformations.java From examples-java with Apache License 2.0 | 5 votes |
/**
 * Builds and runs the job: sensor readings keyed by sensor id are connected with a
 * broadcast smoke-level stream, passed through {@code RaiseAlertFlatMap}, and the
 * resulting alerts are printed.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Event-time semantics, with the auto-watermark interval set to 1000.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.getConfig().setAutoWatermarkInterval(1000L);

    // Temperature readings; timestamps and watermarks are needed for event time.
    final DataStream<SensorReading> temperatures = env
            .addSource(new SensorSource())
            .assignTimestampsAndWatermarks(new SensorTimeAssigner());

    // Smoke levels, read by a source with parallelism 1.
    final DataStream<SmokeLevel> smokeLevels = env
            .addSource(new SmokeLevelSource())
            .setParallelism(1);

    // Group temperatures per sensor; broadcast the smoke stream.
    final KeyedStream<SensorReading, String> temperaturesBySensor = temperatures.keyBy(r -> r.id);
    final DataStream<SmokeLevel> smokeBroadcast = smokeLevels.broadcast();

    // Join both inputs and let RaiseAlertFlatMap decide when to emit an alert.
    final DataStream<Alert> alerts = temperaturesBySensor
            .connect(smokeBroadcast)
            .flatMap(new RaiseAlertFlatMap());
    alerts.print();

    env.execute("Multi-Stream Transformations Example");
}
Example 13
Source File: DataStreamTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Asserts that the given selector yields {@code expectedKeyType} for a
 * {@code Tuple2<Integer[], String>} stream, and that keying by it is rejected with an
 * {@code InvalidProgramException} naming that type.
 *
 * @param keySelector the selector under test
 * @param expectedKeyType the key type the selector is expected to produce
 */
private <K> void testKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Tuple2<Integer[], String>> tupleStream =
            env.fromElements(new Tuple2<>(new Integer[] {1, 2}, "barfoo"));

    // First confirm the selector is typed as the caller expects.
    final TypeInformation<?> extractedKeyType =
            TypeExtractor.getKeySelectorTypes(keySelector, tupleStream.getType());
    Assert.assertEquals(expectedKeyType, extractedKeyType);

    // Register the expected failure before the call that should trigger it.
    final String expectedPrefix = "Type " + expectedKeyType + " cannot be used as key.";
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    tupleStream.keyBy(keySelector);
}
Example 14
Source File: FlinkStreamingTransformTranslators.java From beam with Apache License 2.0 | 5 votes |
/**
 * Translates the transform into a keyed Flink stream: each input element is turned
 * into a windowed {@code SingletonKeyedWorkItem}, the stream is keyed with a
 * {@code WorkItemKeySelector}, and the result is registered as the transform's output.
 *
 * @param transform the transform being translated
 * @param context the translation context providing inputs, outputs and pipeline options
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KeyedWorkItem<K, InputT>>> transform,
    FlinkStreamingTranslationContext context) {
  final PCollection<KV<K, InputT>> inputCollection = context.getInput(transform);
  final KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) inputCollection.getCoder();

  // Coder chain: work item -> value-only windowed work item -> Flink type information.
  final SingletonKeyedWorkItemCoder<K, InputT> itemCoder =
      SingletonKeyedWorkItemCoder.of(
          kvCoder.getKeyCoder(),
          kvCoder.getValueCoder(),
          inputCollection.getWindowingStrategy().getWindowFn().windowCoder());
  final WindowedValue.ValueOnlyWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
      windowedItemCoder = WindowedValue.getValueOnlyCoder(itemCoder);
  final CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> itemTypeInfo =
      new CoderTypeInformation<>(windowedItemCoder);

  final DataStream<WindowedValue<KV<K, InputT>>> sourceStream =
      context.getInputDataStream(inputCollection);

  // Convert every element into a keyed work item.
  final DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> itemStream =
      sourceStream
          .flatMap(new ToKeyedWorkItemInGlobalWindow<>(context.getPipelineOptions()))
          .returns(itemTypeInfo)
          .name("ToKeyedWorkItem");

  // Key the work items using the key coder of the input pairs.
  final KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedItems =
      itemStream.keyBy(new WorkItemKeySelector<>(kvCoder.getKeyCoder()));

  context.setOutputDataStream(context.getOutput(transform), keyedItems);
}
Example 15
Source File: SummaryTreeReduce.java From gelly-streaming with Apache License 2.0 | 5 votes |
private DataStream<Tuple2<Integer, S>> enhance(DataStream<Tuple2<Integer, S>> input, TypeInformation<Tuple2<Integer, S>> aggType) { if (input.getParallelism() <= 2) { return input; } int nextParal = input.getParallelism() / 2; DataStream<Tuple2<Integer, S>> unpartitionedStream = input.keyBy(new KeySelector<Tuple2<Integer, S>, Integer>() { //collapse two partitions into one @Override public Integer getKey(Tuple2<Integer, S> record) throws Exception { return record.f0 / 2; } }); //repartition stream to p / 2 aggregators KeyedStream<Tuple2<Integer, S>, Integer> repartitionedStream = unpartitionedStream.map(new PartitionReMapper()).returns(aggType) .setParallelism(nextParal) .keyBy(0); //window again on event time and aggregate DataStream<Tuple2<Integer, S>> aggregatedStream = repartitionedStream.timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS)) .reduce(new AggregationWrapper<>(getCombineFun())) .setParallelism(nextParal); return enhance(aggregatedStream, aggType); }
Example 16
Source File: DataStreamTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Asserts that the given selector yields {@code expectedKeyType} for a
 * {@code Tuple2<Integer[], String>} stream, and that keying by it is rejected with an
 * {@code InvalidProgramException} naming that type.
 *
 * @param keySelector the selector under test
 * @param expectedKeyType the key type the selector is expected to produce
 */
private <K> void testKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Tuple2<Integer[], String>> tupleStream =
            env.fromElements(new Tuple2<>(new Integer[] {1, 2}, "barfoo"));

    // First confirm the selector is typed as the caller expects.
    final TypeInformation<?> extractedKeyType =
            TypeExtractor.getKeySelectorTypes(keySelector, tupleStream.getType());
    Assert.assertEquals(expectedKeyType, extractedKeyType);

    // Register the expected failure before the call that should trigger it.
    final String expectedPrefix = "Type " + expectedKeyType + " cannot be used as key.";
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    tupleStream.keyBy(keySelector);
}
Example 17
Source File: RideCount.java From flink-training-exercises with Apache License 2.0 | 5 votes |
/**
 * Builds and runs the "Ride Count" job: each ride is mapped to {@code (driverId, 1)},
 * the pairs are keyed by driver id and summed, and the running counts are printed.
 *
 * @param args command-line arguments; {@code input} optionally overrides the ride data path
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String ridesPath = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60; // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served every second

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Source of taxi ride events.
    final DataStream<TaxiRide> rides =
            env.addSource(new TaxiRideSource(ridesPath, maxEventDelay, servingSpeedFactor));

    // Turn each ride into a (driverId, 1) pair.
    final MapFunction<TaxiRide, Tuple2<Long, Long>> toDriverCountPair =
            new MapFunction<TaxiRide, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> map(TaxiRide ride) throws Exception {
                    return new Tuple2<Long, Long>(ride.driverId, 1L);
                }
            };
    final DataStream<Tuple2<Long, Long>> driverCountPairs = rides.map(toDriverCountPair);

    // Key by field 0 (driverId) and sum field 1 (the count).
    final DataStream<Tuple2<Long, Long>> rideCounts = driverCountPairs.keyBy(0).sum(1);

    rideCounts.print();

    // Run the ride-counting job.
    env.execute("Ride Count");
}
Example 18
Source File: LongRidesCEPExercise.java From flink-training-exercises with Apache License 2.0 | 5 votes |
/**
 * Exercise skeleton for detecting long taxi rides with CEP.
 *
 * <p>Rides are keyed by {@code rideId} and matched against a pattern limited to 120
 * minutes. Rides that time out are routed to a side output, which is printed or tested.
 * The pattern is intentionally incomplete, and the method ends by throwing
 * {@code MissingSolutionException} instead of executing the job.
 *
 * @param args command-line arguments; {@code input} optionally overrides the ride data path
 * @throws Exception always, as the exercise solution is missing
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String ridesPath = params.get("input", ExerciseBase.pathToRideData);

    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // CheckpointedTaxiRideSource delivers events in order
    final DataStream<TaxiRide> rides = env.addSource(
            rideSourceOrTest(new CheckpointedTaxiRideSource(ridesPath, servingSpeedFactor)));
    final DataStream<TaxiRide> ridesById = rides.keyBy("rideId");

    // A complete taxi ride has a START event followed by an END event.
    // This pattern is incomplete ...
    final Pattern<TaxiRide, TaxiRide> completedRides = Pattern.<TaxiRide>begin("start");

    // The goal is rides NOT completed within 120 minutes. The pattern matches rides
    // that ARE completed; matches are ignored and the timeouts are emitted.
    final Pattern<TaxiRide, TaxiRide> completedWithinLimit = completedRides.within(Time.minutes(120));
    final PatternStream<TaxiRide> patternStream = CEP.pattern(ridesById, completedWithinLimit);

    // Anonymous subclass, as in the original, so the tag keeps its generic type.
    final OutputTag<TaxiRide> timedOutTag = new OutputTag<TaxiRide>("timedout"){};

    final SingleOutputStreamOperator<TaxiRide> longRides = patternStream.flatSelect(
            timedOutTag,
            new TaxiRideTimedOut<TaxiRide>(),
            new FlatSelectNothing<TaxiRide>());

    printOrTest(longRides.getSideOutput(timedOutTag));

    throw new MissingSolutionException();
    // env.execute("Long Taxi Rides (CEP)");
}
Example 19
Source File: DataStreamTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Asserts that the given selector yields {@code expectedKeyType} for a
 * {@code Tuple2<Integer[], String>} stream, and that keying by it is rejected with an
 * {@code InvalidProgramException} naming that type.
 *
 * @param keySelector the selector under test
 * @param expectedKeyType the key type the selector is expected to produce
 */
private <K> void assertArrayKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Tuple2<Integer[], String>> tupleStream =
            env.fromElements(new Tuple2<>(new Integer[] {1, 2}, "barfoo"));

    // First confirm the selector is typed as the caller expects.
    final TypeInformation<?> extractedKeyType =
            TypeExtractor.getKeySelectorTypes(keySelector, tupleStream.getType());
    Assert.assertEquals(expectedKeyType, extractedKeyType);

    // Register the expected failure before the call that should trigger it.
    final String expectedPrefix = "Type " + expectedKeyType + " cannot be used as key.";
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith(expectedPrefix));

    tupleStream.keyBy(keySelector);
}
Example 20
Source File: FlinkStreamingPortablePipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Adds a group-by-key step to the given stream.
 *
 * <p>Each input element is converted into a windowed {@code SingletonKeyedWorkItem}, the
 * stream is keyed with a {@code WorkItemKeySelector}, and a {@code WindowDoFnOperator}
 * built around a buffering {@code SystemReduceFn} is applied to the keyed stream.
 *
 * @param inputDataStream stream of windowed key/value pairs to group
 * @param windowingStrategy windowing strategy; supplies the window coder and is passed to the operator
 * @param windowedInputCoder coder of the input elements; its value coder is cast to {@code KvCoder}
 * @param operatorName name given to the operator and to the Flink transformation
 * @param context translation context, used here for the pipeline options
 * @return the stream produced by applying the operator to the keyed work-item stream
 */
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
DataStream<WindowedValue<KV<K, V>>> inputDataStream,
WindowingStrategy<?, ?> windowingStrategy,
WindowedValueCoder<KV<K, V>> windowedInputCoder,
String operatorName,
StreamingTranslationContext context) {
// NOTE(review): unchecked cast; assumes the value coder is always a KvCoder -- confirm with callers.
KvCoder<K, V> inputElementCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();
// Coder chain for the intermediate work items: item -> fully windowed item -> Flink type information.
SingletonKeyedWorkItemCoder<K, V> workItemCoder =
SingletonKeyedWorkItemCoder.of(
inputElementCoder.getKeyCoder(),
inputElementCoder.getValueCoder(),
windowingStrategy.getWindowFn().windowCoder());
WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, V>> windowedWorkItemCoder =
WindowedValue.getFullCoder(workItemCoder, windowingStrategy.getWindowFn().windowCoder());
CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemTypeInfo =
new CoderTypeInformation<>(windowedWorkItemCoder);
// Convert every input element into a keyed work item.
DataStream<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemStream =
inputDataStream
.flatMap(
new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
context.getPipelineOptions()))
.returns(workItemTypeInfo)
.name("ToKeyedWorkItem");
// The same selector instance keys the stream and is handed to the operator below.
WorkItemKeySelector<K, V> keySelector =
new WorkItemKeySelector<>(inputElementCoder.getKeyCoder());
KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer> keyedWorkItemStream =
workItemStream.keyBy(keySelector);
// Buffering reduce function and the coders describing the grouped output.
SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> reduceFn =
SystemReduceFn.buffering(inputElementCoder.getValueCoder());
Coder<Iterable<V>> accumulatorCoder = IterableCoder.of(inputElementCoder.getValueCoder());
Coder<WindowedValue<KV<K, Iterable<V>>>> outputCoder =
WindowedValue.getFullCoder(
KvCoder.of(inputElementCoder.getKeyCoder(), accumulatorCoder),
windowingStrategy.getWindowFn().windowCoder());
TypeInformation<WindowedValue<KV<K, Iterable<V>>>> outputTypeInfo =
new CoderTypeInformation<>(outputCoder);
TupleTag<KV<K, Iterable<V>>> mainTag = new TupleTag<>("main output");
// Build the operator; no additional outputs and no side inputs are configured.
WindowDoFnOperator<K, V, Iterable<V>> doFnOperator =
new WindowDoFnOperator<>(
reduceFn,
operatorName,
// Raw cast; presumably needed to satisfy the constructor's generic signature -- TODO confirm.
(Coder) windowedWorkItemCoder,
mainTag,
Collections.emptyList(),
new DoFnOperator.MultiOutputOutputManagerFactory(mainTag, outputCoder),
windowingStrategy,
new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
context.getPipelineOptions(),
inputElementCoder.getKeyCoder(),
(KeySelector) keySelector /* key selector */);
// Apply the operator to the keyed stream (raw cast to OneInputStreamOperator).
SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> outputDataStream =
keyedWorkItemStream.transform(
operatorName, outputTypeInfo, (OneInputStreamOperator) doFnOperator);
return outputDataStream;
}