Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#keyBy()
The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#keyBy().
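Before the harvested examples, here is a minimal, self-contained sketch of the typical keyBy() pattern (the class name, sample data, and job name are illustrative, not taken from any project below). keyBy() partitions a stream so that all records with the same key are routed to the same parallel task, which is what makes per-key stateful operations such as sum() possible.

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // a small in-memory stream of (word, count) pairs
        DataStream<Tuple2<String, Long>> words = env.fromElements(
                Tuple2.of("flink", 1L),
                Tuple2.of("stream", 1L),
                Tuple2.of("flink", 1L));

        // partition the stream by the word field; records with the same
        // key are processed by the same parallel task instance
        KeyedStream<Tuple2<String, Long>, String> keyed = words.keyBy(
                new KeySelector<Tuple2<String, Long>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Long> value) {
                        return value.f0;
                    }
                });

        // per-key running sum of the count field
        keyed.sum(1).print();

        env.execute("keyBy example");
    }
}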
Example 1
Source File: DataStreamTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test
public void testPOJOWithNestedArrayNoHashCodeKeyRejection() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<POJOWithHashCode> input = env.fromElements(
            new POJOWithHashCode(new int[] {1, 2}));

    TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple1<int[]>>(
            PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO);

    // adjust the rule
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith("Type " + expectedTypeInfo + " cannot be used as key."));

    input.keyBy("id");
}
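Most of the DataStreamTest examples on this page exercise the same rule: a type can only serve as a key if it hashes deterministically by content, so Java arrays (which inherit identity-based hashCode), POJOs that do not override hashCode, and enums (whose hashCode can vary between JVM runs) are all rejected with InvalidProgramException. A common workaround is to key on a stable representation instead. The sketch below is hypothetical, not part of the test, and assumes the test's POJOWithHashCode exposes its int[] field as a public field named id, as POJO rules require.

// hypothetical workaround: key on a deterministic, content-based
// representation of the array instead of on the int[] itself
input.keyBy(new KeySelector<POJOWithHashCode, String>() {
    @Override
    public String getKey(POJOWithHashCode value) {
        // Arrays.toString is content-based and stable, whereas
        // int[].hashCode() hashes by object identity
        return java.util.Arrays.toString(value.id);
    }
});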
Example 2
Source File: FlinkGroupByKeyWrapper.java From flink-dataflow with Apache License 2.0 | 6 votes |
public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(
        DataStream<WindowedValue<KV<K, V>>> inputDataStream,
        KvCoder<K, V> inputKvCoder) {
    final Coder<K> keyCoder = inputKvCoder.getKeyCoder();
    final TypeInformation<K> keyTypeInfo = new CoderTypeInformation<>(keyCoder);
    final boolean isKeyVoid = keyCoder instanceof VoidCoder;

    return inputDataStream.keyBy(
            new KeySelectorWithQueryableResultType<K, V>() {

                @Override
                public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
                    return isKeyVoid ? (K) VoidCoderTypeSerializer.VoidValue.INSTANCE :
                            value.getValue().getKey();
                }

                @Override
                public TypeInformation<K> getProducedType() {
                    return keyTypeInfo;
                }
            });
}
Example 3
Source File: Utils.java From incubator-samoa with Apache License 2.0 | 6 votes |
public static DataStream subscribe(DataStream<SamoaType> stream, PartitioningScheme partitioning) {
    switch (partitioning) {
        case BROADCAST:
            return stream.broadcast();
        case GROUP_BY_KEY:
            return stream.keyBy(new KeySelector<SamoaType, String>() {
                @Override
                public String getKey(SamoaType samoaType) throws Exception {
                    return samoaType.f0;
                }
            });
        case SHUFFLE:
        default:
            return stream.shuffle();
    }
}
Example 4
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testPOJOWithNestedArrayNoHashCodeKeyRejection() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<POJOWithHashCode> input = env.fromElements(
            new POJOWithHashCode(new int[] {1, 2}));

    TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple1<int[]>>(
            PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO);

    // adjust the rule
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith("Type " + expectedTypeInfo + " cannot be used as key."));

    input.keyBy("id");
}
Example 5
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testTupleNestedArrayKeyRejection() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<Integer[], String>> input = env.fromElements(
            new Tuple2<>(new Integer[] {1, 2}, "test-test"));

    TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple2<Integer[], String>>(
            BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

    // adjust the rule
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith("Type " + expectedTypeInfo + " cannot be used as key."));

    input.keyBy(new KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>>() {
        @Override
        public Tuple2<Integer[], String> getKey(Tuple2<Integer[], String> value) throws Exception {
            return value;
        }
    });
}
Example 6
Source File: DataStreamTest.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testPOJOnoHashCodeKeyRejection() {
    KeySelector<POJOWithoutHashCode, POJOWithoutHashCode> keySelector =
            new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {
                @Override
                public POJOWithoutHashCode getKey(POJOWithoutHashCode value) throws Exception {
                    return value;
                }
            };

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<POJOWithoutHashCode> input = env.fromElements(
            new POJOWithoutHashCode(new int[] {1, 2}));

    // adjust the rule
    expectedException.expect(InvalidProgramException.class);

    input.keyBy(keySelector);
}
Example 7
Source File: DataStreamTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test
public void testPOJOnoHashCodeKeyRejection() {
    KeySelector<POJOWithoutHashCode, POJOWithoutHashCode> keySelector =
            new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {
                @Override
                public POJOWithoutHashCode getKey(POJOWithoutHashCode value) throws Exception {
                    return value;
                }
            };

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<POJOWithoutHashCode> input = env.fromElements(
            new POJOWithoutHashCode(new int[] {1, 2}));

    // adjust the rule
    expectedException.expect(InvalidProgramException.class);

    input.keyBy(keySelector);
}
Example 8
Source File: DataStreamTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testEnumKeyRejection() {
    KeySelector<Tuple2<TestEnum, String>, TestEnum> keySelector = value -> value.f0;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<TestEnum, String>> input = env.fromElements(
            Tuple2.of(TestEnum.FOO, "Foo"),
            Tuple2.of(TestEnum.BAR, "Bar"));

    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith("Type " + EnumTypeInfo.of(TestEnum.class) + " cannot be used as key."));

    input.keyBy(keySelector);
}
Example 9
Source File: MultiStreamTransformations.java From examples-java with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // use event time for the application
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // configure watermark interval
    env.getConfig().setAutoWatermarkInterval(1000L);

    // ingest sensor stream
    DataStream<SensorReading> tempReadings = env
        // SensorSource generates random temperature readings
        .addSource(new SensorSource())
        // assign timestamps and watermarks which are required for event time
        .assignTimestampsAndWatermarks(new SensorTimeAssigner());

    // ingest smoke level stream
    DataStream<SmokeLevel> smokeReadings = env
        .addSource(new SmokeLevelSource())
        .setParallelism(1);

    // group sensor readings by sensor id
    KeyedStream<SensorReading, String> keyedTempReadings = tempReadings
        .keyBy(r -> r.id);

    // connect the two streams and raise an alert if the temperature and
    // smoke levels are high
    DataStream<Alert> alerts = keyedTempReadings
        .connect(smokeReadings.broadcast())
        .flatMap(new RaiseAlertFlatMap());

    alerts.print();

    // execute the application
    env.execute("Multi-Stream Transformations Example");
}
Example 10
Source File: DataStreamTest.java From flink with Apache License 2.0 | 5 votes |
private <K> void testKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<Integer[], String>> input = env.fromElements(
            new Tuple2<>(new Integer[] {1, 2}, "barfoo"));

    Assert.assertEquals(expectedKeyType, TypeExtractor.getKeySelectorTypes(keySelector, input.getType()));

    // adjust the rule
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith("Type " + expectedKeyType + " cannot be used as key."));

    input.keyBy(keySelector);
}
Example 11
Source File: FlinkStreamingTransformTranslators.java From beam with Apache License 2.0 | 5 votes |
@Override
public void translateNode(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KeyedWorkItem<K, InputT>>> transform,
        FlinkStreamingTranslationContext context) {

    PCollection<KV<K, InputT>> input = context.getInput(transform);

    KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();

    SingletonKeyedWorkItemCoder<K, InputT> workItemCoder =
        SingletonKeyedWorkItemCoder.of(
            inputKvCoder.getKeyCoder(),
            inputKvCoder.getValueCoder(),
            input.getWindowingStrategy().getWindowFn().windowCoder());

    WindowedValue.ValueOnlyWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
        windowedWorkItemCoder = WindowedValue.getValueOnlyCoder(workItemCoder);

    CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
        new CoderTypeInformation<>(windowedWorkItemCoder);

    DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);

    DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
        inputDataStream
            .flatMap(new ToKeyedWorkItemInGlobalWindow<>(context.getPipelineOptions()))
            .returns(workItemTypeInfo)
            .name("ToKeyedWorkItem");

    KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream =
        workItemStream.keyBy(new WorkItemKeySelector<>(inputKvCoder.getKeyCoder()));

    context.setOutputDataStream(context.getOutput(transform), keyedWorkItemStream);
}
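Note that both Beam translators on this page (this example and the FlinkStreamingPortablePipelineTranslator example below) key the stream by ByteBuffer rather than by K directly: WorkItemKeySelector encodes each key with its coder and wraps the resulting bytes, so a key type only needs a deterministic encoding rather than stable hashCode()/equals() implementations. This is one way to sidestep exactly the key-type restrictions demonstrated by the DataStreamTest rejection examples above.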
Example 12
Source File: SummaryTreeReduce.java From gelly-streaming with Apache License 2.0 | 5 votes |
private DataStream<Tuple2<Integer, S>> enhance(DataStream<Tuple2<Integer, S>> input, TypeInformation<Tuple2<Integer, S>> aggType) {

    if (input.getParallelism() <= 2) {
        return input;
    }

    int nextParal = input.getParallelism() / 2;
    DataStream<Tuple2<Integer, S>> unpartitionedStream =
        input.keyBy(new KeySelector<Tuple2<Integer, S>, Integer>() {
            // collapse two partitions into one
            @Override
            public Integer getKey(Tuple2<Integer, S> record) throws Exception {
                return record.f0 / 2;
            }
        });

    // repartition stream to p / 2 aggregators
    KeyedStream<Tuple2<Integer, S>, Integer> repartitionedStream =
        unpartitionedStream.map(new PartitionReMapper()).returns(aggType)
            .setParallelism(nextParal)
            .keyBy(0);

    // window again on event time and aggregate
    DataStream<Tuple2<Integer, S>> aggregatedStream =
        repartitionedStream.timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS))
            .reduce(new AggregationWrapper<>(getCombineFun()))
            .setParallelism(nextParal);

    return enhance(aggregatedStream, aggType);
}
Example 13
Source File: DataStreamTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private <K> void testKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<Integer[], String>> input = env.fromElements(
            new Tuple2<>(new Integer[] {1, 2}, "barfoo"));

    Assert.assertEquals(expectedKeyType, TypeExtractor.getKeySelectorTypes(keySelector, input.getType()));

    // adjust the rule
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith("Type " + expectedKeyType + " cannot be used as key."));

    input.keyBy(keySelector);
}
Example 14
Source File: RideCount.java From flink-training-exercises with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int maxEventDelay = 60;       // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served every second

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));

    // map each ride to a tuple of (driverId, 1)
    DataStream<Tuple2<Long, Long>> tuples = rides.map(new MapFunction<TaxiRide, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(TaxiRide ride) throws Exception {
            return new Tuple2<Long, Long>(ride.driverId, 1L);
        }
    });

    // partition the stream by the driverId
    KeyedStream<Tuple2<Long, Long>, Tuple> keyedByDriverId = tuples.keyBy(0);

    // count the rides for each driver
    DataStream<Tuple2<Long, Long>> rideCounts = keyedByDriverId.sum(1);

    // we could, in fact, print out any or all of these streams
    rideCounts.print();

    // run the pipeline
    env.execute("Ride Count");
}
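Index-based keying such as keyBy(0) belongs to the older tuple API; recent Flink releases deprecate it in favor of key selectors, which also give the key a concrete type instead of the untyped Tuple seen above. A sketch of the equivalent call, assuming the same tuples stream as in the example:

// equivalent keying with an explicit KeySelector; the key type becomes
// Long rather than the untyped Tuple produced by positional keyBy(0)
KeyedStream<Tuple2<Long, Long>, Long> keyedByDriverId =
    tuples.keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {
        @Override
        public Long getKey(Tuple2<Long, Long> t) {
            return t.f0;
        }
    });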
Example 15
Source File: LongRidesCEPExercise.java From flink-training-exercises with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {

    ParameterTool params = ParameterTool.fromArgs(args);
    final String input = params.get("input", ExerciseBase.pathToRideData);

    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(ExerciseBase.parallelism);

    // CheckpointedTaxiRideSource delivers events in order
    DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new CheckpointedTaxiRideSource(input, servingSpeedFactor)));

    DataStream<TaxiRide> keyedRides = rides
        .keyBy("rideId");

    // A complete taxi ride has a START event followed by an END event
    // This pattern is incomplete ...
    Pattern<TaxiRide, TaxiRide> completedRides = Pattern.<TaxiRide>begin("start");

    // We want to find rides that have NOT been completed within 120 minutes.
    // This pattern matches rides that ARE completed.
    // Below we will ignore rides that match this pattern, and emit those that timeout.
    PatternStream<TaxiRide> patternStream = CEP.pattern(keyedRides, completedRides.within(Time.minutes(120)));

    OutputTag<TaxiRide> timedout = new OutputTag<TaxiRide>("timedout"){};

    SingleOutputStreamOperator<TaxiRide> longRides = patternStream.flatSelect(
        timedout,
        new TaxiRideTimedOut<TaxiRide>(),
        new FlatSelectNothing<TaxiRide>());

    printOrTest(longRides.getSideOutput(timedout));

    throw new MissingSolutionException();

    // env.execute("Long Taxi Rides (CEP)");
}
Example 16
Source File: DataStreamTest.java From flink with Apache License 2.0 | 5 votes |
private <K> void assertArrayKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<Integer[], String>> input = env.fromElements(
            new Tuple2<>(new Integer[] {1, 2}, "barfoo"));

    Assert.assertEquals(expectedKeyType, TypeExtractor.getKeySelectorTypes(keySelector, input.getType()));

    // adjust the rule
    expectedException.expect(InvalidProgramException.class);
    expectedException.expectMessage(new StringStartsWith("Type " + expectedKeyType + " cannot be used as key."));

    input.keyBy(keySelector);
}
Example 17
Source File: FlinkStreamingPortablePipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
        DataStream<WindowedValue<KV<K, V>>> inputDataStream,
        WindowingStrategy<?, ?> windowingStrategy,
        WindowedValueCoder<KV<K, V>> windowedInputCoder,
        String operatorName,
        StreamingTranslationContext context) {
    KvCoder<K, V> inputElementCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();

    SingletonKeyedWorkItemCoder<K, V> workItemCoder =
        SingletonKeyedWorkItemCoder.of(
            inputElementCoder.getKeyCoder(),
            inputElementCoder.getValueCoder(),
            windowingStrategy.getWindowFn().windowCoder());

    WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, V>> windowedWorkItemCoder =
        WindowedValue.getFullCoder(workItemCoder, windowingStrategy.getWindowFn().windowCoder());

    CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemTypeInfo =
        new CoderTypeInformation<>(windowedWorkItemCoder);

    DataStream<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemStream =
        inputDataStream
            .flatMap(
                new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
                    context.getPipelineOptions()))
            .returns(workItemTypeInfo)
            .name("ToKeyedWorkItem");

    WorkItemKeySelector<K, V> keySelector =
        new WorkItemKeySelector<>(inputElementCoder.getKeyCoder());

    KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer> keyedWorkItemStream =
        workItemStream.keyBy(keySelector);

    SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> reduceFn =
        SystemReduceFn.buffering(inputElementCoder.getValueCoder());

    Coder<Iterable<V>> accumulatorCoder = IterableCoder.of(inputElementCoder.getValueCoder());

    Coder<WindowedValue<KV<K, Iterable<V>>>> outputCoder =
        WindowedValue.getFullCoder(
            KvCoder.of(inputElementCoder.getKeyCoder(), accumulatorCoder),
            windowingStrategy.getWindowFn().windowCoder());

    TypeInformation<WindowedValue<KV<K, Iterable<V>>>> outputTypeInfo =
        new CoderTypeInformation<>(outputCoder);

    TupleTag<KV<K, Iterable<V>>> mainTag = new TupleTag<>("main output");

    WindowDoFnOperator<K, V, Iterable<V>> doFnOperator =
        new WindowDoFnOperator<>(
            reduceFn,
            operatorName,
            (Coder) windowedWorkItemCoder,
            mainTag,
            Collections.emptyList(),
            new DoFnOperator.MultiOutputOutputManagerFactory(mainTag, outputCoder),
            windowingStrategy,
            new HashMap<>(), /* side-input mapping */
            Collections.emptyList(), /* side inputs */
            context.getPipelineOptions(),
            inputElementCoder.getKeyCoder(),
            (KeySelector) keySelector /* key selector */);

    SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> outputDataStream =
        keyedWorkItemStream.transform(
            operatorName, outputTypeInfo, (OneInputStreamOperator) doFnOperator);

    return outputDataStream;
}