org.apache.flink.api.java.operators.GroupReduceOperator Java Examples
The following examples show how to use
org.apache.flink.api.java.operators.GroupReduceOperator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped and sorted
 * through {@code DummyTestKeySelector}, runs {@code DummyGroupReduceFunction2} and declares
 * the forwarded fields {@code "0->4;1;1->3;2"}.
 */
@Test
public void testSemanticPropsWithKeySelector4() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                            .reduceGroup(new DummyGroupReduceFunction2())
                            .withForwardedFields("0->4;1;1->3;2");

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0..3 are expected to have no forwarding target.
    for (int field = 0; field < 4; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 4..6 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 5).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));

    // Source fields 7 and 8 are expected to have no forwarding target.
    for (int field = 7; field <= 8; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 5, 6, 5, 4.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {5, 6, 5, 4};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 4, 7 and 8.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {4, 7, 8}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #2
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped and sorted
 * through {@code DummyTestKeySelector} and runs {@code DummyGroupReduceFunction1}, without
 * any forwarded fields being declared on the operator call chain.
 */
@Test
public void testSemanticPropsWithKeySelector2() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                            .reduceGroup(new DummyGroupReduceFunction1());

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0..3 are expected to have no forwarding target.
    for (int field = 0; field < 4; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 4..6 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 5).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));

    // Source fields 7 and 8 are expected to have no forwarding target.
    for (int field = 7; field <= 8; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 5, 6, 5, 4.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {5, 6, 5, 4};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 4, 7 and 8.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {4, 7, 8}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #3
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped through
 * {@code DummyTestKeySelector} (no group sorting), runs {@code DummyGroupReduceFunction2}
 * and declares the forwarded fields {@code "0->4;1;1->3;2"}.
 */
@Test
public void testSemanticPropsWithKeySelector3() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .reduceGroup(new DummyGroupReduceFunction2())
                            .withForwardedFields("0->4;1;1->3;2");

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0 and 1 are expected to have no forwarding target.
    for (int field = 0; field < 2; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 2..4 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 3).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));

    // Source fields 5 and 6 are expected to have no forwarding target.
    for (int field = 5; field <= 6; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 3, 4, 3, 2.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {3, 4, 3, 2};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 2, 5 and 6.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {2, 5, 6}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #4
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped and sorted
 * through {@code DummyTestKeySelector}, runs {@code DummyGroupReduceFunction2} and declares
 * the forwarded fields {@code "0->4;1;1->3;2"}.
 */
@Test
public void testSemanticPropsWithKeySelector4() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                            .reduceGroup(new DummyGroupReduceFunction2())
                            .withForwardedFields("0->4;1;1->3;2");

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0..3 are expected to have no forwarding target.
    for (int field = 0; field < 4; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 4..6 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 5).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));

    // Source fields 7 and 8 are expected to have no forwarding target.
    for (int field = 7; field <= 8; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 5, 6, 5, 4.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {5, 6, 5, 4};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 4, 7 and 8.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {4, 7, 8}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #5
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped through
 * {@code DummyTestKeySelector} (no group sorting), runs {@code DummyGroupReduceFunction3}
 * and declares the forwarded fields {@code "4->0;3;3->1;2"}.
 */
@Test
public void testSemanticPropsWithKeySelector5() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .reduceGroup(new DummyGroupReduceFunction3())
                            .withForwardedFields("4->0;3;3->1;2");

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0..3 are expected to have no forwarding target.
    for (int field = 0; field < 4; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 4..6 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 5).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(0));

    // Reverse lookup: targets 0..3 map back to 6, 5, 4, 5; target field 4 has no source.
    final int[] expectedSources = {6, 5, 4, 5};
    for (int target = 0; target < expectedSources.length; target++) {
        assertTrue(props.getForwardingSourceField(0, target) == expectedSources[target]);
    }
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // No read-field information is expected for this operator.
    assertTrue(props.getReadFields(0) == null);
}
Example #6
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped and sorted
 * through {@code DummyTestKeySelector}, runs {@code DummyGroupReduceFunction3} and declares
 * the forwarded fields {@code "4->0;3;3->1;2"}.
 */
@Test
public void testSemanticPropsWithKeySelector6() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                            .reduceGroup(new DummyGroupReduceFunction3())
                            .withForwardedFields("4->0;3;3->1;2");

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0..5 are expected to have no forwarding target.
    for (int field = 0; field < 6; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 6..8 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 7).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 7).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 8).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 8).contains(0));

    // Reverse lookup: targets 0..3 map back to 8, 7, 6, 7; target field 4 has no source.
    final int[] expectedSources = {8, 7, 6, 7};
    for (int target = 0; target < expectedSources.length; target++) {
        assertTrue(props.getForwardingSourceField(0, target) == expectedSources[target]);
    }
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // No read-field information is expected for this operator.
    assertTrue(props.getReadFields(0) == null);
}
Example #7
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped through
 * {@code DummyTestKeySelector} (no group sorting) and runs {@code DummyGroupReduceFunction4},
 * without any forwarded fields being declared on the operator call chain.
 */
@Test
public void testSemanticPropsWithKeySelector7() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .reduceGroup(new DummyGroupReduceFunction4());

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0 and 1 are expected to have no forwarding target.
    for (int field = 0; field < 2; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 2, 3 and 5 each forward to a single target; 4 and 6 forward nowhere.
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(0));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 5).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 0);

    // Reverse lookup: targets 0, 1 and 3 have sources; targets 2 and 4 have none.
    assertTrue(props.getForwardingSourceField(0, 0) == 2);
    assertTrue(props.getForwardingSourceField(0, 1) == 3);
    assertTrue(props.getForwardingSourceField(0, 2) < 0);
    assertTrue(props.getForwardingSourceField(0, 3) == 5);
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // No read-field information is expected for this operator.
    assertTrue(props.getReadFields(0) == null);
}
Example #8
Source File: DataSetUtils.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Generate a sample of DataSet which contains fixed size elements. * * <p><strong>NOTE:</strong> Sample with fixed size is not as efficient as sample with fraction, use sample with * fraction unless you need exact precision. * * @param withReplacement Whether element can be selected more than once. * @param numSamples The expected sample size. * @param seed Random number generator seed. * @return The sampled DataSet */ public static <T> DataSet<T> sampleWithSize( DataSet <T> input, final boolean withReplacement, final int numSamples, final long seed) { SampleInPartition<T> sampleInPartition = new SampleInPartition<>(withReplacement, numSamples, seed); MapPartitionOperator mapPartitionOperator = input.mapPartition(sampleInPartition); // There is no previous group, so the parallelism of GroupReduceOperator is always 1. String callLocation = Utils.getCallLocationName(); SampleInCoordinator<T> sampleInCoordinator = new SampleInCoordinator<>(withReplacement, numSamples, seed); return new GroupReduceOperator<>(mapPartitionOperator, input.getType(), sampleInCoordinator, callLocation); }
Example #9
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped through
 * {@code DummyTestKeySelector} (no group sorting), runs {@code DummyGroupReduceFunction3}
 * and declares the forwarded fields {@code "4->0;3;3->1;2"}.
 */
@Test
public void testSemanticPropsWithKeySelector5() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .reduceGroup(new DummyGroupReduceFunction3())
                            .withForwardedFields("4->0;3;3->1;2");

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0..3 are expected to have no forwarding target.
    for (int field = 0; field < 4; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 4..6 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 5).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(0));

    // Reverse lookup: targets 0..3 map back to 6, 5, 4, 5; target field 4 has no source.
    final int[] expectedSources = {6, 5, 4, 5};
    for (int target = 0; target < expectedSources.length; target++) {
        assertTrue(props.getForwardingSourceField(0, target) == expectedSources[target]);
    }
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // No read-field information is expected for this operator.
    assertTrue(props.getReadFields(0) == null);
}
Example #10
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped through
 * {@code DummyTestKeySelector} (no group sorting) and runs {@code DummyGroupReduceFunction1},
 * without any forwarded fields being declared on the operator call chain.
 */
@Test
public void testSemanticPropsWithKeySelector1() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .reduceGroup(new DummyGroupReduceFunction1());

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0 and 1 are expected to have no forwarding target.
    for (int field = 0; field < 2; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 2..4 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 3).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));

    // Source fields 5 and 6 are expected to have no forwarding target.
    for (int field = 5; field <= 6; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 3, 4, 3, 2.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {3, 4, 3, 2};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 2, 5 and 6.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {2, 5, 6}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #11
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped through
 * {@code DummyTestKeySelector} (no group sorting), runs {@code DummyGroupReduceFunction2}
 * and declares the forwarded fields {@code "0->4;1;1->3;2"}.
 */
@Test
public void testSemanticPropsWithKeySelector3() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .reduceGroup(new DummyGroupReduceFunction2())
                            .withForwardedFields("0->4;1;1->3;2");

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0 and 1 are expected to have no forwarding target.
    for (int field = 0; field < 2; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 2..4 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 3).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));

    // Source fields 5 and 6 are expected to have no forwarding target.
    for (int field = 5; field <= 6; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 3, 4, 3, 2.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {3, 4, 3, 2};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 2, 5 and 6.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {2, 5, 6}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #12
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped and sorted
 * through {@code DummyTestKeySelector} and runs {@code DummyGroupReduceFunction1}, without
 * any forwarded fields being declared on the operator call chain.
 */
@Test
public void testSemanticPropsWithKeySelector2() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                            .reduceGroup(new DummyGroupReduceFunction1());

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0..3 are expected to have no forwarding target.
    for (int field = 0; field < 4; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 4..6 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 5).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));

    // Source fields 7 and 8 are expected to have no forwarding target.
    for (int field = 7; field <= 8; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 5, 6, 5, 4.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {5, 6, 5, 4};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 4, 7 and 8.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {4, 7, 8}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #13
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped through
 * {@code DummyTestKeySelector} (no group sorting) and runs {@code DummyGroupReduceFunction1},
 * without any forwarded fields being declared on the operator call chain.
 */
@Test
public void testSemanticPropsWithKeySelector1() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .reduceGroup(new DummyGroupReduceFunction1());

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0 and 1 are expected to have no forwarding target.
    for (int field = 0; field < 2; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 2..4 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 3).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));

    // Source fields 5 and 6 are expected to have no forwarding target.
    for (int field = 5; field <= 6; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 3, 4, 3, 2.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {3, 4, 3, 2};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 2, 5 and 6.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {2, 5, 6}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #14
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0 | 5 votes |
private void translateSink(FlowProcess flowProcess, DataSet<Tuple> input, FlowNode node) { Tap tap = this.getSingle(node.getSinkTaps()); Configuration sinkConfig = this.getNodeConfig(node); tap.sinkConfInit(flowProcess, sinkConfig); int desiredDop = tap.getScheme().getNumSinkParts(); int inputDop = ((Operator)input).getParallelism(); int dop; if (inputDop == 1) { // input operators have dop 1. Probably because they perform a non-keyed reduce or coGroup dop = 1; } else { if (desiredDop > 0) { // output dop explicitly set. if (input instanceof GroupReduceOperator) { // input is a reduce and we must preserve its sorting. // we must set the desired dop also for reduce and related operators adjustDopOfReduceOrCoGroup((GroupReduceOperator) input, desiredDop); } dop = desiredDop; } else { dop = inputDop; } } input .output(new TapOutputFormat(node)) .name(tap.getIdentifier()) .setParallelism(dop) .withParameters(FlinkConfigConverter.toFlinkConfig(sinkConfig)); }
Example #15
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Translates a {@code GroupByKey.GroupByKeyOnly} transform into a group-reduce over the
 * input DataSet, grouped on the {@code "key"} expression, and registers the result as the
 * transform's output DataSet.
 *
 * @param transform The transform to translate.
 * @param context The translation context that supplies the input and receives the output.
 */
@Override
public void translateNode(
        GroupByKey.GroupByKeyOnly<K, V> transform, FlinkBatchTranslationContext context) {

    DataSet<KV<K, V>> source = context.getInputDataSet(context.getInput(transform));

    GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> listAggregator =
            new FlinkKeyedListAggregationFunction<>();

    TypeInformation<KV<K, Iterable<V>>> outputType =
            context.getTypeInfo(context.getOutput(transform));

    // Group on the "key" field expression of the KV elements.
    final String[] keyExpressions = {"key"};
    Grouping<KV<K, V>> byKey =
            new UnsortedGrouping<>(
                    source, new Keys.ExpressionKeys<>(keyExpressions, source.getType()));

    GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> grouped =
            new GroupReduceOperator<>(byKey, outputType, listAggregator, transform.getName());

    context.setOutputDataSet(context.getOutput(transform), grouped);
}
Example #16
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Translates a {@code GroupByKey} transform into a group-reduce over the input DataSet,
 * grouped on the {@code "key"} expression, and registers the result as the transform's
 * output DataSet.
 *
 * @param transform The transform to translate.
 * @param context The translation context that supplies the input and receives the output.
 */
@Override
public void translateNode(GroupByKey<K, V> transform, FlinkBatchTranslationContext context) {

    DataSet<KV<K, V>> source = context.getInputDataSet(context.getInput(transform));

    GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> listAggregator =
            new FlinkKeyedListAggregationFunction<>();

    TypeInformation<KV<K, Iterable<V>>> outputType =
            context.getTypeInfo(context.getOutput(transform));

    // Group on the "key" field expression of the KV elements.
    final String[] keyExpressions = {"key"};
    Grouping<KV<K, V>> byKey =
            new UnsortedGrouping<>(
                    source, new Keys.ExpressionKeys<>(keyExpressions, source.getType()));

    GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> grouped =
            new GroupReduceOperator<>(byKey, outputType, listAggregator, transform.getName());

    context.setOutputDataSet(context.getOutput(transform), grouped);
}
Example #17
Source File: DataSet.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns a new set containing the first n elements in this {@link DataSet}.
 *
 * @param n The desired number of elements; must be at least 1.
 * @return A GroupReduceOperator that represents the DataSet containing the elements.
 * @throws InvalidProgramException If {@code n} is smaller than 1.
 */
public GroupReduceOperator<T, T> first(int n) {
    if (n >= 1) {
        return reduceGroup(new FirstReducer<T>(n));
    }
    throw new InvalidProgramException("Parameter n of first(n) must be at least 1.");
}
Example #18
Source File: DataSetUtils.java From flink with Apache License 2.0 | 5 votes |
/** * Generate a sample of DataSet which contains fixed size elements. * * <p><strong>NOTE:</strong> Sample with fixed size is not as efficient as sample with fraction, use sample with * fraction unless you need exact precision. * * @param withReplacement Whether element can be selected more than once. * @param numSamples The expected sample size. * @param seed Random number generator seed. * @return The sampled DataSet */ public static <T> DataSet<T> sampleWithSize( DataSet <T> input, final boolean withReplacement, final int numSamples, final long seed) { SampleInPartition<T> sampleInPartition = new SampleInPartition<>(withReplacement, numSamples, seed); MapPartitionOperator mapPartitionOperator = input.mapPartition(sampleInPartition); // There is no previous group, so the parallelism of GroupReduceOperator is always 1. String callLocation = Utils.getCallLocationName(); SampleInCoordinator<T> sampleInCoordinator = new SampleInCoordinator<>(withReplacement, numSamples, seed); return new GroupReduceOperator<>(mapPartitionOperator, input.getType(), sampleInCoordinator, callLocation); }
Example #19
Source File: DataSet.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns a new set containing the first n elements in this {@link DataSet}.
 *
 * @param n The desired number of elements; must be at least 1.
 * @return A GroupReduceOperator that represents the DataSet containing the elements.
 * @throws InvalidProgramException If {@code n} is smaller than 1.
 */
public GroupReduceOperator<T, T> first(int n) {
    if (n >= 1) {
        return reduceGroup(new FirstReducer<T>(n));
    }
    throw new InvalidProgramException("Parameter n of first(n) must be at least 1.");
}
Example #20
Source File: DataSet.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Returns a new set containing the first n elements in this {@link DataSet}.
 *
 * @param n The desired number of elements; must be at least 1.
 * @return A GroupReduceOperator that represents the DataSet containing the elements.
 * @throws InvalidProgramException If {@code n} is smaller than 1.
 */
public GroupReduceOperator<T, T> first(int n) {
    if (n >= 1) {
        return reduceGroup(new FirstReducer<T>(n));
    }
    throw new InvalidProgramException("Parameter n of first(n) must be at least 1.");
}
Example #21
Source File: GroupReduceOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties reported by a group-reduce that is grouped through
 * {@code DummyTestKeySelector} (no group sorting) and runs {@code DummyGroupReduceFunction1},
 * without any forwarded fields being declared on the operator call chain.
 */
@Test
public void testSemanticPropsWithKeySelector1() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> source =
            environment.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<
                    Tuple5<Integer, Long, String, Long, Integer>,
                    Tuple5<Integer, Long, String, Long, Integer>>
            operator =
                    source.groupBy(new DummyTestKeySelector())
                            .reduceGroup(new DummyGroupReduceFunction1());

    SemanticProperties props = operator.getSemanticProperties();

    // Source fields 0 and 1 are expected to have no forwarding target.
    for (int field = 0; field < 2; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Source fields 2..4 carry the forwarding information.
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
    for (int target : new int[] {1, 3}) {
        assertTrue(props.getForwardingTargetFields(0, 3).contains(target));
    }
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));

    // Source fields 5 and 6 are expected to have no forwarding target.
    for (int field = 5; field <= 6; field++) {
        assertTrue(props.getForwardingTargetFields(0, field).size() == 0);
    }

    // Reverse lookup: target field 0 has no source, targets 1..4 map back to 3, 4, 3, 2.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    final int[] expectedSources = {3, 4, 3, 2};
    for (int i = 0; i < expectedSources.length; i++) {
        assertTrue(props.getForwardingSourceField(0, i + 1) == expectedSources[i]);
    }

    // Exactly three read fields are expected: 2, 5 and 6.
    assertTrue(props.getReadFields(0).size() == 3);
    for (int readField : new int[] {2, 5, 6}) {
        assertTrue(props.getReadFields(0).contains(readField));
    }
}
Example #22
Source File: GroupReduceOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties of a group reduce applied to a key-selector grouping that
 * is additionally sorted by a key selector, with no forwarded fields declared on the
 * operator itself.
 *
 * <p>NOTE(review): the expected field indices presumably depend on how
 * {@code DummyTestKeySelector} and {@code DummyGroupReduceFunction1} are defined elsewhere in
 * this test class -- confirm there before changing any number.
 */
@Test
public void testSemanticPropsWithKeySelector2() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input =
            execEnv.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> groupReduce =
            input.groupBy(new DummyTestKeySelector())
                    .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                    .reduceGroup(new DummyGroupReduceFunction1());

    SemanticProperties props = groupReduce.getSemanticProperties();

    // Forwarding targets of input 0, queried for source fields 0 through 8.
    assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    assertTrue(props.getForwardingTargetFields(0, 5).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 5).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 7).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 8).size() == 0);

    // Reverse lookup: the source field behind each target field 0 through 4.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    assertTrue(props.getForwardingSourceField(0, 1) == 5);
    assertTrue(props.getForwardingSourceField(0, 2) == 6);
    assertTrue(props.getForwardingSourceField(0, 3) == 5);
    assertTrue(props.getForwardingSourceField(0, 4) == 4);

    // Read fields of input 0.
    assertTrue(props.getReadFields(0).size() == 3);
    assertTrue(props.getReadFields(0).contains(4));
    assertTrue(props.getReadFields(0).contains(7));
    assertTrue(props.getReadFields(0).contains(8));
}
Example #23
Source File: GroupReduceOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties of a group reduce applied to a key-selector grouping when
 * the forwarded fields {@code "0->4;1;1->3;2"} are declared on the operator via
 * {@code withForwardedFields}.
 *
 * <p>NOTE(review): the expected field indices presumably depend on how
 * {@code DummyTestKeySelector} and {@code DummyGroupReduceFunction2} are defined elsewhere in
 * this test class -- confirm there before changing any number.
 */
@Test
public void testSemanticPropsWithKeySelector3() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input =
            execEnv.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> groupReduce =
            input.groupBy(new DummyTestKeySelector())
                    .reduceGroup(new DummyGroupReduceFunction2())
                    .withForwardedFields("0->4;1;1->3;2");

    SemanticProperties props = groupReduce.getSemanticProperties();

    // Forwarding targets of input 0, queried for source fields 0 through 6.
    assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
    assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 3).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 0);

    // Reverse lookup: the source field behind each target field 0 through 4.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    assertTrue(props.getForwardingSourceField(0, 1) == 3);
    assertTrue(props.getForwardingSourceField(0, 2) == 4);
    assertTrue(props.getForwardingSourceField(0, 3) == 3);
    assertTrue(props.getForwardingSourceField(0, 4) == 2);

    // Read fields of input 0.
    assertTrue(props.getReadFields(0).size() == 3);
    assertTrue(props.getReadFields(0).contains(2));
    assertTrue(props.getReadFields(0).contains(5));
    assertTrue(props.getReadFields(0).contains(6));
}
Example #24
Source File: GroupReduceOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties of a group reduce applied to a key-selector grouping that
 * is additionally sorted by a key selector, with the forwarded fields
 * {@code "0->4;1;1->3;2"} declared on the operator via {@code withForwardedFields}.
 *
 * <p>NOTE(review): the expected field indices presumably depend on how
 * {@code DummyTestKeySelector} and {@code DummyGroupReduceFunction2} are defined elsewhere in
 * this test class -- confirm there before changing any number.
 */
@Test
public void testSemanticPropsWithKeySelector4() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input =
            execEnv.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> groupReduce =
            input.groupBy(new DummyTestKeySelector())
                    .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                    .reduceGroup(new DummyGroupReduceFunction2())
                    .withForwardedFields("0->4;1;1->3;2");

    SemanticProperties props = groupReduce.getSemanticProperties();

    // Forwarding targets of input 0, queried for source fields 0 through 8.
    assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(4));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    assertTrue(props.getForwardingTargetFields(0, 5).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 5).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 7).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 8).size() == 0);

    // Reverse lookup: the source field behind each target field 0 through 4.
    assertTrue(props.getForwardingSourceField(0, 0) < 0);
    assertTrue(props.getForwardingSourceField(0, 1) == 5);
    assertTrue(props.getForwardingSourceField(0, 2) == 6);
    assertTrue(props.getForwardingSourceField(0, 3) == 5);
    assertTrue(props.getForwardingSourceField(0, 4) == 4);

    // Read fields of input 0.
    assertTrue(props.getReadFields(0).size() == 3);
    assertTrue(props.getReadFields(0).contains(4));
    assertTrue(props.getReadFields(0).contains(7));
    assertTrue(props.getReadFields(0).contains(8));
}
Example #25
Source File: GroupReduceOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties of a group reduce applied to a key-selector grouping when
 * the forwarded fields {@code "4->0;3;3->1;2"} are declared on the operator via
 * {@code withForwardedFields}; no read fields are expected to be reported.
 *
 * <p>NOTE(review): the expected field indices presumably depend on how
 * {@code DummyTestKeySelector} and {@code DummyGroupReduceFunction3} are defined elsewhere in
 * this test class -- confirm there before changing any number.
 */
@Test
public void testSemanticPropsWithKeySelector5() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input =
            execEnv.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> groupReduce =
            input.groupBy(new DummyTestKeySelector())
                    .reduceGroup(new DummyGroupReduceFunction3())
                    .withForwardedFields("4->0;3;3->1;2");

    SemanticProperties props = groupReduce.getSemanticProperties();

    // Forwarding targets of input 0, queried for source fields 0 through 6.
    assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 2);
    assertTrue(props.getForwardingTargetFields(0, 5).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 5).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(0));

    // Reverse lookup: the source field behind each target field 0 through 4.
    assertTrue(props.getForwardingSourceField(0, 0) == 6);
    assertTrue(props.getForwardingSourceField(0, 1) == 5);
    assertTrue(props.getForwardingSourceField(0, 2) == 4);
    assertTrue(props.getForwardingSourceField(0, 3) == 5);
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // No read-field information is expected for input 0.
    assertTrue(props.getReadFields(0) == null);
}
Example #26
Source File: GroupReduceOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties of a group reduce applied to a key-selector grouping that
 * is additionally sorted by a key selector, with the forwarded fields
 * {@code "4->0;3;3->1;2"} declared on the operator via {@code withForwardedFields}; no read
 * fields are expected to be reported.
 *
 * <p>NOTE(review): the expected field indices presumably depend on how
 * {@code DummyTestKeySelector} and {@code DummyGroupReduceFunction3} are defined elsewhere in
 * this test class -- confirm there before changing any number.
 */
@Test
public void testSemanticPropsWithKeySelector6() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input =
            execEnv.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> groupReduce =
            input.groupBy(new DummyTestKeySelector())
                    .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                    .reduceGroup(new DummyGroupReduceFunction3())
                    .withForwardedFields("4->0;3;3->1;2");

    SemanticProperties props = groupReduce.getSemanticProperties();

    // Forwarding targets of input 0, queried for source fields 0 through 8.
    assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 7).size() == 2);
    assertTrue(props.getForwardingTargetFields(0, 7).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 7).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 8).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 8).contains(0));

    // Reverse lookup: the source field behind each target field 0 through 4.
    assertTrue(props.getForwardingSourceField(0, 0) == 8);
    assertTrue(props.getForwardingSourceField(0, 1) == 7);
    assertTrue(props.getForwardingSourceField(0, 2) == 6);
    assertTrue(props.getForwardingSourceField(0, 3) == 7);
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // No read-field information is expected for input 0.
    assertTrue(props.getReadFields(0) == null);
}
Example #27
Source File: GroupReduceOperatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks the semantic properties of a group reduce with {@code DummyGroupReduceFunction4}
 * applied to a key-selector grouping, with no forwarded fields declared on the operator
 * itself; no read fields are expected to be reported.
 *
 * <p>NOTE(review): the expected field indices presumably depend on how
 * {@code DummyTestKeySelector} and {@code DummyGroupReduceFunction4} are defined elsewhere in
 * this test class -- confirm there before changing any number.
 */
@Test
public void testSemanticPropsWithKeySelector7() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input =
            execEnv.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> groupReduce =
            input.groupBy(new DummyTestKeySelector())
                    .reduceGroup(new DummyGroupReduceFunction4());

    SemanticProperties props = groupReduce.getSemanticProperties();

    // Forwarding targets of input 0, queried for source fields 0 through 6.
    assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(0));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 5).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 0);

    // Reverse lookup: the source field behind each target field 0 through 4.
    assertTrue(props.getForwardingSourceField(0, 0) == 2);
    assertTrue(props.getForwardingSourceField(0, 1) == 3);
    assertTrue(props.getForwardingSourceField(0, 2) < 0);
    assertTrue(props.getForwardingSourceField(0, 3) == 5);
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // No read-field information is expected for input 0.
    assertTrue(props.getReadFields(0) == null);
}
Example #28
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies forwarding and read-field information for a key-selector grouping reduced with
 * {@code DummyGroupReduceFunction4}, where the operator itself declares no forwarded fields
 * and no read fields are expected to be reported.
 *
 * <p>NOTE(review): the expected field indices presumably depend on how
 * {@code DummyTestKeySelector} and {@code DummyGroupReduceFunction4} are defined elsewhere in
 * this test class -- confirm there before changing any number.
 */
@Test
public void testSemanticPropsWithKeySelector7() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input =
            execEnv.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> groupReduce =
            input.groupBy(new DummyTestKeySelector())
                    .reduceGroup(new DummyGroupReduceFunction4());

    SemanticProperties props = groupReduce.getSemanticProperties();

    // Source fields 0 through 6 of input 0: which target fields each one is forwarded to.
    assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 2).contains(0));
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 5).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 0);

    // Target fields 0 through 4: the source field each one originates from.
    assertTrue(props.getForwardingSourceField(0, 0) == 2);
    assertTrue(props.getForwardingSourceField(0, 1) == 3);
    assertTrue(props.getForwardingSourceField(0, 2) < 0);
    assertTrue(props.getForwardingSourceField(0, 3) == 5);
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // Read fields of input 0 are expected to be absent.
    assertTrue(props.getReadFields(0) == null);
}
Example #29
Source File: DataSetUtils.java From flink with Apache License 2.0 | 5 votes |
/** * Generate a sample of DataSet which contains fixed size elements. * * <p><strong>NOTE:</strong> Sample with fixed size is not as efficient as sample with fraction, use sample with * fraction unless you need exact precision. * * @param withReplacement Whether element can be selected more than once. * @param numSamples The expected sample size. * @param seed Random number generator seed. * @return The sampled DataSet */ public static <T> DataSet<T> sampleWithSize( DataSet <T> input, final boolean withReplacement, final int numSamples, final long seed) { SampleInPartition<T> sampleInPartition = new SampleInPartition<>(withReplacement, numSamples, seed); MapPartitionOperator mapPartitionOperator = input.mapPartition(sampleInPartition); // There is no previous group, so the parallelism of GroupReduceOperator is always 1. String callLocation = Utils.getCallLocationName(); SampleInCoordinator<T> sampleInCoordinator = new SampleInCoordinator<>(withReplacement, numSamples, seed); return new GroupReduceOperator<>(mapPartitionOperator, input.getType(), sampleInCoordinator, callLocation); }
Example #30
Source File: GroupReduceOperatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies forwarding and read-field information for a key-selector grouping that is sorted
 * by a key selector and reduced with {@code DummyGroupReduceFunction3}, with the forwarded
 * fields {@code "4->0;3;3->1;2"} declared via {@code withForwardedFields}; no read fields are
 * expected to be reported.
 *
 * <p>NOTE(review): the expected field indices presumably depend on how
 * {@code DummyTestKeySelector} and {@code DummyGroupReduceFunction3} are defined elsewhere in
 * this test class -- confirm there before changing any number.
 */
@Test
public void testSemanticPropsWithKeySelector6() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> input =
            execEnv.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupReduceOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> groupReduce =
            input.groupBy(new DummyTestKeySelector())
                    .sortGroup(new DummyTestKeySelector(), Order.ASCENDING)
                    .reduceGroup(new DummyGroupReduceFunction3())
                    .withForwardedFields("4->0;3;3->1;2");

    SemanticProperties props = groupReduce.getSemanticProperties();

    // Source fields 0 through 8 of input 0: which target fields each one is forwarded to.
    assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 2).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 3).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 4).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 6).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 7).size() == 2);
    assertTrue(props.getForwardingTargetFields(0, 7).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 7).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 8).size() == 1);
    assertTrue(props.getForwardingTargetFields(0, 8).contains(0));

    // Target fields 0 through 4: the source field each one originates from.
    assertTrue(props.getForwardingSourceField(0, 0) == 8);
    assertTrue(props.getForwardingSourceField(0, 1) == 7);
    assertTrue(props.getForwardingSourceField(0, 2) == 6);
    assertTrue(props.getForwardingSourceField(0, 3) == 7);
    assertTrue(props.getForwardingSourceField(0, 4) < 0);

    // Read fields of input 0 are expected to be absent.
    assertTrue(props.getReadFields(0) == null);
}