org.apache.flink.api.common.functions.RichReduceFunction Java Examples
The following examples show how to use
org.apache.flink.api.common.functions.RichReduceFunction.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WindowTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * .reduce() does not support RichReduceFunction, since the reduce function is used internally * in a {@code ReducingState}. */ @Test(expected = UnsupportedOperationException.class) public void testReduceWithRichReducerFails() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); source .keyBy(0) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new RichReduceFunction<Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { return null; } }); fail("exception was not thrown"); }
Example #2
Source File: AllWindowTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * .reduce() does not support RichReduceFunction, since the reduce function is used internally * in a {@code ReducingState}. */ @Test(expected = UnsupportedOperationException.class) public void testReduceWithRichReducerFails() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); source .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new RichReduceFunction<Tuple2<String, Integer>>() { private static final long serialVersionUID = -6448847205314995812L; @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { return null; } }); fail("exception was not thrown"); }
Example #3
Source File: AllWindowTranslationTest.java From flink with Apache License 2.0 | 6 votes |
/** * .reduce() does not support RichReduceFunction, since the reduce function is used internally * in a {@code ReducingState}. */ @Test(expected = UnsupportedOperationException.class) public void testReduceWithRichReducerFails() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); source .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new RichReduceFunction<Tuple2<String, Integer>>() { private static final long serialVersionUID = -6448847205314995812L; @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { return null; } }); fail("exception was not thrown"); }
Example #4
Source File: WindowTranslationTest.java From flink with Apache License 2.0 | 6 votes |
/** * .reduce() does not support RichReduceFunction, since the reduce function is used internally * in a {@code ReducingState}. */ @Test(expected = UnsupportedOperationException.class) public void testReduceWithRichReducerFails() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); source .keyBy(0) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new RichReduceFunction<Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { return null; } }); fail("exception was not thrown"); }
Example #5
Source File: AllWindowTranslationTest.java From flink with Apache License 2.0 | 6 votes |
/** * .reduce() does not support RichReduceFunction, since the reduce function is used internally * in a {@code ReducingState}. */ @Test(expected = UnsupportedOperationException.class) public void testReduceWithRichReducerFails() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); source .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new RichReduceFunction<Tuple2<String, Integer>>() { private static final long serialVersionUID = -6448847205314995812L; @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { return null; } }); fail("exception was not thrown"); }
Example #6
Source File: WindowTranslationTest.java From flink with Apache License 2.0 | 6 votes |
/** * .reduce() does not support RichReduceFunction, since the reduce function is used internally * in a {@code ReducingState}. */ @Test(expected = UnsupportedOperationException.class) public void testReduceWithRichReducerFails() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); source .keyBy(0) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new RichReduceFunction<Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { return null; } }); fail("exception was not thrown"); }
Example #7
Source File: ReduceTranslationTests.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void translateNonGroupedReduce() {
try {
final int parallelism = 8;
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
return value1;
}
}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
// check types
assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
// check keys
assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0);
// parallelism was not configured on the operator
assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT);
assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test caused an error: " + e.getMessage());
}
}
Example #8
Source File: ReduceTranslationTests.java From flink with Apache License 2.0 | 5 votes |
@Test
public void translateNonGroupedReduce() {
try {
final int parallelism = 8;
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
return value1;
}
}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
// check types
assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
// check keys
assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0);
// parallelism was not configured on the operator
assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT);
assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test caused an error: " + e.getMessage());
}
}
Example #9
Source File: ReduceTranslationTests.java From flink with Apache License 2.0 | 5 votes |
@Test
public void translateNonGroupedReduce() {
try {
final int parallelism = 8;
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
return value1;
}
}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
// check types
assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
// check keys
assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0);
// parallelism was not configured on the operator
assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT);
assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test caused an error: " + e.getMessage());
}
}
Example #10
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testGroupedReduceWithHint() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(new KeySelector<Tuple2<String,Double>, String>() {
public String getKey(Tuple2<String, Double> value) { return value.f0; }
})
.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2){
return null;
}
}).setCombineHint(CombineHint.HASH).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// get the key extractors and projectors
SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, keyExtractor.getInput().getSource());
assertEquals(keyProjector, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), combineNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, keyExtractor.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, keyProjector.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #11
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testAllReduceNoCombiner() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
data.reduce(new RichReduceFunction<Double>() {
@Override
public Double reduce(Double value1, Double value2){
return value1 + value2;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Double>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// the all-reduce has no combiner, when the parallelism of the input is one
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// check wiring
assertEquals(sourceNode, reduceNode.getInput().getSource());
assertEquals(reduceNode, sinkNode.getInput().getSource());
// check parallelism
assertEquals(1, sourceNode.getParallelism());
assertEquals(1, reduceNode.getParallelism());
assertEquals(1, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #12
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
/**
* Test program compilation when the Reduce's combiner has been excluded
* by setting {@code CombineHint.NONE}.
*/
@Test
public void testGroupedReduceWithoutCombiner() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(0)
.reduce(new RichReduceFunction<Tuple2<String, Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
return null;
}
}).setCombineHint(CombineHint.NONE).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// check wiring
assertEquals(sourceNode, reduceNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
Example #13
Source File: ReduceTranslationTests.java From flink with Apache License 2.0 | 4 votes |
@Test
public void translateGroupedReduceNoMapper() {
try {
final int parallelism = 8;
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
initialData
.groupBy(2)
.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
return value1;
}
})
.output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
// check types
assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
// parallelism was not configured on the operator
assertTrue(reducer.getParallelism() == parallelism || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT);
// check keys
assertArrayEquals(new int[] {2}, reducer.getKeyColumns(0));
assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test caused an error: " + e.getMessage());
}
}
Example #14
Source File: ReduceTranslationTests.java From flink with Apache License 2.0 | 4 votes |
@Test
public void translateGroupedReduceWithkeyExtractor() {
try {
final int parallelism = 8;
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
initialData
.groupBy(new KeySelector<Tuple3<Double, StringValue, LongValue>, StringValue>() {
public StringValue getKey(Tuple3<Double, StringValue, LongValue> value) {
return value.f1;
}
})
.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
return value1;
}
}).setParallelism(4)
.output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
MapOperatorBase<?, ?, ?> keyProjector = (MapOperatorBase<?, ?, ?>) sink.getInput();
PlanUnwrappingReduceOperator<?, ?> reducer = (PlanUnwrappingReduceOperator<?, ?>) keyProjector.getInput();
MapOperatorBase<?, ?, ?> keyExtractor = (MapOperatorBase<?, ?, ?>) reducer.getInput();
// check the parallelisms
assertEquals(1, keyExtractor.getParallelism());
assertEquals(4, reducer.getParallelism());
assertEquals(4, keyProjector.getParallelism());
// check types
TypeInformation<?> keyValueInfo = new TupleTypeInfo<Tuple2<StringValue, Tuple3<Double, StringValue, LongValue>>>(
new ValueTypeInfo<StringValue>(StringValue.class),
initialData.getType());
assertEquals(initialData.getType(), keyExtractor.getOperatorInfo().getInputType());
assertEquals(keyValueInfo, keyExtractor.getOperatorInfo().getOutputType());
assertEquals(keyValueInfo, reducer.getOperatorInfo().getInputType());
assertEquals(keyValueInfo, reducer.getOperatorInfo().getOutputType());
assertEquals(keyValueInfo, keyProjector.getOperatorInfo().getInputType());
assertEquals(initialData.getType(), keyProjector.getOperatorInfo().getOutputType());
// check keys
assertEquals(KeyExtractingMapper.class, keyExtractor.getUserCodeWrapper().getUserCodeClass());
assertTrue(keyExtractor.getInput() instanceof GenericDataSourceBase<?, ?>);
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test caused an error: " + e.getMessage());
}
}
Example #15
Source File: ReduceOperatorTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testReduceCollectionWithRuntimeContext() {
try {
final String taskName = "Test Task";
final AtomicBoolean opened = new AtomicBoolean();
final AtomicBoolean closed = new AtomicBoolean();
final ReduceFunction<Tuple2<String, Integer>> reducer = new RichReduceFunction<Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> reduce(
Tuple2<String, Integer> value1,
Tuple2<String, Integer> value2) throws Exception {
return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
}
@Override
public void open(Configuration parameters) throws Exception {
opened.set(true);
RuntimeContext ctx = getRuntimeContext();
assertEquals(0, ctx.getIndexOfThisSubtask());
assertEquals(1, ctx.getNumberOfParallelSubtasks());
assertEquals(taskName, ctx.getTaskName());
}
@Override
public void close() throws Exception {
closed.set(true);
}
};
ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> op =
new ReduceOperatorBase<>(
reducer,
new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE),
new int[]{0},
"TestReducer");
List<Tuple2<String, Integer>> input = new ArrayList<>(asList(
new Tuple2<>("foo", 1),
new Tuple2<>("foo", 3),
new Tuple2<>("bar", 2),
new Tuple2<>("bar", 4)));
final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
ExecutionConfig executionConfig = new ExecutionConfig();
executionConfig.disableObjectReuse();
List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input,
new RuntimeUDFContext(taskInfo, null, executionConfig,
new HashMap<>(),
new HashMap<>(),
new UnregisteredMetricsGroup()),
executionConfig);
executionConfig.enableObjectReuse();
List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input,
new RuntimeUDFContext(taskInfo, null, executionConfig,
new HashMap<>(),
new HashMap<>(),
new UnregisteredMetricsGroup()),
executionConfig);
Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<>(resultMutableSafe);
Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<>(resultRegular);
Set<Tuple2<String, Integer>> expectedResult = new HashSet<>(asList(
new Tuple2<>("foo", 4),
new Tuple2<>("bar", 6)));
assertEquals(expectedResult, resultSetMutableSafe);
assertEquals(expectedResult, resultSetRegular);
assertTrue(opened.get());
assertTrue(closed.get());
}
catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
Example #16
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testAllReduceNoCombiner() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
data.reduce(new RichReduceFunction<Double>() {
@Override
public Double reduce(Double value1, Double value2){
return value1 + value2;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Double>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// the all-reduce has no combiner, when the parallelism of the input is one
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// check wiring
assertEquals(sourceNode, reduceNode.getInput().getSource());
assertEquals(reduceNode, sinkNode.getInput().getSource());
// check parallelism
assertEquals(1, sourceNode.getParallelism());
assertEquals(1, reduceNode.getParallelism());
assertEquals(1, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #17
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testAllReduceWithCombiner() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
data.reduce(new RichReduceFunction<Long>() {
@Override
public Long reduce(Long value1, Long value2){
return value1 + value2;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Long>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, combineNode.getInput().getSource());
assertEquals(reduceNode, sinkNode.getInput().getSource());
// check that both reduce and combiner have the same strategy
assertEquals(DriverStrategy.ALL_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.ALL_REDUCE, combineNode.getDriverStrategy());
// check parallelism
assertEquals(8, sourceNode.getParallelism());
assertEquals(8, combineNode.getParallelism());
assertEquals(1, reduceNode.getParallelism());
assertEquals(1, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #18
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testGroupedReduceWithFieldPositionKey() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(1)
.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2){
return null;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, combineNode.getInput().getSource());
assertEquals(reduceNode, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(1), reduceNode.getKeys(0));
assertEquals(new FieldList(1), combineNode.getKeys(0));
assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #19
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testGroupedReduceWithSelectorFunctionKey() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(new KeySelector<Tuple2<String,Double>, String>() {
public String getKey(Tuple2<String, Double> value) { return value.f0; }
})
.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2){
return null;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// get the key extractors and projectors
SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, keyExtractor.getInput().getSource());
assertEquals(keyProjector, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), combineNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, keyExtractor.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, keyProjector.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #20
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testGroupedReduceWithHint() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(new KeySelector<Tuple2<String,Double>, String>() {
public String getKey(Tuple2<String, Double> value) { return value.f0; }
})
.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2){
return null;
}
}).setCombineHint(CombineHint.HASH).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// get the key extractors and projectors
SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, keyExtractor.getInput().getSource());
assertEquals(keyProjector, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), combineNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, keyExtractor.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, keyProjector.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #21
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
/**
* Test program compilation when the Reduce's combiner has been excluded
* by setting {@code CombineHint.NONE}.
*/
@Test
public void testGroupedReduceWithoutCombiner() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(0)
.reduce(new RichReduceFunction<Tuple2<String, Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
return null;
}
}).setCombineHint(CombineHint.NONE).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// check wiring
assertEquals(sourceNode, reduceNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
Example #22
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testGroupedReduceWithSelectorFunctionKey() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(new KeySelector<Tuple2<String,Double>, String>() {
public String getKey(Tuple2<String, Double> value) { return value.f0; }
})
.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2){
return null;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// get the key extractors and projectors
SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, keyExtractor.getInput().getSource());
assertEquals(keyProjector, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), combineNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, keyExtractor.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, keyProjector.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #23
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testGroupedReduceWithFieldPositionKey() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(1)
.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2){
return null;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, combineNode.getInput().getSource());
assertEquals(reduceNode, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(1), reduceNode.getKeys(0));
assertEquals(new FieldList(1), combineNode.getKeys(0));
assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #24
Source File: ReduceCompilationTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testAllReduceWithCombiner() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
data.reduce(new RichReduceFunction<Long>() {
@Override
public Long reduce(Long value1, Long value2){
return value1 + value2;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Long>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, combineNode.getInput().getSource());
assertEquals(reduceNode, sinkNode.getInput().getSource());
// check that both reduce and combiner have the same strategy
assertEquals(DriverStrategy.ALL_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.ALL_REDUCE, combineNode.getDriverStrategy());
// check parallelism
assertEquals(8, sourceNode.getParallelism());
assertEquals(8, combineNode.getParallelism());
assertEquals(1, reduceNode.getParallelism());
assertEquals(1, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #25
Source File: ReduceOperatorTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testReduceCollectionWithRuntimeContext() {
try {
final String taskName = "Test Task";
final AtomicBoolean opened = new AtomicBoolean();
final AtomicBoolean closed = new AtomicBoolean();
final ReduceFunction<Tuple2<String, Integer>> reducer = new RichReduceFunction<Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> reduce(
Tuple2<String, Integer> value1,
Tuple2<String, Integer> value2) throws Exception {
return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
}
@Override
public void open(Configuration parameters) throws Exception {
opened.set(true);
RuntimeContext ctx = getRuntimeContext();
assertEquals(0, ctx.getIndexOfThisSubtask());
assertEquals(1, ctx.getNumberOfParallelSubtasks());
assertEquals(taskName, ctx.getTaskName());
}
@Override
public void close() throws Exception {
closed.set(true);
}
};
ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> op =
new ReduceOperatorBase<>(
reducer,
new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE),
new int[]{0},
"TestReducer");
List<Tuple2<String, Integer>> input = new ArrayList<>(asList(
new Tuple2<>("foo", 1),
new Tuple2<>("foo", 3),
new Tuple2<>("bar", 2),
new Tuple2<>("bar", 4)));
final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
ExecutionConfig executionConfig = new ExecutionConfig();
executionConfig.disableObjectReuse();
List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input,
new RuntimeUDFContext(taskInfo, null, executionConfig,
new HashMap<>(),
new HashMap<>(),
new UnregisteredMetricsGroup()),
executionConfig);
executionConfig.enableObjectReuse();
List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input,
new RuntimeUDFContext(taskInfo, null, executionConfig,
new HashMap<>(),
new HashMap<>(),
new UnregisteredMetricsGroup()),
executionConfig);
Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<>(resultMutableSafe);
Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<>(resultRegular);
Set<Tuple2<String, Integer>> expectedResult = new HashSet<>(asList(
new Tuple2<>("foo", 4),
new Tuple2<>("bar", 6)));
assertEquals(expectedResult, resultSetMutableSafe);
assertEquals(expectedResult, resultSetRegular);
assertTrue(opened.get());
assertTrue(closed.get());
}
catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
Example #26
Source File: ReduceTranslationTests.java From flink with Apache License 2.0 | 4 votes |
@Test
public void translateGroupedReduceWithkeyExtractor() {
try {
final int parallelism = 8;
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
initialData
.groupBy(new KeySelector<Tuple3<Double, StringValue, LongValue>, StringValue>() {
public StringValue getKey(Tuple3<Double, StringValue, LongValue> value) {
return value.f1;
}
})
.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
return value1;
}
}).setParallelism(4)
.output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
MapOperatorBase<?, ?, ?> keyProjector = (MapOperatorBase<?, ?, ?>) sink.getInput();
PlanUnwrappingReduceOperator<?, ?> reducer = (PlanUnwrappingReduceOperator<?, ?>) keyProjector.getInput();
MapOperatorBase<?, ?, ?> keyExtractor = (MapOperatorBase<?, ?, ?>) reducer.getInput();
// check the parallelisms
assertEquals(1, keyExtractor.getParallelism());
assertEquals(4, reducer.getParallelism());
assertEquals(4, keyProjector.getParallelism());
// check types
TypeInformation<?> keyValueInfo = new TupleTypeInfo<Tuple2<StringValue, Tuple3<Double, StringValue, LongValue>>>(
new ValueTypeInfo<StringValue>(StringValue.class),
initialData.getType());
assertEquals(initialData.getType(), keyExtractor.getOperatorInfo().getInputType());
assertEquals(keyValueInfo, keyExtractor.getOperatorInfo().getOutputType());
assertEquals(keyValueInfo, reducer.getOperatorInfo().getInputType());
assertEquals(keyValueInfo, reducer.getOperatorInfo().getOutputType());
assertEquals(keyValueInfo, keyProjector.getOperatorInfo().getInputType());
assertEquals(initialData.getType(), keyProjector.getOperatorInfo().getOutputType());
// check keys
assertEquals(KeyExtractingMapper.class, keyExtractor.getUserCodeWrapper().getUserCodeClass());
assertTrue(keyExtractor.getInput() instanceof GenericDataSourceBase<?, ?>);
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test caused an error: " + e.getMessage());
}
}
Example #27
Source File: ReduceTranslationTests.java From flink with Apache License 2.0 | 4 votes |
@Test
public void translateGroupedReduceNoMapper() {
try {
final int parallelism = 8;
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
initialData
.groupBy(2)
.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
return value1;
}
})
.output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
Plan p = env.createProgramPlan();
GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
// check types
assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
// parallelism was not configured on the operator
assertTrue(reducer.getParallelism() == parallelism || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT);
// check keys
assertArrayEquals(new int[] {2}, reducer.getKeyColumns(0));
assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test caused an error: " + e.getMessage());
}
}
Example #28
Source File: ReduceCompilationTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
* Test program compilation when the Reduce's combiner has been excluded
* by setting {@code CombineHint.NONE}.
*/
@Test
public void testGroupedReduceWithoutCombiner() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(0)
.reduce(new RichReduceFunction<Tuple2<String, Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
return null;
}
}).setCombineHint(CombineHint.NONE).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// check wiring
assertEquals(sourceNode, reduceNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
Example #29
Source File: ReduceCompilationTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test
public void testGroupedReduceWithHint() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(new KeySelector<Tuple2<String,Double>, String>() {
public String getKey(Tuple2<String, Double> value) { return value.f0; }
})
.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2){
return null;
}
}).setCombineHint(CombineHint.HASH).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// get the key extractors and projectors
SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, keyExtractor.getInput().getSource());
assertEquals(keyProjector, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), combineNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, keyExtractor.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, keyProjector.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}
Example #30
Source File: ReduceCompilationTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test
public void testGroupedReduceWithSelectorFunctionKey() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
.name("source").setParallelism(6);
data
.groupBy(new KeySelector<Tuple2<String,Double>, String>() {
public String getKey(Tuple2<String, Double> value) { return value.f0; }
})
.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
@Override
public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2){
return null;
}
}).name("reducer")
.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
// get the original nodes
SourcePlanNode sourceNode = resolver.getNode("source");
SingleInputPlanNode reduceNode = resolver.getNode("reducer");
SinkPlanNode sinkNode = resolver.getNode("sink");
// get the combiner
SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
// get the key extractors and projectors
SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
// check wiring
assertEquals(sourceNode, keyExtractor.getInput().getSource());
assertEquals(keyProjector, sinkNode.getInput().getSource());
// check the strategies
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
// check the keys
assertEquals(new FieldList(0), reduceNode.getKeys(0));
assertEquals(new FieldList(0), combineNode.getKeys(0));
assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
// check parallelism
assertEquals(6, sourceNode.getParallelism());
assertEquals(6, keyExtractor.getParallelism());
assertEquals(6, combineNode.getParallelism());
assertEquals(8, reduceNode.getParallelism());
assertEquals(8, keyProjector.getParallelism());
assertEquals(8, sinkNode.getParallelism());
}
catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
}
}