Java Code Examples for org.apache.flink.api.java.DataSet#reduceGroup()
The following examples show how to use
org.apache.flink.api.java.DataSet#reduceGroup().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GroupReduceITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Runs {@code AllAddingCustomTypeGroupReduce} over the whole (ungrouped) custom-type
 * data set and compares the collected output with the expected text.
 */
@Test
public void testCorrectnessOfAllGroupReduceForCustomTypes() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // Source -> all-group-reduce -> collect, expressed as a single chain.
    final List<CustomType> collected = CollectionDataSets.getCustomTypeDataSet(environment)
            .reduceGroup(new AllAddingCustomTypeGroupReduce())
            .collect();

    compareResultAsText(collected, "91,210,Hello!");
}
Example 2
Source File: ReduceWithCombinerITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testForkingReduceOnNonKeyedDataset() throws Exception { // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // creates the input data and distributes them evenly among the available downstream tasks DataSet<Tuple2<Integer, Boolean>> input = createNonKeyedInput(env); DataSet<Tuple2<Integer, Boolean>> r1 = input.reduceGroup(new NonKeyedCombReducer()); DataSet<Tuple2<Integer, Boolean>> r2 = input.reduceGroup(new NonKeyedGroupCombReducer()); List<Tuple2<Integer, Boolean>> actual = r1.union(r2).collect(); String expected = "10,true\n10,true\n"; compareResultAsTuples(actual, expected); }
Example 3
Source File: GroupReduceITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Runs {@code AllAddingCustomTypeGroupReduce} over the whole (ungrouped) custom-type
 * data set and compares the collected output with the expected text.
 */
@Test
public void testCorrectnessOfAllGroupReduceForCustomTypes() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // Source -> all-group-reduce -> collect, expressed as a single chain.
    final List<CustomType> collected = CollectionDataSets.getCustomTypeDataSet(environment)
            .reduceGroup(new AllAddingCustomTypeGroupReduce())
            .collect();

    compareResultAsText(collected, "91,210,Hello!");
}
Example 4
Source File: GroupReduceITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Runs {@code AllAddingTuple3GroupReduce} over the whole (ungrouped) 3-tuple data set
 * and compares the collected output with the expected tuple text.
 */
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> tuples =
            CollectionDataSets.get3TupleDataSet(environment);
    final List<Tuple3<Integer, Long, String>> collected =
            tuples.reduceGroup(new AllAddingTuple3GroupReduce()).collect();

    compareResultAsTuples(collected, "231,91,Hello World\n");
}
Example 5
Source File: HadoopReduceCombineFunctionITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Reduces the ungrouped, mapped key/value data set with a
 * {@code HadoopReduceCombineFunction} built from two {@code SumReducer} instances,
 * writes the result as text, and compares the written lines with {@code (0,231)}.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple2<IntWritable, IntWritable>> mapped =
            HadoopTestData.getKVPairDataSet(env).map(new Mapper2());

    final DataSet<Tuple2<IntWritable, IntWritable>> summed = mapped.reduceGroup(
            new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                    new SumReducer(), new SumReducer()));

    final String outputPath = tempFolder.newFile().toURI().toString();
    summed.writeAsText(outputPath);

    // Run the job before the written output is read back for comparison.
    env.execute();

    compareResultsByLinesInMemory("(0,231)\n", outputPath);
}
Example 6
Source File: QuantileDiscretizerTrainBatchOp.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Converts a data set of model rows into a data set of the {@code FeatureBorder}
 * values held by the loaded model.
 *
 * @param modelDataSet rows handed to {@code QuantileDiscretizerModelDataConverter#load}
 * @return one record per value in the loaded model's {@code data} map
 */
public static DataSet<FeatureBorder> transformModelToFeatureBorder(DataSet<Row> modelDataSet) {
    // Deliberately an anonymous class, not a lambda: the class keeps its generic
    // signature, whereas a lambda with a generic Collector parameter would not.
    GroupReduceFunction<Row, FeatureBorder> emitBorders =
        new GroupReduceFunction<Row, FeatureBorder>() {
            @Override
            public void reduce(Iterable<Row> values, Collector<FeatureBorder> out) throws Exception {
                // The converter loads from a List, so buffer the iterable first.
                List<Row> modelRows = new ArrayList<>();
                for (Row row : values) {
                    modelRows.add(row);
                }

                QuantileDiscretizerModelDataConverter converter =
                    new QuantileDiscretizerModelDataConverter().load(modelRows);

                // Only the map values are emitted; the keys are not needed.
                for (FeatureBorder border : converter.data.values()) {
                    out.collect(border);
                }
            }
        };

    return modelDataSet.reduceGroup(emitBorders);
}
Example 7
Source File: ReduceWithCombinerITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testForkingReduceOnNonKeyedDataset() throws Exception { // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // creates the input data and distributes them evenly among the available downstream tasks DataSet<Tuple2<Integer, Boolean>> input = createNonKeyedInput(env); DataSet<Tuple2<Integer, Boolean>> r1 = input.reduceGroup(new NonKeyedCombReducer()); DataSet<Tuple2<Integer, Boolean>> r2 = input.reduceGroup(new NonKeyedGroupCombReducer()); List<Tuple2<Integer, Boolean>> actual = r1.union(r2).collect(); String expected = "10,true\n10,true\n"; compareResultAsTuples(actual, expected); }
Example 8
Source File: GroupReduceITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Runs {@code AllAddingTuple3GroupReduce} over the whole (ungrouped) 3-tuple data set
 * and compares the collected output with the expected tuple text.
 */
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> tuples =
            CollectionDataSets.get3TupleDataSet(environment);
    final List<Tuple3<Integer, Long, String>> collected =
            tuples.reduceGroup(new AllAddingTuple3GroupReduce()).collect();

    compareResultAsTuples(collected, "231,91,Hello World\n");
}
Example 9
Source File: HadoopReduceCombineFunctionITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Reduces the ungrouped, mapped key/value data set with a
 * {@code HadoopReduceCombineFunction} built from two {@code SumReducer} instances,
 * writes the result as text, and compares the written lines with {@code (0,231)}.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple2<IntWritable, IntWritable>> mapped =
            HadoopTestData.getKVPairDataSet(env).map(new Mapper2());

    final DataSet<Tuple2<IntWritable, IntWritable>> summed = mapped.reduceGroup(
            new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                    new SumReducer(), new SumReducer()));

    final String outputPath = tempFolder.newFile().toURI().toString();
    summed.writeAsText(outputPath);

    // Run the job before the written output is read back for comparison.
    env.execute();

    compareResultsByLinesInMemory("(0,231)\n", outputPath);
}
Example 10
Source File: ReduceWithCombinerITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testForkingReduceOnNonKeyedDataset() throws Exception { // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // creates the input data and distributes them evenly among the available downstream tasks DataSet<Tuple2<Integer, Boolean>> input = createNonKeyedInput(env); DataSet<Tuple2<Integer, Boolean>> r1 = input.reduceGroup(new NonKeyedCombReducer()); DataSet<Tuple2<Integer, Boolean>> r2 = input.reduceGroup(new NonKeyedGroupCombReducer()); List<Tuple2<Integer, Boolean>> actual = r1.union(r2).collect(); String expected = "10,true\n10,true\n"; compareResultAsTuples(actual, expected); }
Example 11
Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Runs {@code AllAddingCustomTypeGroupReduce} over the whole (ungrouped) custom-type
 * data set and compares the collected output with the expected text.
 */
@Test
public void testCorrectnessOfAllGroupReduceForCustomTypes() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // Source -> all-group-reduce -> collect, expressed as a single chain.
    final List<CustomType> collected = CollectionDataSets.getCustomTypeDataSet(environment)
            .reduceGroup(new AllAddingCustomTypeGroupReduce())
            .collect();

    compareResultAsText(collected, "91,210,Hello!");
}
Example 12
Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Runs {@code AllAddingTuple3GroupReduce} over the whole (ungrouped) 3-tuple data set
 * and compares the collected output with the expected tuple text.
 */
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> tuples =
            CollectionDataSets.get3TupleDataSet(environment);
    final List<Tuple3<Integer, Long, String>> collected =
            tuples.reduceGroup(new AllAddingTuple3GroupReduce()).collect();

    compareResultAsTuples(collected, "231,91,Hello World\n");
}
Example 13
Source File: HadoopReduceCombineFunctionITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Reduces the ungrouped, mapped key/value data set with a
 * {@code HadoopReduceCombineFunction} built from two {@code SumReducer} instances,
 * writes the result as text, and compares the written lines with {@code (0,231)}.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple2<IntWritable, IntWritable>> mapped =
            HadoopTestData.getKVPairDataSet(env).map(new Mapper2());

    final DataSet<Tuple2<IntWritable, IntWritable>> summed = mapped.reduceGroup(
            new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                    new SumReducer(), new SumReducer()));

    final String outputPath = tempFolder.newFile().toURI().toString();
    summed.writeAsText(outputPath);

    // Run the job before the written output is read back for comparison.
    env.execute();

    compareResultsByLinesInMemory("(0,231)\n", outputPath);
}
Example 14
Source File: HadoopReduceFunctionITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Reduces the ungrouped key/value data set with a {@code HadoopReduceFunction} wrapping
 * {@code AllCommentCntReducer}, writes the result as text, and compares the written
 * lines with {@code (42,15)}.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple2<IntWritable, Text>> input = HadoopTestData.getKVPairDataSet(env);

    final DataSet<Tuple2<IntWritable, IntWritable>> counts = input.reduceGroup(
            new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new AllCommentCntReducer()));

    final String outputPath = tempFolder.newFile().toURI().toString();
    counts.writeAsText(outputPath);

    // Run the job before the written output is read back for comparison.
    env.execute();

    compareResultsByLinesInMemory("(42,15)\n", outputPath);
}
Example 15
Source File: QuantileDiscretizerTrainBatchOp.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Computes quantiles for the selected columns of the first input, serializes them into
 * model rows, and registers those rows as this operator's output.
 *
 * @param inputs upstream operators; passed to {@code checkAndGetFirst}, whose result is
 *     the only input this method reads
 * @return this operator
 * @throws RuntimeException if the params contain both {@code NUM_BUCKETS} and
 *     {@code NUM_BUCKETS_ARRAY}
 */
@Override
public QuantileDiscretizerTrainBatchOp linkFrom(BatchOperator<?>... inputs) {
    BatchOperator<?> source = checkAndGetFirst(inputs);

    // The two bucket settings are mutually exclusive.
    final boolean hasSingleBucketCount =
        getParams().contains(QuantileDiscretizerTrainParams.NUM_BUCKETS);
    if (hasSingleBucketCount
        && getParams().contains(QuantileDiscretizerTrainParams.NUM_BUCKETS_ARRAY)) {
        throw new RuntimeException("It can not set num_buckets and num_buckets_array at the same time.");
    }

    String[] selectedCols = getSelectedCols();

    // One bucket count per selected column: either the single value repeated, or the
    // caller-supplied array unboxed as-is.
    final int[] bucketCounts;
    if (hasSingleBucketCount) {
        bucketCounts = new int[selectedCols.length];
        Arrays.fill(bucketCounts, getNumBuckets());
    } else {
        bucketCounts = Arrays.stream(getNumBucketsArray()).mapToInt(Integer::intValue).toArray();
    }

    /* filter the selected column from input */
    DataSet<Row> selected = Preprocessing.select(source, selectedCols).getDataSet();

    DataSet<Row> quantiles = quantile(
        selected,
        bucketCounts,
        getParams().get(HasRoundMode.ROUND_MODE),
        getParams().get(Preprocessing.ZERO_AS_MISSING)
    );

    DataSet<Row> modelRows = quantiles.reduceGroup(
        new SerializeModel(
            getParams(),
            selectedCols,
            TableUtil.findColTypesWithAssertAndHint(source.getSchema(), selectedCols),
            BinTypes.BinDivideType.QUANTILE
        )
    );

    /* set output */
    setOutput(modelRows, new QuantileDiscretizerModelDataConverter().getModelSchema());

    return this;
}
Example 16
Source File: HadoopReduceFunctionITCase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Reduces the ungrouped key/value data set with a {@code HadoopReduceFunction} wrapping
 * {@code AllCommentCntReducer}, writes the result as text, and compares the written
 * lines with {@code (42,15)}.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple2<IntWritable, Text>> input = HadoopTestData.getKVPairDataSet(env);

    final DataSet<Tuple2<IntWritable, IntWritable>> counts = input.reduceGroup(
            new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new AllCommentCntReducer()));

    final String outputPath = tempFolder.newFile().toURI().toString();
    counts.writeAsText(outputPath);

    // Run the job before the written output is read back for comparison.
    env.execute();

    compareResultsByLinesInMemory("(42,15)\n", outputPath);
}
Example 17
Source File: HadoopReduceFunctionITCase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Reduces the ungrouped key/value data set with a {@code HadoopReduceFunction} wrapping
 * {@code AllCommentCntReducer}, writes the result as text, and compares the written
 * lines with {@code (42,15)}.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple2<IntWritable, Text>> input = HadoopTestData.getKVPairDataSet(env);

    final DataSet<Tuple2<IntWritable, IntWritable>> counts = input.reduceGroup(
            new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new AllCommentCntReducer()));

    final String outputPath = tempFolder.newFile().toURI().toString();
    counts.writeAsText(outputPath);

    // Run the job before the written output is read back for comparison.
    env.execute();

    compareResultsByLinesInMemory("(42,15)\n", outputPath);
}