org.apache.flink.api.java.DataSet#reduceGroup

Source File: GroupReduceITCase.java From flink with Apache License 2.0

6 votes

/**
 * Runs a non-grouped {@code reduceGroup} over the custom-type test data set and
 * compares the collected output, as text, with the expected line.
 */
@Test
public void testCorrectnessOfAllGroupReduceForCustomTypes() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Source -> all-group-reduce -> collect, expressed as one pipeline.
	List<CustomType> collected = CollectionDataSets.getCustomTypeDataSet(env)
			.reduceGroup(new AllAddingCustomTypeGroupReduce())
			.collect();

	compareResultAsText(collected, "91,210,Hello!");
}

Source File: ReduceWithCombinerITCase.java From flink with Apache License 2.0

6 votes

/**
 * Feeds one non-keyed input into two different group-reduce functions, unions
 * both outputs and compares the collected tuples with the expected text.
 */
@Test
public void testForkingReduceOnNonKeyedDataset() throws Exception {

	// Execution environment with the parallelism pinned to 4.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// Shared source consumed by both reducers below.
	DataSet<Tuple2<Integer, Boolean>> source = createNonKeyedInput(env);

	// Fork: the same input goes through two independent reduceGroup operators.
	DataSet<Tuple2<Integer, Boolean>> combReduced = source.reduceGroup(new NonKeyedCombReducer());
	DataSet<Tuple2<Integer, Boolean>> groupCombReduced = source.reduceGroup(new NonKeyedGroupCombReducer());

	// The union is expected to hold two identical "10,true" records.
	List<Tuple2<Integer, Boolean>> collected = combReduced.union(groupCombReduced).collect();
	compareResultAsTuples(collected, "10,true\n10,true\n");
}

Source File: GroupReduceITCase.java From flink with Apache License 2.0

6 votes

/**
 * Applies an all-group-reduce (no grouping key) to the custom-type test data
 * and verifies the textual form of the collected result.
 */
@Test
public void testCorrectnessOfAllGroupReduceForCustomTypes() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> customTypes = CollectionDataSets.getCustomTypeDataSet(env);

	// Collect directly from the reduce operator instead of naming it first.
	List<CustomType> reduced = customTypes
			.reduceGroup(new AllAddingCustomTypeGroupReduce())
			.collect();

	final String expectedText = "91,210,Hello!";
	compareResultAsText(reduced, expectedText);
}

Source File: GroupReduceITCase.java From flink with Apache License 2.0

6 votes

/**
 * Runs a non-grouped {@code reduceGroup} over the 3-tuple test data set and
 * compares the collected tuples with the expected line.
 */
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Source -> all-group-reduce -> collect, expressed as one pipeline.
	List<Tuple3<Integer, Long, String>> collected = CollectionDataSets.get3TupleDataSet(env)
			.reduceGroup(new AllAddingTuple3GroupReduce())
			.collect();

	compareResultAsTuples(collected, "231,91,Hello World\n");
}

Source File: HadoopReduceCombineFunctionITCase.java From flink with Apache License 2.0

6 votes

/**
 * Applies a {@code HadoopReduceCombineFunction} (same {@code SumReducer} class as
 * reducer and combiner) to an ungrouped data set, writes the result as text to a
 * temporary file and compares the file content with the expected line.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, IntWritable>> mapped =
			HadoopTestData.getKVPairDataSet(env).map(new Mapper2());

	// Build the wrapped Hadoop function separately, then apply it without grouping.
	HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable> sumFunction =
			new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
					new SumReducer(), new SumReducer());
	DataSet<Tuple2<IntWritable, IntWritable>> total = mapped.reduceGroup(sumFunction);

	String outputUri = tempFolder.newFile().toURI().toString();
	total.writeAsText(outputUri);

	// The sink only runs once the job is executed.
	env.execute();

	compareResultsByLinesInMemory("(0,231)\n", outputUri);
}

Source File: QuantileDiscretizerTrainBatchOp.java From Alink with Apache License 2.0

6 votes

/**
 * Turns the rows of a quantile-discretizer model into a data set of feature borders.
 *
 * <p>A single non-grouped {@code reduceGroup} gathers all rows into a list, loads them
 * through {@code QuantileDiscretizerModelDataConverter} and emits the value of every
 * entry in the loaded converter's {@code data} map.
 *
 * @param modelDataSet rows that are handed to {@code QuantileDiscretizerModelDataConverter#load}
 * @return data set containing one {@code FeatureBorder} per entry of the loaded {@code data} map
 */
public static DataSet<FeatureBorder> transformModelToFeatureBorder(DataSet<Row> modelDataSet) {
	GroupReduceFunction<Row, FeatureBorder> borderExtractor =
		new GroupReduceFunction<Row, FeatureBorder>() {
			@Override
			public void reduce(Iterable<Row> values, Collector<FeatureBorder> out) throws Exception {
				// load(...) is given a List, so materialize the iterable first.
				List<Row> modelRows = new ArrayList<>();
				for (Row row : values) {
					modelRows.add(row);
				}

				QuantileDiscretizerModelDataConverter loaded
					= new QuantileDiscretizerModelDataConverter().load(modelRows);

				// Only the values are emitted; the map keys are dropped.
				for (Map.Entry<String, FeatureBorder> borderEntry : loaded.data.entrySet()) {
					FeatureBorder border = borderEntry.getValue();
					out.collect(border);
				}
			}
		};

	return modelDataSet.reduceGroup(borderExtractor);
}

Source File: ReduceWithCombinerITCase.java From flink with Apache License 2.0

6 votes

/**
 * Sends the same non-keyed input through two different group-reduce functions
 * and checks the union of both outputs against the expected tuples.
 */
@Test
public void testForkingReduceOnNonKeyedDataset() throws Exception {

	// Environment setup; parallelism is fixed at 4.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// One input, read by both branches.
	DataSet<Tuple2<Integer, Boolean>> nonKeyed = createNonKeyedInput(env);

	DataSet<Tuple2<Integer, Boolean>> firstBranch = nonKeyed.reduceGroup(new NonKeyedCombReducer());
	DataSet<Tuple2<Integer, Boolean>> secondBranch = nonKeyed.reduceGroup(new NonKeyedGroupCombReducer());

	// Merge both branches before collecting.
	DataSet<Tuple2<Integer, Boolean>> merged = firstBranch.union(secondBranch);

	final String expectedTuples = "10,true\n10,true\n";
	compareResultAsTuples(merged.collect(), expectedTuples);
}

Source File: GroupReduceITCase.java From flink with Apache License 2.0

6 votes

/**
 * Applies an all-group-reduce (no grouping key) to the 3-tuple test data and
 * verifies the collected tuples.
 */
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);

	// Collect directly from the reduce operator instead of naming it first.
	List<Tuple3<Integer, Long, String>> reduced = tuples
			.reduceGroup(new AllAddingTuple3GroupReduce())
			.collect();

	final String expectedTuples = "231,91,Hello World\n";
	compareResultAsTuples(reduced, expectedTuples);
}

Source File: HadoopReduceCombineFunctionITCase.java From flink with Apache License 2.0

6 votes

/**
 * Reduces an ungrouped data set with a {@code HadoopReduceCombineFunction} built
 * from two {@code SumReducer} instances, writes the output as text and compares
 * the written lines with the expected value.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Input pairs after the mapping step.
	DataSet<Tuple2<IntWritable, IntWritable>> pairs =
			HadoopTestData.getKVPairDataSet(env).map(new Mapper2());

	// No groupBy: the whole data set goes through a single reduceGroup.
	DataSet<Tuple2<IntWritable, IntWritable>> summed = pairs.reduceGroup(
			new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
					new SumReducer(), new SumReducer()));

	final String sinkUri = tempFolder.newFile().toURI().toString();
	summed.writeAsText(sinkUri);
	env.execute();

	final String expectedLines = "(0,231)\n";
	compareResultsByLinesInMemory(expectedLines, sinkUri);
}

Source File: ReduceWithCombinerITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Forks a non-keyed data set into two {@code reduceGroup} operators, unions the
 * two results and compares them with the expected tuples.
 */
@Test
public void testForkingReduceOnNonKeyedDataset() throws Exception {

	// Prepare the environment and fix the parallelism at 4.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// Input shared by both reduce operators.
	DataSet<Tuple2<Integer, Boolean>> sharedInput = createNonKeyedInput(env);

	DataSet<Tuple2<Integer, Boolean>> left = sharedInput.reduceGroup(new NonKeyedCombReducer());
	DataSet<Tuple2<Integer, Boolean>> right = sharedInput.reduceGroup(new NonKeyedGroupCombReducer());

	// Collect the union and compare it with two identical expected records.
	compareResultAsTuples(
			left.union(right).collect(),
			"10,true\n10,true\n");
}

Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Checks the output of a non-grouped {@code reduceGroup} on the custom-type
 * test data by comparing the collected result, as text, with the expected line.
 */
@Test
public void testCorrectnessOfAllGroupReduceForCustomTypes() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<CustomType> aggregated = input.reduceGroup(new AllAddingCustomTypeGroupReduce());

	// Expected text is passed inline; collect() triggers the execution.
	compareResultAsText(aggregated.collect(), "91,210,Hello!");
}

Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Checks the output of a non-grouped {@code reduceGroup} on the 3-tuple test
 * data by comparing the collected tuples with the expected line.
 */
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> aggregated = input.reduceGroup(new AllAddingTuple3GroupReduce());

	// Expected text is passed inline; collect() triggers the execution.
	compareResultAsTuples(aggregated.collect(), "231,91,Hello World\n");
}

Source File: HadoopReduceCombineFunctionITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Runs a {@code HadoopReduceCombineFunction} wrapping two {@code SumReducer}
 * instances on an ungrouped data set and verifies the text file it produces.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, IntWritable>> mappedPairs =
			HadoopTestData.getKVPairDataSet(env).map(new Mapper2());

	// Wrapped Hadoop reducer/combiner pair, applied to the whole data set.
	HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable> reduceCombine =
			new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
					new SumReducer(), new SumReducer());
	DataSet<Tuple2<IntWritable, IntWritable>> aggregate = mappedPairs.reduceGroup(reduceCombine);

	String targetUri = tempFolder.newFile().toURI().toString();
	aggregate.writeAsText(targetUri);

	// Run the job so that the text sink is written before the comparison.
	env.execute();

	compareResultsByLinesInMemory("(0,231)\n", targetUri);
}

Source File: HadoopReduceFunctionITCase.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Applies a {@code HadoopReduceFunction} wrapping {@code AllCommentCntReducer} to an
 * ungrouped key/value data set, writes the result as text to a temporary file
 * and compares the file content with the expected line.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> pairs = HadoopTestData.getKVPairDataSet(env);

	// Build the wrapped Hadoop reducer first, then apply it without grouping.
	HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable> countFunction =
			new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer());
	DataSet<Tuple2<IntWritable, IntWritable>> counts = pairs.reduceGroup(countFunction);

	String outputUri = tempFolder.newFile().toURI().toString();
	counts.writeAsText(outputUri);

	// The sink only runs once the job is executed.
	env.execute();

	compareResultsByLinesInMemory("(42,15)\n", outputUri);
}

Source File: QuantileDiscretizerTrainBatchOp.java From Alink with Apache License 2.0

4 votes

/**
 * Links this operator to its first input: selects the configured columns, computes
 * quantiles for them, serializes the result via {@code SerializeModel} and sets it
 * as this operator's output.
 *
 * @param inputs upstream operators; only the one returned by {@code checkAndGetFirst} is used
 * @return this operator
 * @throws RuntimeException if both {@code NUM_BUCKETS} and {@code NUM_BUCKETS_ARRAY} are set
 */
@Override
public QuantileDiscretizerTrainBatchOp linkFrom(BatchOperator<?>... inputs) {
	BatchOperator<?> in = checkAndGetFirst(inputs);

	// A single bucket count and a per-column bucket array are mutually exclusive.
	final boolean hasSingleBucketCount = getParams().contains(QuantileDiscretizerTrainParams.NUM_BUCKETS);
	if (hasSingleBucketCount
		&& getParams().contains(QuantileDiscretizerTrainParams.NUM_BUCKETS_ARRAY)) {
		throw new RuntimeException("It can not set num_buckets and num_buckets_array at the same time.");
	}

	final String[] selectedCols = getSelectedCols();

	// Either replicate the single count for every column or unbox the per-column array.
	final int[] bucketCounts;
	if (hasSingleBucketCount) {
		bucketCounts = new int[selectedCols.length];
		Arrays.fill(bucketCounts, getNumBuckets());
	} else {
		bucketCounts = Arrays.stream(getNumBucketsArray()).mapToInt(Integer::intValue).toArray();
	}

	// Keep only the selected columns of the input.
	DataSet<Row> selectedInput = Preprocessing.select(in, selectedCols).getDataSet();

	DataSet<Row> quantiles = quantile(
		selectedInput,
		bucketCounts,
		getParams().get(HasRoundMode.ROUND_MODE),
		getParams().get(Preprocessing.ZERO_AS_MISSING)
	);

	// Non-grouped reduceGroup: SerializeModel receives all quantile rows in one call.
	DataSet<Row> serializedModel = quantiles.reduceGroup(
		new SerializeModel(
			getParams(),
			selectedCols,
			TableUtil.findColTypesWithAssertAndHint(in.getSchema(), selectedCols),
			BinTypes.BinDivideType.QUANTILE
		)
	);

	// Publish the model together with the converter's model schema.
	setOutput(serializedModel, new QuantileDiscretizerModelDataConverter().getModelSchema());

	return this;
}

Source File: HadoopReduceFunctionITCase.java From flink with Apache License 2.0

4 votes

/**
 * Reduces an ungrouped key/value data set with a {@code HadoopReduceFunction}
 * built around {@code AllCommentCntReducer}, writes the output as text and
 * compares the written lines with the expected value.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> keyValues = HadoopTestData.getKVPairDataSet(env);

	// No groupBy: the whole data set goes through a single reduceGroup.
	DataSet<Tuple2<IntWritable, IntWritable>> commentCounts = keyValues.reduceGroup(
			new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));

	final String sinkUri = tempFolder.newFile().toURI().toString();
	commentCounts.writeAsText(sinkUri);
	env.execute();

	final String expectedLines = "(42,15)\n";
	compareResultsByLinesInMemory(expectedLines, sinkUri);
}

Source File: HadoopReduceFunctionITCase.java From flink with Apache License 2.0

4 votes

/**
 * Runs a {@code HadoopReduceFunction} wrapping {@code AllCommentCntReducer} on an
 * ungrouped data set and verifies the text file it produces.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> source = HadoopTestData.getKVPairDataSet(env);

	// Wrapped Hadoop reducer, applied to the whole data set.
	HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable> wrappedReducer =
			new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer());
	DataSet<Tuple2<IntWritable, IntWritable>> reduced = source.reduceGroup(wrappedReducer);

	String targetUri = tempFolder.newFile().toURI().toString();
	reduced.writeAsText(targetUri);

	// Run the job so that the text sink is written before the comparison.
	env.execute();

	compareResultsByLinesInMemory("(42,15)\n", targetUri);
}

Java Code Examples for org.apache.flink.api.java.DataSet#reduceGroup()