Java Code Examples for org.apache.flink.api.java.DataSet#groupBy()
The following examples show how to use
org.apache.flink.api.java.DataSet#groupBy().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GroupingTest.java From flink with Apache License 2.0 | 7 votes |
@Test @SuppressWarnings("serial") public void testGroupByKeySelector3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); try { DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work customDs.groupBy( new KeySelector<GroupingTest.CustomType, CustomType>() { @Override public CustomType getKey(CustomType value) { return value; } }); } catch (Exception e) { Assert.fail(); } }
Example 2
Source File: ReduceWithCombinerITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testForkingReduceOnKeyedDataset() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // creates the input data and distributes them evenly among the available downstream tasks DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env); UnsortedGrouping<Tuple3<String, Integer, Boolean>> counts = input.groupBy(0); DataSet<Tuple3<String, Integer, Boolean>> r1 = counts.reduceGroup(new KeyedCombReducer()); DataSet<Tuple3<String, Integer, Boolean>> r2 = counts.reduceGroup(new KeyedGroupCombReducer()); List<Tuple3<String, Integer, Boolean>> actual = r1.union(r2).collect(); String expected = "k1,6,true\n" + "k2,4,true\n" + "k1,6,true\n" + "k2,4,true\n"; compareResultAsTuples(actual, expected); }
Example 3
Source File: GroupingTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test(expected = InvalidProgramException.class) @SuppressWarnings("serial") public void testGroupByKeySelector5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work customDs.groupBy( new KeySelector<GroupingTest.CustomType, CustomType2>() { @Override public CustomType2 getKey(CustomType value) { return new CustomType2(); } }); }
Example 4
Source File: GroupingTest.java From flink with Apache License 2.0 | 6 votes |
@Test(expected = InvalidProgramException.class) @SuppressWarnings("serial") public void testGroupByKeySelector5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work customDs.groupBy( new KeySelector<GroupingTest.CustomType, CustomType2>() { @Override public CustomType2 getKey(CustomType value) { return new CustomType2(); } }); }
Example 5
Source File: ReduceWithCombinerITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testForkingReduceOnKeyedDatasetWithSelection() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // creates the input data and distributes them evenly among the available downstream tasks DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env); UnsortedGrouping<Tuple3<String, Integer, Boolean>> counts = input.groupBy(new KeySelectorX()); DataSet<Tuple3<String, Integer, Boolean>> r1 = counts.reduceGroup(new KeyedCombReducer()); DataSet<Tuple3<String, Integer, Boolean>> r2 = counts.reduceGroup(new KeyedGroupCombReducer()); List<Tuple3<String, Integer, Boolean>> actual = r1.union(r2).collect(); String expected = "k1,6,true\n" + "k2,4,true\n" + "k1,6,true\n" + "k2,4,true\n"; compareResultAsTuples(actual, expected); }
Example 6
Source File: GroupingTest.java From flink with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("serial") public void testGroupByKeySelector2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); try { DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should work customDs.groupBy( new KeySelector<GroupingTest.CustomType, Tuple2<Integer, Long>>() { @Override public Tuple2<Integer, Long> getKey(CustomType value) { return new Tuple2<Integer, Long>(value.myInt, value.myLong); } }); } catch (Exception e) { Assert.fail(); } }
Example 7
Source File: GroupingTest.java From flink with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("serial") public void testGroupByKeySelector1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); try { DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should work customDs.groupBy( new KeySelector<GroupingTest.CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } }); } catch (Exception e) { Assert.fail(); } }
Example 8
Source File: GroupingTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test @SuppressWarnings("serial") public void testGroupByKeySelector1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); try { DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should work customDs.groupBy( new KeySelector<GroupingTest.CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } }); } catch (Exception e) { Assert.fail(); } }
Example 9
Source File: ReduceWithCombinerITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testForkingReduceOnKeyedDataset() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // creates the input data and distributes them evenly among the available downstream tasks DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env); UnsortedGrouping<Tuple3<String, Integer, Boolean>> counts = input.groupBy(0); DataSet<Tuple3<String, Integer, Boolean>> r1 = counts.reduceGroup(new KeyedCombReducer()); DataSet<Tuple3<String, Integer, Boolean>> r2 = counts.reduceGroup(new KeyedGroupCombReducer()); List<Tuple3<String, Integer, Boolean>> actual = r1.union(r2).collect(); String expected = "k1,6,true\n" + "k2,4,true\n" + "k1,6,true\n" + "k2,4,true\n"; compareResultAsTuples(actual, expected); }
Example 10
Source File: GroupingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Expects an {@code InvalidProgramException} when a {@code DataSet<Integer>} is grouped by
 * the two key expressions {@code "*"} and {@code "invalidField"}.
 */
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid1() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Integer> numbers = environment.fromElements(0, 1, 2, 3);

    // same two expressions, passed to the String... overload as an array
    final String[] keyExpressions = {"*", "invalidField"};
    numbers.groupBy(keyExpressions);
}
Example 11
Source File: GroupingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Expects an {@code InvalidProgramException} when a {@code DataSet<Integer>} is grouped by
 * the key expression {@code "invalidField"}.
 */
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid2() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Integer> numbers = environment.fromElements(0, 1, 2, 3);

    final String keyExpression = "invalidField";
    numbers.groupBy(keyExpression);
}
Example 12
Source File: GroupingTest.java From flink with Apache License 2.0 | 5 votes |
@Test(expected = IllegalArgumentException.class) public void testGroupByKeyExpressions4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<CustomType> ds = env.fromCollection(customTypeData); // should not work, key out of tuple bounds ds.groupBy("myNonExistent"); }
Example 13
Source File: GroupingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test(expected = InvalidProgramException.class) public void testGroupByKeyExpressions2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Long> longDs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO); // should not work: groups on basic type longDs.groupBy("myInt"); }
Example 14
Source File: GroupingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Grouping a {@code DataSet<Integer>} by the expressions {@code "*"} and
 * {@code "invalidField"} together is expected to throw an {@code InvalidProgramException}.
 */
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid1() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Integer> ints = execEnv.fromElements(0, 1, 2, 3);

    // handed to the String... overload as an explicit array
    final String[] fields = {"*", "invalidField"};
    ints.groupBy(fields);
}
Example 15
Source File: GroupingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test(expected = IndexOutOfBoundsException.class) public void testGroupByKeyFields5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, negative field position tupleDs.groupBy(-1); }
Example 16
Source File: GroupingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test(expected = IndexOutOfBoundsException.class) public void testGroupByKeyFields4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, key out of tuple bounds tupleDs.groupBy(5); }
Example 17
Source File: GroupingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Grouping a {@code DataSet<Integer>} by the expression {@code "invalidField"} is expected
 * to throw an {@code InvalidProgramException}.
 */
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid2() {
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Integer> ints = execEnv.fromElements(0, 1, 2, 3);

    final String field = "invalidField";
    ints.groupBy(field);
}
Example 18
Source File: GroupingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Expects an {@code InvalidProgramException} when a {@code DataSet<ArrayList<Integer>>} is
 * grouped by the key expression {@code "*"}.
 */
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid3() {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // single-element data set whose only element is an empty list
    final ArrayList<Integer> emptyList = new ArrayList<Integer>();
    final DataSet<ArrayList<Integer>> lists = environment.fromElements(emptyList);

    final String wildcard = "*";
    lists.groupBy(wildcard);
}
Example 19
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0 | 4 votes |
private DataSet<Tuple> translateGroupBy(DataSet<Tuple> input, FlowNode node, int dop) { GroupBy groupBy = (GroupBy) node.getSourceElements().iterator().next(); Scope outScope = getOutScope(node); List<Scope> inScopes = getInputScopes(node, groupBy); Fields outFields; if(outScope.isEvery()) { outFields = outScope.getOutGroupingFields(); } else { outFields = outScope.getOutValuesFields(); } registerKryoTypes(outFields); // get input scope Scope inScope = inScopes.get(0); // get grouping keys Fields groupKeyFields = groupBy.getKeySelectors().get(inScope.getName()); // get group sorting keys Fields sortKeyFields = groupBy.getSortingSelectors().get(inScope.getName()); String[] groupKeys = registerKeyFields(input, groupKeyFields); String[] sortKeys = null; if (sortKeyFields != null) { sortKeys = registerKeyFields(input, sortKeyFields); } Order sortOrder = groupBy.isSortReversed() ? Order.DESCENDING : Order.ASCENDING; if(sortOrder == Order.DESCENDING) { // translate groupBy with inverse sort order return translateInverseSortedGroupBy(input, node, dop, groupKeys, sortKeys, outFields); } else if(groupKeys == null || groupKeys.length == 0) { // translate key-less (global) groupBy return translateGlobalGroupBy(input, node, dop, sortKeys, sortOrder, outFields); } else { UnsortedGrouping<Tuple> grouping = input .groupBy(groupKeys); if(sortKeys != null && sortKeys.length > 0) { // translate groupBy with group sorting SortedGrouping<Tuple> sortedGrouping = grouping .sortGroup(sortKeys[0], Order.ASCENDING); for(int i=1; i<sortKeys.length; i++) { sortedGrouping = sortedGrouping .sortGroup(sortKeys[i], Order.DESCENDING); } return sortedGrouping .reduceGroup(new GroupByReducer(node)) .returns(new TupleTypeInfo(outFields)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .name("reduce-" + node.getID()); } else { // translate groupBy without group sorting return grouping .reduceGroup(new GroupByReducer(node)) .returns(new TupleTypeInfo(outFields)) 
.withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .name("reduce-" + node.getID()); } } }
Example 20
Source File: GroupingTest.java From Flink-CEPplus with Apache License 2.0 | 3 votes |
@Test(expected = InvalidProgramException.class) public void testGroupByKeyExpressions3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); this.customTypeData.add(new CustomType()); DataSet<CustomType> customDs = env.fromCollection(customTypeData); // should not work: tuple selector on custom type customDs.groupBy(0); }