cascading.pipe.GroupBy Java Examples
The following examples show how to use
cascading.pipe.GroupBy.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BoundaryBeforeGroupByRemovalTransformer.java From cascading-flink with Apache License 2.0 | 5 votes |
/**
 * Builds the expression graph this matcher searches for: a single arc whose
 * left-hand side is a {@link Boundary} (primary capture, {@code LinearOut} topology),
 * whose scope expression is {@code ScopeExpression.ALL}, and whose right-hand side
 * is a {@link GroupBy} (secondary capture, {@code LinearIn} topology).
 */
public BoundaryGroupByMatcher() {
  super(
      new ExpressionGraph()
          .arc(
              new TypeExpression( ElementCapture.Primary, Boundary.class, TypeExpression.Topo.LinearOut ),
              ScopeExpression.ALL,
              new TypeExpression( ElementCapture.Secondary, GroupBy.class, TypeExpression.Topo.LinearIn ) ) );
}
Example #2
Source File: GroupByReducer.java From cascading-flink with Apache License 2.0 | 5 votes |
/**
 * Prepares this reducer for execution: resets the prepare flag, creates the flow
 * process, verifies that the flow node has exactly one source and that it is a
 * {@link GroupBy}, builds the stream graph for it, and logs the graph's heads and tails.
 *
 * <p>Any failure inside the setup, including the two validation errors raised here,
 * is rethrown unchanged if it already is a {@link CascadingException} and is
 * otherwise wrapped in a {@link FlowException}.
 *
 * @param config configuration converted to a Hadoop configuration for the flow process
 */
@Override
public void open(Configuration config) {

  this.calledPrepare = false;

  try {
    this.currentProcess = new FlinkFlowProcess(
        FlinkConfigConverter.toHadoopConfig( config ),
        getRuntimeContext(),
        this.flowNode.getID() );

    // Validation happens inside the try on purpose: these errors get wrapped below.
    final Set<FlowElement> nodeSources = this.flowNode.getSourceElements();
    if( nodeSources.size() != 1 ) {
      throw new RuntimeException( "FlowNode for GroupByReducer may only have a single source" );
    }

    final FlowElement onlySource = nodeSources.iterator().next();
    if( !( onlySource instanceof GroupBy ) ) {
      throw new RuntimeException( "Source of GroupByReducer must be a GroupBy" );
    }

    this.streamGraph = new GroupByStreamGraph( this.currentProcess, this.flowNode, (GroupBy) onlySource );
    this.groupSource = this.streamGraph.getGroupSource();

    for( Duct headDuct : this.streamGraph.getHeads() ) {
      LOG.info( "sourcing from: " + ( (ElementDuct) headDuct ).getFlowElement() );
    }

    for( Duct tailDuct : this.streamGraph.getTails() ) {
      LOG.info( "sinking to: " + ( (ElementDuct) tailDuct ).getFlowElement() );
    }
  }
  catch( Throwable failure ) {
    if( !( failure instanceof CascadingException ) ) {
      throw new FlowException( "internal error during GroupByReducer configuration", failure );
    }
    throw (CascadingException) failure;
  }
}
Example #3
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0 | 4 votes |
private DataSet<Tuple> translateGroupBy(DataSet<Tuple> input, FlowNode node, int dop) { GroupBy groupBy = (GroupBy) node.getSourceElements().iterator().next(); Scope outScope = getOutScope(node); List<Scope> inScopes = getInputScopes(node, groupBy); Fields outFields; if(outScope.isEvery()) { outFields = outScope.getOutGroupingFields(); } else { outFields = outScope.getOutValuesFields(); } registerKryoTypes(outFields); // get input scope Scope inScope = inScopes.get(0); // get grouping keys Fields groupKeyFields = groupBy.getKeySelectors().get(inScope.getName()); // get group sorting keys Fields sortKeyFields = groupBy.getSortingSelectors().get(inScope.getName()); String[] groupKeys = registerKeyFields(input, groupKeyFields); String[] sortKeys = null; if (sortKeyFields != null) { sortKeys = registerKeyFields(input, sortKeyFields); } Order sortOrder = groupBy.isSortReversed() ? Order.DESCENDING : Order.ASCENDING; if(sortOrder == Order.DESCENDING) { // translate groupBy with inverse sort order return translateInverseSortedGroupBy(input, node, dop, groupKeys, sortKeys, outFields); } else if(groupKeys == null || groupKeys.length == 0) { // translate key-less (global) groupBy return translateGlobalGroupBy(input, node, dop, sortKeys, sortOrder, outFields); } else { UnsortedGrouping<Tuple> grouping = input .groupBy(groupKeys); if(sortKeys != null && sortKeys.length > 0) { // translate groupBy with group sorting SortedGrouping<Tuple> sortedGrouping = grouping .sortGroup(sortKeys[0], Order.ASCENDING); for(int i=1; i<sortKeys.length; i++) { sortedGrouping = sortedGrouping .sortGroup(sortKeys[i], Order.DESCENDING); } return sortedGrouping .reduceGroup(new GroupByReducer(node)) .returns(new TupleTypeInfo(outFields)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .name("reduce-" + node.getID()); } else { // translate groupBy without group sorting return grouping .reduceGroup(new GroupByReducer(node)) .returns(new TupleTypeInfo(outFields)) 
.withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .name("reduce-" + node.getID()); } } }
Example #4
Source File: HashJoinMapperStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Always rejects the request: this implementation never creates a GroupBy gate.
 *
 * @param groupBy ignored
 * @param ioRole  ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
  final String reason = "Cannot create a GroupBy gate in a HashJoinMapperStreamGraph";
  throw new UnsupportedOperationException( reason );
}
Example #5
Source File: HashJoinStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Always rejects the request: this implementation never creates a GroupBy gate.
 *
 * @param groupBy ignored
 * @param ioRole  ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
  final String reason = "Cannot create a GroupBy gate in a HashJoinStreamGraph";
  throw new UnsupportedOperationException( reason );
}
Example #6
Source File: GroupByStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Sets up the entry point of the graph: creates a {@link GroupByInGate} in the
 * {@code IORole.source} role for {@code groupBy}, stores it as the source stage,
 * registers it as a head, and then passes it with {@code groupBy} to {@code handleDuct}.
 *
 * @param groupBy     the GroupBy element the gate is created for
 * @param flowProcess the flow process handed to the gate
 */
private void buildGraph( GroupBy groupBy, FlowProcess flowProcess ) {

  this.sourceStage = new GroupByInGate( flowProcess, groupBy, IORole.source );

  // Register the gate as a head first, then continue from it.
  addHead( this.sourceStage );
  handleDuct( groupBy, this.sourceStage );
}
Example #7
Source File: GroupByStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Always rejects the request: this implementation never creates a GroupBy gate.
 *
 * @param groupBy ignored
 * @param ioRole  ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
  final String reason = "Cannot create a GroupBy gate in a GroupByStreamGraph";
  throw new UnsupportedOperationException( reason );
}
Example #8
Source File: GroupByInGate.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Creates the gate and records whether the splice's joiner is a {@link BufferJoin}.
 *
 * @param flowProcess passed through to the superclass constructor
 * @param splice      the GroupBy this gate belongs to; its joiner is inspected
 * @param ioRole      passed through to the superclass constructor
 */
public GroupByInGate(FlowProcess flowProcess, GroupBy splice, IORole ioRole) {
  super( flowProcess, splice, ioRole );

  // instanceof yields false for a null joiner, so no separate null check is needed.
  this.isBufferJoin = ( splice.getJoiner() instanceof BufferJoin );
}
Example #9
Source File: SinkStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Always rejects the request: this implementation never creates a GroupBy gate.
 *
 * @param groupBy ignored
 * @param ioRole  ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
  final String reason = "Cannot create a GroupBy gate in a SinkStreamGraph";
  throw new UnsupportedOperationException( reason );
}
Example #10
Source File: EachStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Always rejects the request: this implementation never creates a GroupBy gate.
 *
 * @param groupBy ignored
 * @param ioRole  ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
  // The message previously named "MapStreamGraph", which does not match the
  // containing source file (EachStreamGraph.java) and misdirects anyone reading the trace.
  throw new UnsupportedOperationException("Cannot create a GroupBy gate in an EachStreamGraph");
}
Example #11
Source File: SourceStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Always rejects the request: this implementation never creates a GroupBy gate.
 *
 * @param element ignored
 * @param role    ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
protected Gate createGroupByGate(GroupBy element, IORole role) {
  final String reason = "SourceStreamGraph may not have a GroupByGate";
  throw new UnsupportedOperationException( reason );
}
Example #12
Source File: CoGroupBufferReduceStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Always rejects the request: this implementation never creates a GroupBy gate.
 *
 * @param groupBy ignored
 * @param ioRole  ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
  final String reason = "Cannot create a GroupBy gate in a CoGroupBufferReduceStreamGraph";
  throw new UnsupportedOperationException( reason );
}
Example #13
Source File: CoGroupReduceStreamGraph.java From cascading-flink with Apache License 2.0 | 4 votes |
/**
 * Always rejects the request: this implementation never creates a GroupBy gate.
 *
 * @param groupBy ignored
 * @param ioRole  ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
  final String reason = "Cannot create a GroupBy gate in a CoGroupReduceStreamGraph";
  throw new UnsupportedOperationException( reason );
}
Example #14
Source File: GroupByStreamGraph.java From cascading-flink with Apache License 2.0 | 3 votes |
/**
 * Creates the stream graph for a node sourced by {@code groupBy}. After the
 * superclass is initialised, the steps run in this fixed order: build the graph,
 * set traps, set scopes, print the graph, bind.
 *
 * @param flowProcess the flow process; also supplies the current slice number used when printing
 * @param node        the flow node this graph is built for; its ID is used when printing
 * @param groupBy     the GroupBy element the graph starts from
 */
public GroupByStreamGraph(FlinkFlowProcess flowProcess, FlowNode node, GroupBy groupBy) {
  super( flowProcess, node );

  buildGraph( groupBy, flowProcess );

  setTraps();
  setScopes();

  printGraph( node.getID(), "groupby", flowProcess.getCurrentSliceNum() );

  bind();
}
Example #15
Source File: BoundaryBeforeGroupByTransformer.java From cascading-flink with Apache License 2.0 | 2 votes |
/**
 * Configures the superclass with {@code SearchOrder.ReverseTopological} and a
 * single flow-element expression that captures a {@link GroupBy} as the primary element.
 */
public GroupByGraph() {
  super(
      SearchOrder.ReverseTopological,
      new FlowElementExpression( ElementCapture.Primary, GroupBy.class ) );
}