org.apache.flink.runtime.operators.shipping.ShipStrategyType Java Examples
The following examples show how to use org.apache.flink.runtime.operators.shipping.ShipStrategyType. Each example is taken from an open source project; the source file and originating project are noted above the code.
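For orientation, the constants exercised across these examples are NONE, FORWARD, BROADCAST, PARTITION_HASH, PARTITION_RANGE, PARTITION_RANDOM, PARTITION_FORCED_REBALANCE, and PARTITION_CUSTOM (see Example #1). A minimal sketch that iterates over them; the "redistributes data" classification is this page's shorthand, not a Flink API:

import org.apache.flink.runtime.operators.shipping.ShipStrategyType;

public class ShipStrategyOverview {

    public static void main(String[] args) {
        for (ShipStrategyType type : ShipStrategyType.values()) {
            // NONE means "not decided / not applicable" and FORWARD is a local,
            // one-to-one hand-over; every other constant moves data between
            // parallel instances (by repartitioning or replication).
            boolean redistributes = type != ShipStrategyType.NONE && type != ShipStrategyType.FORWARD;
            System.out.println(type.name() + " redistributes data: " + redistributes);
        }
    }
}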
Example #1
Source File: JsonMapper.java From Flink-CEPplus with Apache License 2.0
public static String getShipStrategyString(ShipStrategyType shipType) {
    if (shipType == null) {
        return "(null)";
    }
    switch (shipType) {
        case NONE:
            return "(none)";
        case FORWARD:
            return "Forward";
        case BROADCAST:
            return "Broadcast";
        case PARTITION_HASH:
            return "Hash Partition";
        case PARTITION_RANGE:
            return "Range Partition";
        case PARTITION_RANDOM:
            return "Redistribute";
        case PARTITION_FORCED_REBALANCE:
            return "Rebalance";
        case PARTITION_CUSTOM:
            return "Custom Partition";
        default:
            return shipType.name();
    }
}
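A quick usage sketch for the helper above, assuming JsonMapper is on the classpath; the expected strings follow directly from the switch cases:

String a = JsonMapper.getShipStrategyString(ShipStrategyType.BROADCAST);       // "Broadcast"
String b = JsonMapper.getShipStrategyString(ShipStrategyType.PARTITION_HASH);  // "Hash Partition"
String c = JsonMapper.getShipStrategyString(null);                             // "(null)"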
Example #2
Source File: OutputEmitterTest.java From flink with Apache License 2.0
private boolean verifyWrongPartitionHashKey(int position, int fieldNum) {
    final TypeComparator<Record> comparator = new RecordComparatorFactory(
        new int[] {position}, new Class[] {IntValue.class}).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> selector = createChannelSelector(
        ShipStrategyType.PARTITION_HASH, comparator, 100);
    final SerializationDelegate<Record> delegate = new SerializationDelegate<>(new RecordSerializerFactory().getSerializer());

    Record record = new Record(2);
    record.setField(fieldNum, new IntValue(1));
    delegate.setInstance(record);

    try {
        selector.selectChannel(delegate);
    } catch (NullKeyFieldException re) {
        Assert.assertEquals(position, re.getFieldNumber());
        return true;
    }
    return false;
}
Example #3
Source File: BinaryUnionReplacer.java From flink with Apache License 2.0
public void collect(Channel in, List<Channel> inputs) {
    if (in.getSource() instanceof NAryUnionPlanNode) {
        // sanity check
        if (in.getShipStrategy() != ShipStrategyType.FORWARD) {
            throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators.");
        }
        if (!(in.getLocalStrategy() == null || in.getLocalStrategy() == LocalStrategy.NONE)) {
            throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators.");
        }

        inputs.addAll(((NAryUnionPlanNode) in.getSource()).getListOfInputs());
    } else {
        // is not a collapsed union node, so we take the channel directly
        inputs.add(in);
    }
}
Example #4
Source File: OptimizerNode.java From Flink-CEPplus with Apache License 2.0
/**
 * This function connects the operators that produce the broadcast inputs to this operator.
 *
 * @param operatorToNode The map from program operators to optimizer nodes.
 * @param defaultExchangeMode The data exchange mode to use, if the operator does not
 *                            specify one.
 *
 * @throws CompilerException
 */
public void setBroadcastInputs(Map<Operator<?>, OptimizerNode> operatorToNode, ExecutionMode defaultExchangeMode) {
    // skip for Operators that don't support broadcast variables
    if (!(getOperator() instanceof AbstractUdfOperator<?, ?>)) {
        return;
    }

    // get all broadcast inputs
    AbstractUdfOperator<?, ?> operator = ((AbstractUdfOperator<?, ?>) getOperator());

    // create connections and add them
    for (Map.Entry<String, Operator<?>> input : operator.getBroadcastInputs().entrySet()) {
        OptimizerNode predecessor = operatorToNode.get(input.getValue());
        DagConnection connection = new DagConnection(predecessor, this, ShipStrategyType.BROADCAST, defaultExchangeMode);
        addBroadcastConnection(input.getKey(), connection);
        predecessor.addOutgoingConnection(connection);
    }
}
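The design choice worth noting here: the ship strategy of a broadcast input is hard-wired to ShipStrategyType.BROADCAST when the connection is created, since broadcast variables must reach every parallel instance of the consuming operator; only the data exchange mode remains configurable through defaultExchangeMode.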
Example #5
Source File: DualInputPlanNode.java From flink with Apache License 2.0
public DualInputPlanNode(OptimizerNode template, String nodeName, Channel input1, Channel input2,
        DriverStrategy driverStrategy, FieldList driverKeyFields1, FieldList driverKeyFields2,
        boolean[] driverSortOrders) {
    super(template, nodeName, driverStrategy);
    this.input1 = input1;
    this.input2 = input2;
    this.keys1 = driverKeyFields1;
    this.keys2 = driverKeyFields2;
    this.sortOrders = driverSortOrders;

    if (this.input1.getShipStrategy() == ShipStrategyType.BROADCAST) {
        this.input1.setReplicationFactor(getParallelism());
    }
    if (this.input2.getShipStrategy() == ShipStrategyType.BROADCAST) {
        this.input2.setReplicationFactor(getParallelism());
    }

    mergeBranchPlanMaps(input1.getSource(), input2.getSource());
}
Example #6
Source File: ParallelismChangeTest.java From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
    final int p = DEFAULT_PARALLELISM * 2;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
            .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Map2")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
            .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: when reducer 1 distributes its data across the instances of map2,
    // it needs to employ a local hash method, because map2 has twice as many instances and
    // key/value pairs with the same key must be processed by the same map and reduce instance
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertTrue("Invalid ship strategy for an operator.",
            (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) ||
            (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
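The assertion above accepts two equivalent plans: either the data is redistributed round-robin (PARTITION_RANDOM) into the higher parallelism before Map2 and then hash-partitioned for Reduce2, or the hash partitioning already happens at the parallelism change before Map2 and Reduce2 can read its input with FORWARD. Either way, the keyed partitioning that the parallelism change invalidated is re-established.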
Example #7
Source File: OutputEmitterTest.java From flink with Apache License 2.0
private int[] getSelectedChannelsHitCount(
        ShipStrategyType shipStrategyType,
        int numRecords,
        int numberOfChannels,
        Enum recordType) {
    final TypeComparator<Record> comparator = new RecordComparatorFactory(
        new int[] {0},
        new Class[] {recordType == RecordType.INTEGER ? IntValue.class : StringValue.class}).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> selector = createChannelSelector(
        shipStrategyType, comparator, numberOfChannels);
    final SerializationDelegate<Record> delegate = new SerializationDelegate<>(new RecordSerializerFactory().getSerializer());

    return getSelectedChannelsHitCount(selector, delegate, recordType, numRecords, numberOfChannels);
}
Example #8
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0
@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
                .map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);

        data.distinct(1)
            .groupBy(0)
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // reducer must repartition, because it works on a different field
        assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());

        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());

        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #9
Source File: GroupingPojoTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Pojo2> data = env.fromElements(new Pojo2())
                .rebalance().setParallelism(4);

        data.groupBy("a").withPartitioner(new TestPartitionerInt())
            .reduceGroup(new IdentityGroupReducerCombinable<Pojo2>())
            .output(new DiscardingOutputFormat<Pojo2>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .map(new IdMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example #11
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0
@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
                .map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);

        data.distinct(0)
            .groupBy(0)
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // reducer can be forward, reuses partitioning from distinct
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());

        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #12
Source File: OutputEmitterTest.java From flink with Apache License 2.0
private void verifyForwardSelectedChannels(int numRecords, int numberOfChannels, Enum recordType) {
    int[] hits = getSelectedChannelsHitCount(ShipStrategyType.FORWARD, numRecords, numberOfChannels, recordType);

    assertTrue(hits[0] == numRecords);
    for (int i = 1; i < hits.length; i++) {
        assertTrue(hits[i] == 0);
    }
}
Example #13
Source File: JoinTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testPartitionHashFirstTest() {
    try {
        DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_HASH_FIRST);
        assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, node.getDriverStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + ": " + e.getMessage());
    }
}
Example #14
Source File: GroupingPojoTranslationTest.java From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduceSorted() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Pojo3> data = env.fromElements(new Pojo3())
                .rebalance().setParallelism(4);

        data.groupBy("a").withPartitioner(new TestPartitionerInt())
            .sortGroup("b", Order.ASCENDING)
            .reduceGroup(new IdentityGroupReducerCombinable<Pojo3>())
            .output(new DiscardingOutputFormat<Pojo3>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #15
Source File: JoinTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testBroadcastHashFirstTest() {
    try {
        DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.BROADCAST_HASH_FIRST);
        assertEquals(ShipStrategyType.BROADCAST, node.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, node.getInput2().getShipStrategy());
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, node.getDriverStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + ": " + e.getMessage());
    }
}
Example #16
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests a join program with a replicated data source.
 */
@Test
public void checkJoinWithReplicatedSourceInput() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example #17
Source File: AllGroupWithPartialPreGroupProperties.java From flink with Apache License 2.0
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
    if (in.getShipStrategy() == ShipStrategyType.FORWARD) {
        // locally connected, directly instantiate
        return new SingleInputPlanNode(node, "GroupReduce (" + node.getOperator().getName() + ")",
                in, DriverStrategy.ALL_GROUP_REDUCE);
    } else {
        // non-forward case: plug in a combiner
        Channel toCombiner = new Channel(in.getSource());
        toCombiner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);

        // create an input node for combine with the same parallelism as the input node
        GroupReduceNode combinerNode = ((GroupReduceNode) node).getCombinerUtilityNode();
        combinerNode.setParallelism(in.getSource().getParallelism());

        SingleInputPlanNode combiner = new SingleInputPlanNode(combinerNode, "Combine (" + node.getOperator().getName() + ")",
                toCombiner, DriverStrategy.ALL_GROUP_REDUCE_COMBINE);
        combiner.setCosts(new Costs(0, 0));
        combiner.initProperties(toCombiner.getGlobalProperties(), toCombiner.getLocalProperties());

        Channel toReducer = new Channel(combiner);
        toReducer.setShipStrategy(in.getShipStrategy(), in.getShipStrategyKeys(),
                in.getShipStrategySortOrder(), in.getDataExchangeMode());
        toReducer.setLocalStrategy(in.getLocalStrategy(), in.getLocalStrategyKeys(),
                in.getLocalStrategySortOrder());

        return new SingleInputPlanNode(node, "GroupReduce (" + node.getOperator().getName() + ")",
                toReducer, DriverStrategy.ALL_GROUP_REDUCE);
    }
}
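Two details of the non-forward branch are worth noting: the combiner is connected with a local FORWARD channel at the input's parallelism, so pre-aggregation happens before the data is shipped, and its costs are set to zero (new Costs(0, 0)), which presumably keeps the inserted combiner from making the candidate plan look more expensive during plan enumeration.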
Example #18
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a flatMap.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .flatMap(new IdFlatMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example #19
Source File: DagConnection.java From flink with Apache License 2.0
/**
 * Constructor to create a result from an operator that is not
 * consumed by another operator.
 *
 * @param source
 *        The source node.
 * @param exchangeMode
 *        The data exchange mode (pipelined / batch / batch only for shuffles / ... )
 */
public DagConnection(OptimizerNode source, ExecutionMode exchangeMode) {
    if (source == null) {
        throw new NullPointerException("Source must not be null.");
    }
    this.source = source;
    this.target = null;
    this.shipStrategy = ShipStrategyType.NONE;
    this.dataExchangeMode = exchangeMode;
}
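A minimal, hypothetical usage sketch for this constructor (someSourceNode stands in for any OptimizerNode, and the getter on DagConnection is assumed):

// Result of an operator with no consumer: no target, and the ship strategy stays NONE.
DagConnection unconsumed = new DagConnection(someSourceNode, ExecutionMode.PIPELINED);
assert unconsumed.getShipStrategy() == ShipStrategyType.NONE;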
Example #20
Source File: GroupingPojoTranslationTest.java From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Pojo2> data = env.fromElements(new Pojo2())
                .rebalance().setParallelism(4);

        data.groupBy("a").withPartitioner(new TestPartitionerInt())
            .reduce(new SelectOneReducer<Pojo2>())
            .output(new DiscardingOutputFormat<Pojo2>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #21
Source File: ParallelismChangeTest.java From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
    final int p = DEFAULT_PARALLELISM;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
            .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map2")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
            .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: Reduce2 has twice the parallelism of Map2, so the hash
    // partitioning established by Reduce1 is no longer valid and must be re-established
    // on the channel between Map2 and Reduce2
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
Example #22
Source File: GroupingTupleTranslationTest.java From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0))
                .rebalance().setParallelism(4);

        data.groupBy(0).withPartitioner(new TestPartitionerInt())
            .reduce(new SelectOneReducer<Tuple2<Integer, Integer>>())
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #23
Source File: ChainTaskTest.java From flink with Apache License 2.0
@Test
public void testBatchTaskOutputInCloseMethod() {
    final int numChainedTasks = 10;
    final int keyCnt = 100;
    final int valCnt = 10;

    try {
        initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
        addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
        addOutput(outList);
        registerTask(FlatMapDriver.class, MockMapStub.class);

        for (int i = 0; i < numChainedTasks; i++) {
            final TaskConfig taskConfig = new TaskConfig(new Configuration());
            taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
            taskConfig.setOutputSerializer(serFact);
            taskConfig.setStubWrapper(
                new UserCodeClassWrapper<>(MockDuplicateLastValueMapFunction.class));
            getTaskConfig().addChainedTask(
                ChainedFlatMapDriver.class, taskConfig, "chained-" + i);
        }

        final BatchTask<FlatMapFunction<Record, Record>, Record> testTask =
            new BatchTask<>(mockEnv);

        testTask.invoke();

        Assert.assertEquals(keyCnt * valCnt + numChainedTasks, outList.size());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
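Note that each chained task is registered with ShipStrategyType.FORWARD: chained drivers run back-to-back inside the same task, so records are handed over locally and no other ship strategy would apply.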
Example #24
Source File: FeedbackPropertiesMatchTest.java From flink with Apache License 2.0
@Test
public void testNoPartialSolutionFoundTwoInputOperator() {
    try {
        SourcePlanNode target = new SourcePlanNode(getSourceNode(), "Partial Solution");

        SourcePlanNode source1 = new SourcePlanNode(getSourceNode(), "Source 1");
        SourcePlanNode source2 = new SourcePlanNode(getSourceNode(), "Source 2");

        Channel toMap1 = new Channel(source1);
        toMap1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        toMap1.setLocalStrategy(LocalStrategy.NONE);
        SingleInputPlanNode map1 = new SingleInputPlanNode(getMapNode(), "Mapper 1", toMap1, DriverStrategy.MAP);

        Channel toMap2 = new Channel(source2);
        toMap2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        toMap2.setLocalStrategy(LocalStrategy.NONE);
        SingleInputPlanNode map2 = new SingleInputPlanNode(getMapNode(), "Mapper 2", toMap2, DriverStrategy.MAP);

        Channel toJoin1 = new Channel(map1);
        Channel toJoin2 = new Channel(map2);

        toJoin1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        toJoin1.setLocalStrategy(LocalStrategy.NONE);

        toJoin2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        toJoin2.setLocalStrategy(LocalStrategy.NONE);

        DualInputPlanNode join = new DualInputPlanNode(getJoinNode(), "Join", toJoin1, toJoin2, DriverStrategy.HYBRIDHASH_BUILD_FIRST);

        FeedbackPropertiesMeetRequirementsReport report = join.checkPartialSolutionPropertiesMet(target, new GlobalProperties(), new LocalProperties());
        assertEquals(NO_PARTIAL_SOLUTION, report);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #25
Source File: JoinTranslationTest.java From flink with Apache License 2.0
@Test
public void testPartitionSortMergeTest() {
    try {
        DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_SORT_MERGE);
        assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
        assertEquals(DriverStrategy.INNER_MERGE, node.getDriverStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + ": " + e.getMessage());
    }
}
Example #26
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testTwoWorksetIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> firstResult = doDeltaIteration(verticesWithInitialId, edges);
        DataSet<Tuple2<Long, Long>> secondResult = doDeltaIteration(firstResult, edges);

        secondResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());

        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #27
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testIterationNotPushingWorkOut() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
        DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);

        // Use input1 as the partial solution. The partial solution is used in a single join operation,
        // so it is cheaper to do the hash partitioning between the partial solution node and the join node
        // instead of pushing the partitioning out of the iteration.
        doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);

        BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

        // check that work has not been pushed out
        for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }

        assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}