org.apache.flink.runtime.operators.shipping.ShipStrategyType Java Examples
The following examples show how to use org.apache.flink.runtime.operators.shipping.ShipStrategyType. Each example is taken from an open source project; the source file and originating project are noted above the code.
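For orientation, the constants exercised across these examples are NONE, FORWARD, BROADCAST, PARTITION_HASH, PARTITION_RANGE, PARTITION_RANDOM, PARTITION_FORCED_REBALANCE, and PARTITION_CUSTOM (see Example #1). A minimal sketch that iterates over them; the "redistributes data" classification is this page's shorthand, not a Flink API:

import org.apache.flink.runtime.operators.shipping.ShipStrategyType;

public class ShipStrategyOverview {

    public static void main(String[] args) {
        for (ShipStrategyType type : ShipStrategyType.values()) {
            // NONE means "not decided / not applicable" and FORWARD is a local,
            // one-to-one hand-over; every other constant moves data between
            // parallel instances (by repartitioning or replication).
            boolean redistributes = type != ShipStrategyType.NONE && type != ShipStrategyType.FORWARD;
            System.out.println(type.name() + " redistributes data: " + redistributes);
        }
    }
}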
Example #1
Source File: JsonMapper.java From Flink-CEPplus with Apache License 2.0
public static String getShipStrategyString(ShipStrategyType shipType) {
    if (shipType == null) {
        return "(null)";
    }
    switch (shipType) {
        case NONE:
            return "(none)";
        case FORWARD:
            return "Forward";
        case BROADCAST:
            return "Broadcast";
        case PARTITION_HASH:
            return "Hash Partition";
        case PARTITION_RANGE:
            return "Range Partition";
        case PARTITION_RANDOM:
            return "Redistribute";
        case PARTITION_FORCED_REBALANCE:
            return "Rebalance";
        case PARTITION_CUSTOM:
            return "Custom Partition";
        default:
            return shipType.name();
    }
}
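A quick usage sketch for the helper above, assuming JsonMapper is on the classpath; the expected strings follow directly from the switch cases:

String a = JsonMapper.getShipStrategyString(ShipStrategyType.BROADCAST);       // "Broadcast"
String b = JsonMapper.getShipStrategyString(ShipStrategyType.PARTITION_HASH);  // "Hash Partition"
String c = JsonMapper.getShipStrategyString(null);                             // "(null)"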
Example #2
Source File: OutputEmitterTest.java From flink with Apache License 2.0
private boolean verifyWrongPartitionHashKey(int position, int fieldNum) {
    final TypeComparator<Record> comparator = new RecordComparatorFactory(
        new int[] {position}, new Class[] {IntValue.class}).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> selector = createChannelSelector(
        ShipStrategyType.PARTITION_HASH, comparator, 100);
    final SerializationDelegate<Record> delegate = new SerializationDelegate<>(new RecordSerializerFactory().getSerializer());

    Record record = new Record(2);
    record.setField(fieldNum, new IntValue(1));
    delegate.setInstance(record);

    try {
        selector.selectChannel(delegate);
    } catch (NullKeyFieldException re) {
        Assert.assertEquals(position, re.getFieldNumber());
        return true;
    }
    return false;
}
Example #3
Source File: BinaryUnionReplacer.java From flink with Apache License 2.0
public void collect(Channel in, List<Channel> inputs) {
    if (in.getSource() instanceof NAryUnionPlanNode) {
        // sanity check
        if (in.getShipStrategy() != ShipStrategyType.FORWARD) {
            throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators.");
        }
        if (!(in.getLocalStrategy() == null || in.getLocalStrategy() == LocalStrategy.NONE)) {
            throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators.");
        }

        inputs.addAll(((NAryUnionPlanNode) in.getSource()).getListOfInputs());
    } else {
        // is not a collapsed union node, so we take the channel directly
        inputs.add(in);
    }
}
Example #4
Source File: OptimizerNode.java From Flink-CEPplus with Apache License 2.0
/**
 * This function connects the operators that produce the broadcast inputs to this operator.
 *
 * @param operatorToNode The map from program operators to optimizer nodes.
 * @param defaultExchangeMode The data exchange mode to use, if the operator does not
 *                            specify one.
 *
 * @throws CompilerException
 */
public void setBroadcastInputs(Map<Operator<?>, OptimizerNode> operatorToNode, ExecutionMode defaultExchangeMode) {
    // skip for Operators that don't support broadcast variables
    if (!(getOperator() instanceof AbstractUdfOperator<?, ?>)) {
        return;
    }

    // get all broadcast inputs
    AbstractUdfOperator<?, ?> operator = ((AbstractUdfOperator<?, ?>) getOperator());

    // create connections and add them
    for (Map.Entry<String, Operator<?>> input : operator.getBroadcastInputs().entrySet()) {
        OptimizerNode predecessor = operatorToNode.get(input.getValue());
        DagConnection connection = new DagConnection(predecessor, this, ShipStrategyType.BROADCAST, defaultExchangeMode);
        addBroadcastConnection(input.getKey(), connection);
        predecessor.addOutgoingConnection(connection);
    }
}
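The design choice worth noting here: the ship strategy of a broadcast input is hard-wired to ShipStrategyType.BROADCAST when the connection is created, since broadcast variables must reach every parallel instance of the consuming operator; only the data exchange mode remains configurable through defaultExchangeMode.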
Example #5
Source File: DualInputPlanNode.java From flink with Apache License 2.0
public DualInputPlanNode(OptimizerNode template, String nodeName, Channel input1, Channel input2,
        DriverStrategy driverStrategy, FieldList driverKeyFields1, FieldList driverKeyFields2,
        boolean[] driverSortOrders) {
    super(template, nodeName, driverStrategy);
    this.input1 = input1;
    this.input2 = input2;
    this.keys1 = driverKeyFields1;
    this.keys2 = driverKeyFields2;
    this.sortOrders = driverSortOrders;

    if (this.input1.getShipStrategy() == ShipStrategyType.BROADCAST) {
        this.input1.setReplicationFactor(getParallelism());
    }
    if (this.input2.getShipStrategy() == ShipStrategyType.BROADCAST) {
        this.input2.setReplicationFactor(getParallelism());
    }

    mergeBranchPlanMaps(input1.getSource(), input2.getSource());
}
Example #6
Source File: ParallelismChangeTest.java From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
    final int p = DEFAULT_PARALLELISM * 2;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
            .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Map2")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
            .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: when reducer 1 distributes its data across the instances of map2,
    // it needs to employ a local hash method, because map2 has twice as many instances and
    // key/value pairs with the same key must be processed by the same map and reduce instance
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertTrue("Invalid ship strategy for an operator.",
            (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) ||
            (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
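The assertion above accepts two equivalent plans: either the data is redistributed round-robin (PARTITION_RANDOM) into the higher parallelism before Map2 and then hash-partitioned for Reduce2, or the hash partitioning already happens at the parallelism change before Map2 and Reduce2 can read its input with FORWARD. Either way, the keyed partitioning that the parallelism change invalidated is re-established.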
Example #7
Source File: OutputEmitterTest.java From flink with Apache License 2.0
private int[] getSelectedChannelsHitCount(
        ShipStrategyType shipStrategyType,
        int numRecords,
        int numberOfChannels,
        Enum recordType) {
    final TypeComparator<Record> comparator = new RecordComparatorFactory(
        new int[] {0},
        new Class[] {recordType == RecordType.INTEGER ? IntValue.class : StringValue.class}).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> selector = createChannelSelector(
        shipStrategyType, comparator, numberOfChannels);
    final SerializationDelegate<Record> delegate = new SerializationDelegate<>(new RecordSerializerFactory().getSerializer());

    return getSelectedChannelsHitCount(selector, delegate, recordType, numRecords, numberOfChannels);
}
Example #8
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0
@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
                .map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);

        data.distinct(1)
            .groupBy(0)
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // reducer must repartition, because it works on a different field
        assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());

        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());

        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #9
Source File: GroupingPojoTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Pojo2> data = env.fromElements(new Pojo2())
                .rebalance().setParallelism(4);

        data.groupBy("a").withPartitioner(new TestPartitionerInt())
            .reduceGroup(new IdentityGroupReducerCombinable<Pojo2>())
            .output(new DiscardingOutputFormat<Pojo2>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .map(new IdMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example #11
Source File: DistinctAndGroupingOptimizerTest.java From flink with Apache License 2.0
@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
                .map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);

        data.distinct(0)
            .groupBy(0)
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // reducer can be forward, reuses partitioning from distinct
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());

        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #12
Source File: OutputEmitterTest.java From flink with Apache License 2.0
private void verifyForwardSelectedChannels(int numRecords, int numberOfChannels, Enum recordType) {
    int[] hits = getSelectedChannelsHitCount(ShipStrategyType.FORWARD, numRecords, numberOfChannels, recordType);

    assertTrue(hits[0] == numRecords);
    for (int i = 1; i < hits.length; i++) {
        assertTrue(hits[i] == 0);
    }
}
Example #13
Source File: JoinTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testPartitionHashFirstTest() {
    try {
        DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_HASH_FIRST);
        assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, node.getDriverStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + ": " + e.getMessage());
    }
}
Example #14
Source File: GroupingPojoTranslationTest.java From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduceSorted() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Pojo3> data = env.fromElements(new Pojo3())
                .rebalance().setParallelism(4);

        data.groupBy("a").withPartitioner(new TestPartitionerInt())
            .sortGroup("b", Order.ASCENDING)
            .reduceGroup(new IdentityGroupReducerCombinable<Pojo3>())
            .output(new DiscardingOutputFormat<Pojo3>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #15
Source File: JoinTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testBroadcastHashFirstTest() {
    try {
        DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.BROADCAST_HASH_FIRST);
        assertEquals(ShipStrategyType.BROADCAST, node.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, node.getInput2().getShipStrategy());
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, node.getDriverStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + ": " + e.getMessage());
    }
}
Example #16
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests a join program with a replicated data source.
 */
@Test
public void checkJoinWithReplicatedSourceInput() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example #17
Source File: AllGroupWithPartialPreGroupProperties.java From flink with Apache License 2.0
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
    if (in.getShipStrategy() == ShipStrategyType.FORWARD) {
        // locally connected, directly instantiate
        return new SingleInputPlanNode(node, "GroupReduce (" + node.getOperator().getName() + ")",
                in, DriverStrategy.ALL_GROUP_REDUCE);
    } else {
        // non-forward case: plug in a combiner
        Channel toCombiner = new Channel(in.getSource());
        toCombiner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);

        // create an input node for combine with the same parallelism as the input node
        GroupReduceNode combinerNode = ((GroupReduceNode) node).getCombinerUtilityNode();
        combinerNode.setParallelism(in.getSource().getParallelism());

        SingleInputPlanNode combiner = new SingleInputPlanNode(combinerNode, "Combine (" + node.getOperator().getName() + ")",
                toCombiner, DriverStrategy.ALL_GROUP_REDUCE_COMBINE);
        combiner.setCosts(new Costs(0, 0));
        combiner.initProperties(toCombiner.getGlobalProperties(), toCombiner.getLocalProperties());

        Channel toReducer = new Channel(combiner);
        toReducer.setShipStrategy(in.getShipStrategy(), in.getShipStrategyKeys(),
                in.getShipStrategySortOrder(), in.getDataExchangeMode());
        toReducer.setLocalStrategy(in.getLocalStrategy(), in.getLocalStrategyKeys(),
                in.getLocalStrategySortOrder());

        return new SingleInputPlanNode(node, "GroupReduce (" + node.getOperator().getName() + ")",
                toReducer, DriverStrategy.ALL_GROUP_REDUCE);
    }
}
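Two details of the non-forward branch are worth noting: the combiner is connected with a local FORWARD channel at the input's parallelism, so pre-aggregation happens before the data is shipped, and its costs are set to zero (new Costs(0, 0)), which presumably keeps the inserted combiner from making the candidate plan look more expensive during plan enumeration.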
Example #18
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a flatMap.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .flatMap(new IdFlatMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example #19
Source File: DagConnection.java From flink with Apache License 2.0
/**
 * Constructor to create a result from an operator that is not
 * consumed by another operator.
 *
 * @param source
 *        The source node.
 * @param exchangeMode
 *        The data exchange mode (pipelined / batch / batch only for shuffles / ... )
 */
public DagConnection(OptimizerNode source, ExecutionMode exchangeMode) {
    if (source == null) {
        throw new NullPointerException("Source must not be null.");
    }
    this.source = source;
    this.target = null;
    this.shipStrategy = ShipStrategyType.NONE;
    this.dataExchangeMode = exchangeMode;
}
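A minimal, hypothetical usage sketch for this constructor (someSourceNode stands in for any OptimizerNode, and the getter on DagConnection is assumed):

// Result of an operator with no consumer: no target, and the ship strategy stays NONE.
DagConnection unconsumed = new DagConnection(someSourceNode, ExecutionMode.PIPELINED);
assert unconsumed.getShipStrategy() == ShipStrategyType.NONE;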
Example #20
Source File: GroupingPojoTranslationTest.java From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Pojo2> data = env.fromElements(new Pojo2())
                .rebalance().setParallelism(4);

        data.groupBy("a").withPartitioner(new TestPartitionerInt())
            .reduce(new SelectOneReducer<Pojo2>())
            .output(new DiscardingOutputFormat<Pojo2>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #21
Source File: ParallelismChangeTest.java From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
    final int p = DEFAULT_PARALLELISM;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
            .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map2")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
            .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: Reduce2 has twice the parallelism of Map2, so the hash
    // partitioning established by Reduce1 is no longer valid and must be re-established
    // on the channel between Map2 and Reduce2
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
Example #22
Source File: GroupingTupleTranslationTest.java From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0))
                .rebalance().setParallelism(4);

        data.groupBy(0).withPartitioner(new TestPartitionerInt())
            .reduce(new SelectOneReducer<Tuple2<Integer, Integer>>())
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #23
Source File: ChainTaskTest.java From flink with Apache License 2.0
@Test
public void testBatchTaskOutputInCloseMethod() {
    final int numChainedTasks = 10;
    final int keyCnt = 100;
    final int valCnt = 10;

    try {
        initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
        addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
        addOutput(outList);
        registerTask(FlatMapDriver.class, MockMapStub.class);

        for (int i = 0; i < numChainedTasks; i++) {
            final TaskConfig taskConfig = new TaskConfig(new Configuration());
            taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
            taskConfig.setOutputSerializer(serFact);
            taskConfig.setStubWrapper(
                new UserCodeClassWrapper<>(MockDuplicateLastValueMapFunction.class));
            getTaskConfig().addChainedTask(
                ChainedFlatMapDriver.class, taskConfig, "chained-" + i);
        }

        final BatchTask<FlatMapFunction<Record, Record>, Record> testTask =
            new BatchTask<>(mockEnv);

        testTask.invoke();

        Assert.assertEquals(keyCnt * valCnt + numChainedTasks, outList.size());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
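Note that each chained task is registered with ShipStrategyType.FORWARD: chained drivers run back-to-back inside the same task, so records are handed over locally and no other ship strategy would apply.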
Example #24
Source File: FeedbackPropertiesMatchTest.java From flink with Apache License 2.0
@Test
public void testNoPartialSolutionFoundTwoInputOperator() {
    try {
        SourcePlanNode target = new SourcePlanNode(getSourceNode(), "Partial Solution");

        SourcePlanNode source1 = new SourcePlanNode(getSourceNode(), "Source 1");
        SourcePlanNode source2 = new SourcePlanNode(getSourceNode(), "Source 2");

        Channel toMap1 = new Channel(source1);
        toMap1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        toMap1.setLocalStrategy(LocalStrategy.NONE);
        SingleInputPlanNode map1 = new SingleInputPlanNode(getMapNode(), "Mapper 1", toMap1, DriverStrategy.MAP);

        Channel toMap2 = new Channel(source2);
        toMap2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        toMap2.setLocalStrategy(LocalStrategy.NONE);
        SingleInputPlanNode map2 = new SingleInputPlanNode(getMapNode(), "Mapper 2", toMap2, DriverStrategy.MAP);

        Channel toJoin1 = new Channel(map1);
        Channel toJoin2 = new Channel(map2);

        toJoin1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        toJoin1.setLocalStrategy(LocalStrategy.NONE);

        toJoin2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
        toJoin2.setLocalStrategy(LocalStrategy.NONE);

        DualInputPlanNode join = new DualInputPlanNode(getJoinNode(), "Join", toJoin1, toJoin2, DriverStrategy.HYBRIDHASH_BUILD_FIRST);

        FeedbackPropertiesMeetRequirementsReport report = join.checkPartialSolutionPropertiesMet(target, new GlobalProperties(), new LocalProperties());
        assertEquals(NO_PARTIAL_SOLUTION, report);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #25
Source File: JoinTranslationTest.java From flink with Apache License 2.0
@Test
public void testPartitionSortMergeTest() {
    try {
        DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_SORT_MERGE);
        assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
        assertEquals(DriverStrategy.INNER_MERGE, node.getDriverStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + ": " + e.getMessage());
    }
}
Example #26
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testTwoWorksetIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> firstResult = doDeltaIteration(verticesWithInitialId, edges);
        DataSet<Tuple2<Long, Long>> secondResult = doDeltaIteration(firstResult, edges);

        secondResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());

        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #27
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testIterationNotPushingWorkOut() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
        DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);

        // Use input1 as the partial solution. The partial solution is used in a single join operation,
        // so it is cheaper to do the hash partitioning between the partial solution node and the join node
        // instead of pushing the partitioning out of the iteration.
        doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);

        BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

        // check that work has not been pushed out
        for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }

        assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}