org.apache.flink.streaming.runtime.partitioner.RescalePartitioner Java Exaples

Source File: StreamingJobGraphGeneratorWithGlobalDataExchangeModeTest.java From flink with Apache License 2.0

6 votes

/**
 * Topology: source(parallelism=1) --(forward)--> map1(parallelism=1)
 *           --(rescale)--> map2(parallelism=2) --(rebalance)--> sink(parallelism=2).
 */
private static StreamGraph createStreamGraph() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Integer> source = env.fromElements(1, 2, 3).setParallelism(1);

	final DataStream<Integer> forward = new DataStream<>(env, new PartitionTransformation<>(
		source.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	final DataStream<Integer> map1 = forward.map(i -> i).startNewChain().setParallelism(1);

	final DataStream<Integer> rescale = new DataStream<>(env, new PartitionTransformation<>(
		map1.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	final DataStream<Integer> map2 = rescale.map(i -> i).setParallelism(2);

	map2.rebalance().print().setParallelism(2);

	return env.getStreamGraph();
}

Source File: StreamingJobGraphGenerator.java From Flink-CEPplus with Apache License 2.0

5 votes

private void connect(Integer headOfChain, StreamEdge edge) {

		physicalEdgesInOrder.add(edge);

		Integer downStreamvertexID = edge.getTargetId();

		JobVertex headVertex = jobVertices.get(headOfChain);
		JobVertex downStreamVertex = jobVertices.get(downStreamvertexID);

		StreamConfig downStreamConfig = new StreamConfig(downStreamVertex.getConfiguration());

		downStreamConfig.setNumberOfInputs(downStreamConfig.getNumberOfInputs() + 1);

		StreamPartitioner<?> partitioner = edge.getPartitioner();
		JobEdge jobEdge;
		if (partitioner instanceof ForwardPartitioner || partitioner instanceof RescalePartitioner) {
			jobEdge = downStreamVertex.connectNewDataSetAsInput(
				headVertex,
				DistributionPattern.POINTWISE,
				ResultPartitionType.PIPELINED_BOUNDED);
		} else {
			jobEdge = downStreamVertex.connectNewDataSetAsInput(
					headVertex,
					DistributionPattern.ALL_TO_ALL,
					ResultPartitionType.PIPELINED_BOUNDED);
		}
		// set strategy name so that web interface can show it.
		jobEdge.setShipStrategyName(partitioner.toString());

		if (LOG.isDebugEnabled()) {
			LOG.debug("CONNECTED: {} - {} -> {}", partitioner.getClass().getSimpleName(),
					headOfChain, downStreamvertexID);
		}
	}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#PIPELINED}.
 */
@Test
public void testShuffleModePipelined() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, verticesSorted.size());

	// it can be chained with PIPELINED shuffle mode
	JobVertex sourceAndMapVertex = verticesSorted.get(0);

	// PIPELINED shuffle mode is translated into PIPELINED_BOUNDED result partition
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
			sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	// it can not be chained with BATCH shuffle mode
	JobVertex sourceVertex = verticesSorted.get(0);
	JobVertex mapVertex = verticesSorted.get(1);

	// BATCH shuffle mode is translated into BLOCKING result partition
	assertEquals(ResultPartitionType.BLOCKING,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.BLOCKING,
		mapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#UNDEFINED}.
 */
@Test
public void testShuffleModeUndefined() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, verticesSorted.size());

	// it can be chained with UNDEFINED shuffle mode
	JobVertex sourceAndMapVertex = verticesSorted.get(0);

	// UNDEFINED shuffle mode is translated into PIPELINED_BOUNDED result partition by default
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
		sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Verify that "blockingConnectionsBetweenChains" is off by default.
 */
@Test
public void testBlockingAfterChainingOffDisabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Filter -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	// partition transformation with an undefined shuffle mode between source and filter
	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
		sourceDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> filterDataStream = partitionAfterSourceDataStream.filter(value -> true).setParallelism(2);

	DataStream<Integer> partitionAfterFilterDataStream = new DataStream<>(env, new PartitionTransformation<>(
		filterDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));

	partitionAfterFilterDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, verticesSorted.size());

	JobVertex sourceVertex = verticesSorted.get(0);
	JobVertex filterAndPrintVertex = verticesSorted.get(1);

	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
			filterAndPrintVertex.getInputs().get(0).getSource().getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test enabling the property "blockingConnectionsBetweenChains".
 */
@Test
public void testBlockingConnectionsBetweenChainsEnabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Filter -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	// partition transformation with an undefined shuffle mode between source and filter
	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
		sourceDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> filterDataStream = partitionAfterSourceDataStream.filter(value -> true).setParallelism(2);

	DataStream<Integer> partitionAfterFilterDataStream = new DataStream<>(env, new PartitionTransformation<>(
		filterDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	partitionAfterFilterDataStream.map(value -> value).setParallelism(2);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
		filterDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED));
	partitionAfterMapDataStream.print().setParallelism(1);

	StreamGraph streamGraph = env.getStreamGraph();
	streamGraph.setBlockingConnectionsBetweenChains(true);
	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	JobVertex sourceVertex = verticesSorted.get(0);
	// still can be chained
	JobVertex filterAndMapVertex = verticesSorted.get(1);
	JobVertex printVertex = verticesSorted.get(2);

	// the edge with undefined shuffle mode is translated into BLOCKING
	assertEquals(ResultPartitionType.BLOCKING, sourceVertex.getProducedDataSets().get(0).getResultType());
	// the edge with PIPELINED shuffle mode is translated into PIPELINED_BOUNDED
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, filterAndMapVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, printVertex.getInputs().get(0).getSource().getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#PIPELINED}.
 */
@Test
public void testShuffleModePipelined() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, verticesSorted.size());

	// it can be chained with PIPELINED shuffle mode
	JobVertex sourceAndMapVertex = verticesSorted.get(0);

	// PIPELINED shuffle mode is translated into PIPELINED_BOUNDED result partition
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
			sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	// it can not be chained with BATCH shuffle mode
	JobVertex sourceVertex = verticesSorted.get(0);
	JobVertex mapVertex = verticesSorted.get(1);

	// BATCH shuffle mode is translated into BLOCKING result partition
	assertEquals(ResultPartitionType.BLOCKING,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.BLOCKING,
		mapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#UNDEFINED}.
 */
@Test
public void testShuffleModeUndefined() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, verticesSorted.size());

	// it can be chained with UNDEFINED shuffle mode
	JobVertex sourceAndMapVertex = verticesSorted.get(0);

	// UNDEFINED shuffle mode is translated into PIPELINED_BOUNDED result partition by default
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
		sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGenerator.java From flink with Apache License 2.0

4 votes

private void connect(Integer headOfChain, StreamEdge edge) {

		physicalEdgesInOrder.add(edge);

		Integer downStreamvertexID = edge.getTargetId();

		JobVertex headVertex = jobVertices.get(headOfChain);
		JobVertex downStreamVertex = jobVertices.get(downStreamvertexID);

		StreamConfig downStreamConfig = new StreamConfig(downStreamVertex.getConfiguration());

		downStreamConfig.setNumberOfInputs(downStreamConfig.getNumberOfInputs() + 1);

		StreamPartitioner<?> partitioner = edge.getPartitioner();

		ResultPartitionType resultPartitionType;
		switch (edge.getShuffleMode()) {
			case PIPELINED:
				resultPartitionType = ResultPartitionType.PIPELINED_BOUNDED;
				break;
			case BATCH:
				resultPartitionType = ResultPartitionType.BLOCKING;
				break;
			case UNDEFINED:
				resultPartitionType = streamGraph.isBlockingConnectionsBetweenChains() ?
						ResultPartitionType.BLOCKING : ResultPartitionType.PIPELINED_BOUNDED;
				break;
			default:
				throw new UnsupportedOperationException("Data exchange mode " +
					edge.getShuffleMode() + " is not supported yet.");
		}

		JobEdge jobEdge;
		if (partitioner instanceof ForwardPartitioner || partitioner instanceof RescalePartitioner) {
			jobEdge = downStreamVertex.connectNewDataSetAsInput(
				headVertex,
				DistributionPattern.POINTWISE,
				resultPartitionType);
		} else {
			jobEdge = downStreamVertex.connectNewDataSetAsInput(
					headVertex,
					DistributionPattern.ALL_TO_ALL,
					resultPartitionType);
		}
		// set strategy name so that web interface can show it.
		jobEdge.setShipStrategyName(partitioner.toString());

		if (LOG.isDebugEnabled()) {
			LOG.debug("CONNECTED: {} - {} -> {}", partitioner.getClass().getSimpleName(),
					headOfChain, downStreamvertexID);
		}
	}

Source File: StreamingJobGraphGenerator.java From flink with Apache License 2.0

4 votes

private static boolean isPointwisePartitioner(StreamPartitioner<?> partitioner) {
	return partitioner instanceof ForwardPartitioner || partitioner instanceof RescalePartitioner;
}

Source File: DataStream.java From Flink-CEPplus with Apache License 2.0

2 votes

/**
 * Sets the partitioning of the {@link DataStream} so that the output elements
 * are distributed evenly to a subset of instances of the next operation in a round-robin
 * fashion.
 *
 * <p>The subset of downstream operations to which the upstream operation sends
 * elements depends on the degree of parallelism of both the upstream and downstream operation.
 * For example, if the upstream operation has parallelism 2 and the downstream operation
 * has parallelism 4, then one upstream operation would distribute elements to two
 * downstream operations while the other upstream operation would distribute to the other
 * two downstream operations. If, on the other hand, the downstream operation has parallelism
 * 2 while the upstream operation has parallelism 4 then two upstream operations will
 * distribute to one downstream operation while the other two upstream operations will
 * distribute to the other downstream operations.
 *
 * <p>In cases where the different parallelisms are not multiples of each other one or several
 * downstream operations will have a differing number of inputs from upstream operations.
 *
 * @return The DataStream with rescale partitioning set.
 */
@PublicEvolving
public DataStream<T> rescale() {
	return setConnectionType(new RescalePartitioner<T>());
}

Source File: DataStream.java From flink with Apache License 2.0

2 votes

/**
 * Sets the partitioning of the {@link DataStream} so that the output elements
 * are distributed evenly to a subset of instances of the next operation in a round-robin
 * fashion.
 *
 * <p>The subset of downstream operations to which the upstream operation sends
 * elements depends on the degree of parallelism of both the upstream and downstream operation.
 * For example, if the upstream operation has parallelism 2 and the downstream operation
 * has parallelism 4, then one upstream operation would distribute elements to two
 * downstream operations while the other upstream operation would distribute to the other
 * two downstream operations. If, on the other hand, the downstream operation has parallelism
 * 2 while the upstream operation has parallelism 4 then two upstream operations will
 * distribute to one downstream operation while the other two upstream operations will
 * distribute to the other downstream operations.
 *
 * <p>In cases where the different parallelisms are not multiples of each other one or several
 * downstream operations will have a differing number of inputs from upstream operations.
 *
 * @return The DataStream with rescale partitioning set.
 */
@PublicEvolving
public DataStream<T> rescale() {
	return setConnectionType(new RescalePartitioner<T>());
}

Source File: DataStream.java From flink with Apache License 2.0

2 votes

/**
 * Sets the partitioning of the {@link DataStream} so that the output elements
 * are distributed evenly to a subset of instances of the next operation in a round-robin
 * fashion.
 *
 * <p>The subset of downstream operations to which the upstream operation sends
 * elements depends on the degree of parallelism of both the upstream and downstream operation.
 * For example, if the upstream operation has parallelism 2 and the downstream operation
 * has parallelism 4, then one upstream operation would distribute elements to two
 * downstream operations while the other upstream operation would distribute to the other
 * two downstream operations. If, on the other hand, the downstream operation has parallelism
 * 2 while the upstream operation has parallelism 4 then two upstream operations will
 * distribute to one downstream operation while the other two upstream operations will
 * distribute to the other downstream operations.
 *
 * <p>In cases where the different parallelisms are not multiples of each other one or several
 * downstream operations will have a differing number of inputs from upstream operations.
 *
 * @return The DataStream with rescale partitioning set.
 */
@PublicEvolving
public DataStream<T> rescale() {
	return setConnectionType(new RescalePartitioner<T>());
}

org.apache.flink.streaming.runtime.partitioner.RescalePartitioner Java Examples