org.apache.flink.streaming.api.transformations.PartitionTransformation Java Examples

Source File: StreamingJobGraphGeneratorWithGlobalDataExchangeModeTest.java From flink with Apache License 2.0

6 votes

/**
 * Builds a stream graph whose operators are connected by three different partitioners.
 *
 * <p>Topology: source(parallelism=1) --(forward)--> map1(parallelism=1)
 *           --(rescale)--> map2(parallelism=2) --(rebalance)--> sink(parallelism=2).
 *
 * @return the stream graph generated from the environment the topology was built on
 */
private static StreamGraph createStreamGraph() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Integer> input = environment.fromElements(1, 2, 3).setParallelism(1);

	// source --(forward)--> map1, with map1 explicitly starting a new chain
	final PartitionTransformation<Integer> forwardEdge = new PartitionTransformation<>(
		input.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> forwarded = new DataStream<>(environment, forwardEdge);
	final DataStream<Integer> firstMap = forwarded.map(i -> i).startNewChain().setParallelism(1);

	// map1 --(rescale)--> map2
	final PartitionTransformation<Integer> rescaleEdge = new PartitionTransformation<>(
		firstMap.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> rescaled = new DataStream<>(environment, rescaleEdge);
	final DataStream<Integer> secondMap = rescaled.map(i -> i).setParallelism(2);

	// map2 --(rebalance)--> sink
	secondMap.rebalance().print().setParallelism(2);

	return environment.getStreamGraph();
}

Source File: DataStreamUtils.java From flink with Apache License 2.0

6 votes

/**
 * Wraps an existing {@link DataStream} in a {@link KeyedStream} without re-partitioning it; keys are
 * extracted with the supplied {@link KeySelector}.
 *
 * <p>IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be
 * partitioned exactly in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 *
 * @param stream      The data stream to reinterpret. For every partition, this stream must be partitioned exactly
 *                    in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 * @param keySelector Function that defines how keys are extracted from the data stream.
 * @param typeInfo    Explicit type information about the key type.
 * @param <T>         Type of events in the data stream.
 * @param <K>         Type of the extracted keys.
 * @return The reinterpretation of the {@link DataStream} as a {@link KeyedStream}.
 */
public static <T, K> KeyedStream<T, K> reinterpretAsKeyedStream(
	DataStream<T> stream,
	KeySelector<T, K> keySelector,
	TypeInformation<K> typeInfo) {

	// The caller guarantees the stream is already partitioned as keyBy would do it,
	// so the keyed stream is built on a forward partitioner.
	return new KeyedStream<>(
		stream,
		new PartitionTransformation<T>(stream.getTransformation(), new ForwardPartitioner<>()),
		keySelector,
		typeInfo);
}

Source File: StreamingJobGraphGeneratorWithGlobalDataExchangeModeTest.java From flink with Apache License 2.0

6 votes

/**
 * Verifies that an edge whose shuffle mode is explicitly {@link ShuffleMode#PIPELINED} still yields a
 * PIPELINED_BOUNDED result partition when the graph-wide mode is ALL_EDGES_BLOCKING.
 */
@Test
public void testGlobalDataExchangeModeDoesNotOverrideSpecifiedShuffleMode() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Integer> input = environment.fromElements(1, 2, 3).setParallelism(1);

	// the forward edge explicitly requests PIPELINED
	final PartitionTransformation<Integer> pipelinedForward = new PartitionTransformation<>(
		input.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED);
	final DataStream<Integer> forwarded = new DataStream<>(environment, pipelinedForward);
	forwarded.map(i -> i).startNewChain().setParallelism(1);

	// the global setting asks for blocking edges everywhere
	final StreamGraph graph = environment.getStreamGraph();
	graph.setGlobalDataExchangeMode(GlobalDataExchangeMode.ALL_EDGES_BLOCKING);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(graph);

	final List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	final JobVertex firstVertex = sortedVertices.get(0);

	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, firstVertex.getProducedDataSets().get(0).getResultType());
}

Source File: DataStreamUtils.java From flink with Apache License 2.0

6 votes

/**
 * Wraps an existing {@link DataStream} in a {@link KeyedStream} without re-partitioning it; keys are
 * extracted with the supplied {@link KeySelector}.
 *
 * <p>IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be
 * partitioned exactly in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 *
 * @param stream      The data stream to reinterpret. For every partition, this stream must be partitioned exactly
 *                    in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 * @param keySelector Function that defines how keys are extracted from the data stream.
 * @param typeInfo    Explicit type information about the key type.
 * @param <T>         Type of events in the data stream.
 * @param <K>         Type of the extracted keys.
 * @return The reinterpretation of the {@link DataStream} as a {@link KeyedStream}.
 */
public static <T, K> KeyedStream<T, K> reinterpretAsKeyedStream(
	DataStream<T> stream,
	KeySelector<T, K> keySelector,
	TypeInformation<K> typeInfo) {

	// The caller guarantees the stream is already partitioned as keyBy would do it,
	// so the keyed stream is built on a forward partitioner.
	return new KeyedStream<>(
		stream,
		new PartitionTransformation<T>(stream.getTransformation(), new ForwardPartitioner<>()),
		keySelector,
		typeInfo);
}

Source File: SiddhiStream.java From flink-siddhi with Apache License 2.0

6 votes

/**
 * Siddhi Continuous Query Language (CQL).
 *
 * <p>Broadcasts the named control stream, unions it with this stream's data, tags the result through
 * the "add route transform" operator and finally re-partitions it with a {@code DynamicPartitioner}.
 *
 * @param controlStream stream of control events that is broadcast and merged with the data stream
 * @return ExecutionSiddhiStream context
 */
public ExecutionSiddhiStream cql(DataStream<ControlEvent> controlStream) {
    // control events (broadcast) + data events, each wrapped with routing information
    DataStream<Tuple2<StreamRoute, Object>> routedStream = controlStream
        .map(new NamedControlStream(ControlEvent.DEFAULT_INTERNAL_CONTROL_STREAM))
        .broadcast()
        .union(this.toDataStream())
        .transform(
            "add route transform",
            SiddhiTypeFactory.getStreamTupleTypeInformation(TypeInformation.of(Object.class)),
            new AddRouteOperator(getCepEnvironment().getDataStreamSchemas()));

    // wrap the routed stream in a partition transformation driven by the dynamic partitioner
    DataStream<Tuple2<StreamRoute, Object>> dynamicallyPartitioned = new DataStream<>(
        routedStream.getExecutionEnvironment(),
        new PartitionTransformation<>(
            routedStream.getTransformation(),
            new DynamicPartitioner()));

    return new ExecutionSiddhiStream(dynamicallyPartitioned, null, getCepEnvironment());
}

Source File: DataStreamUtils.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Wraps an existing {@link DataStream} in a {@link KeyedStream} without re-partitioning it; keys are
 * extracted with the supplied {@link KeySelector}.
 *
 * <p>IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be
 * partitioned exactly in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 *
 * @param stream      The data stream to reinterpret. For every partition, this stream must be partitioned exactly
 *                    in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 * @param keySelector Function that defines how keys are extracted from the data stream.
 * @param typeInfo    Explicit type information about the key type.
 * @param <T>         Type of events in the data stream.
 * @param <K>         Type of the extracted keys.
 * @return The reinterpretation of the {@link DataStream} as a {@link KeyedStream}.
 */
public static <T, K> KeyedStream<T, K> reinterpretAsKeyedStream(
	DataStream<T> stream,
	KeySelector<T, K> keySelector,
	TypeInformation<K> typeInfo) {

	// The caller guarantees the stream is already partitioned as keyBy would do it,
	// so the keyed stream is built on a forward partitioner.
	return new KeyedStream<>(
		stream,
		new PartitionTransformation<T>(stream.getTransformation(), new ForwardPartitioner<>()),
		keySelector,
		typeInfo);
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Verify that "blockingConnectionsBetweenChains" is off by default: edges with an undefined
 * shuffle mode are expected to produce PIPELINED_BOUNDED result partitions.
 */
@Test
public void testBlockingAfterChainingOffDisabled() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Filter -> Print
	DataStream<Integer> input = environment.fromElements(1, 2, 3);

	// rescale edge with an undefined shuffle mode between source and filter
	PartitionTransformation<Integer> sourceToFilter = new PartitionTransformation<>(
		input.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED);
	DataStream<Integer> afterSource = new DataStream<>(environment, sourceToFilter);
	DataStream<Integer> filtered = afterSource.filter(value -> true).setParallelism(2);

	// forward edge with an undefined shuffle mode between filter and print
	PartitionTransformation<Integer> filterToPrint = new PartitionTransformation<>(
		filtered.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED);
	DataStream<Integer> afterFilter = new DataStream<>(environment, filterToPrint);

	afterFilter.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, sortedVertices.size());

	JobVertex sourceVertex = sortedVertices.get(0);
	JobVertex filterAndPrintVertex = sortedVertices.get(1);

	// both ends of the single job edge must report a pipelined result partition
	assertEquals(
		ResultPartitionType.PIPELINED_BOUNDED,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(
		ResultPartitionType.PIPELINED_BOUNDED,
		filterAndPrintVertex.getInputs().get(0).getSource().getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Builds a StreamGraph made of two independent pipelines.
 *
 * <p>source1 --(rebalance & pipelined)--> Map1
 *
 * <p>source2 --(rebalance & blocking)--> Map2
 *
 * @return the stream graph generated from the environment both pipelines were built on
 */
private StreamGraph createStreamGraphForSlotSharingTest() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// first pipeline: plain rebalance()
	final DataStream<Integer> firstSource = environment.fromElements(1, 2, 3).name("source1");
	firstSource.rebalance().map(v -> v).name("map1");

	// second pipeline: rebalance edge with BATCH shuffle mode
	final DataStream<Integer> secondSource = environment.fromElements(4, 5, 6).name("source2");
	final PartitionTransformation<Integer> batchRebalance = new PartitionTransformation<>(
		secondSource.getTransformation(), new RebalancePartitioner<>(), ShuffleMode.BATCH);
	final DataStream<Integer> rebalanced = new DataStream<>(environment, batchRebalance);
	rebalanced.map(v -> v).name("map2");

	return environment.getStreamGraph();
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#UNDEFINED}: source and map are expected to share
 * one vertex, and the outgoing edge is expected to be PIPELINED_BOUNDED.
 */
@Test
public void testShuffleModeUndefined() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> input = environment.fromElements(1, 2, 3);

	// source --(forward, UNDEFINED)--> map
	PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			input.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED);
	DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	DataStream<Integer> mapped = afterSource.map(value -> value).setParallelism(1);

	// map --(rescale, UNDEFINED)--> print
	PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED);
	DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, sortedVertices.size());

	// it can be chained with UNDEFINED shuffle mode
	JobVertex sourceAndMapVertex = sortedVertices.get(0);

	// UNDEFINED shuffle mode is translated into PIPELINED_BOUNDED result partition by default
	assertEquals(
		ResultPartitionType.PIPELINED_BOUNDED,
		sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}: no operators are expected to be chained,
 * and both edges are expected to be BLOCKING.
 */
@Test
public void testShuffleModeBatch() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> input = environment.fromElements(1, 2, 3);

	// source --(forward, BATCH)--> map
	PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			input.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH);
	DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	DataStream<Integer> mapped = afterSource.map(value -> value).setParallelism(1);

	// map --(rescale, BATCH)--> print
	PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH);
	DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, sortedVertices.size());

	// it can not be chained with BATCH shuffle mode
	JobVertex sourceVertex = sortedVertices.get(0);
	JobVertex mapVertex = sortedVertices.get(1);

	// BATCH shuffle mode is translated into BLOCKING result partition
	assertEquals(
		ResultPartitionType.BLOCKING,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(
		ResultPartitionType.BLOCKING,
		mapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#PIPELINED}: source and map are expected to share
 * one vertex, and the outgoing edge is expected to be PIPELINED_BOUNDED.
 */
@Test
public void testShuffleModePipelined() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> input = environment.fromElements(1, 2, 3);

	// source --(forward, PIPELINED)--> map
	PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			input.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED);
	DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	DataStream<Integer> mapped = afterSource.map(value -> value).setParallelism(1);

	// map --(rescale, PIPELINED)--> print
	PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED);
	DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, sortedVertices.size());

	// it can be chained with PIPELINED shuffle mode
	JobVertex sourceAndMapVertex = sortedVertices.get(0);

	// PIPELINED shuffle mode is translated into PIPELINED_BOUNDED result partition
	assertEquals(
		ResultPartitionType.PIPELINED_BOUNDED,
		sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamGraphGenerator.java From flink with Apache License 2.0

5 votes

/**
 * Checks that a split may be applied on top of {@code input}.
 *
 * <p>Union and partition transformations are looked through recursively; any other kind of
 * transformation ends the check successfully.
 *
 * @param input the transformation that feeds the split
 * @throws IllegalStateException if a select, split or side-output transformation is reachable
 *         from {@code input} through union/partition transformations only
 */
private <T> void validateSplitTransformation(Transformation<T> input) {
	if (input instanceof SelectTransformation || input instanceof SplitTransformation) {
		throw new IllegalStateException("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");
	}
	if (input instanceof SideOutputTransformation) {
		throw new IllegalStateException("Split after side-outputs are not supported. Splits are deprecated. Please use side-outputs.");
	}

	if (input instanceof UnionTransformation) {
		// every branch of the union has to be valid
		for (Transformation<T> transformation : ((UnionTransformation<T>) input).getInputs()) {
			validateSplitTransformation(transformation);
		}
	} else if (input instanceof PartitionTransformation) {
		// parameterized cast (instead of the raw type) keeps the recursive call type-checked
		validateSplitTransformation(((PartitionTransformation<T>) input).getInput());
	}
	// any other transformation is fine as the input of a split
}

Source File: StreamGraphGenerator.java From flink with Apache License 2.0

5 votes

/**
 * Transforms a {@code PartitionTransformation}.
 *
 * <p>The input is transformed first; then, for every resulting node id, a virtual partition node
 * carrying the partitioner and shuffle mode is registered in the {@code StreamGraph}.
 * @see StreamGraphGenerator
 *
 * @param partition the partition transformation to translate
 * @return the ids of the virtual nodes that were created, one per transformed input id
 */
private <T> Collection<Integer> transformPartition(PartitionTransformation<T> partition) {
	final Collection<Integer> upstreamIds = transform(partition.getInput());

	final List<Integer> virtualIds = new ArrayList<>();
	for (Integer upstreamId : upstreamIds) {
		final int virtualNodeId = Transformation.getNewNodeId();
		streamGraph.addVirtualPartitionNode(
				upstreamId,
				virtualNodeId,
				partition.getPartitioner(),
				partition.getShuffleMode());
		virtualIds.add(virtualNodeId);
	}
	return virtualIds;
}

Source File: KeyedStream.java From flink with Apache License 2.0

5 votes

/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * <p>Delegates to the constructor that takes an explicit {@link PartitionTransformation}, passing a
 * transformation built on a {@code KeyGroupStreamPartitioner}.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 * @param keyType
 *            Defines the type of the extracted keys
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector, TypeInformation<KEY> keyType) {
	this(
		dataStream,
		// partition the base stream's transformation by key group
		new PartitionTransformation<>(
			dataStream.getTransformation(),
			new KeyGroupStreamPartitioner<>(keySelector, StreamGraphGenerator.DEFAULT_LOWER_BOUND_MAX_PARALLELISM)),
		keySelector,
		keyType);
}

Source File: HiveTableSourceITCase.java From flink with Apache License 2.0

5 votes

/**
 * Creates a two-partition Hive table, runs a {@code limit 1} query against it with source
 * parallelism inference disabled and default parallelism 2, and asserts that the input of the
 * partition transformation in the translated plan has parallelism 1.
 */
@Test
public void testParallelismOnLimitPushDown() {
	final String database = "source_db";
	final String tableName = "test_parallelism_limit_pushdown";

	TableEnvironment tableEnv = createTableEnv();
	tableEnv.getConfig().getConfiguration().setBoolean(
			HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, false);
	tableEnv.getConfig().getConfiguration().setInteger(
			ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 2);

	tableEnv.executeSql("CREATE TABLE source_db.test_parallelism_limit_pushdown " +
				"(`year` STRING, `value` INT) partitioned by (pt int)");

	// two partitions with two rows each
	HiveTestUtils.createTextTableInserter(hiveShell, database, tableName)
				.addRow(new Object[]{"2014", 3})
				.addRow(new Object[]{"2014", 4})
				.commit("pt=0");
	HiveTestUtils.createTextTableInserter(hiveShell, database, tableName)
				.addRow(new Object[]{"2015", 2})
				.addRow(new Object[]{"2015", 5})
				.commit("pt=1");

	Table limited = tableEnv.sqlQuery("select * from hive.source_db.test_parallelism_limit_pushdown limit 1");

	// optimize and translate the query into a transformation plan
	PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tableEnv).getPlanner();
	RelNode optimized = planner.optimize(TableTestUtil.toRelNode(limited));
	ExecNode rootNode = planner.translateToExecNodePlan(toScala(Collections.singletonList(optimized))).get(0);
	@SuppressWarnings("unchecked")
	Transformation transformation = rootNode.translateToPlan(planner);

	// walk down: one-input transformation -> partition transformation -> its input
	OneInputTransformation oneInput = (OneInputTransformation) transformation;
	PartitionTransformation partition = (PartitionTransformation) oneInput.getInput();
	Assert.assertEquals(1, partition.getInput().getParallelism());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test enabling the property "blockingConnectionsBetweenChains".
 *
 * <p>With the property enabled, an edge with an undefined shuffle mode is expected to become
 * BLOCKING, while an edge that explicitly requests PIPELINED stays PIPELINED_BOUNDED.
 */
@Test
public void testBlockingConnectionsBetweenChainsEnabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Filter -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	// partition transformation with an undefined shuffle mode between source and filter
	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
		sourceDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> filterDataStream = partitionAfterSourceDataStream.filter(value -> true).setParallelism(2);

	// partition transformation with an undefined shuffle mode between filter and map
	DataStream<Integer> partitionAfterFilterDataStream = new DataStream<>(env, new PartitionTransformation<>(
		filterDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> mapDataStream = partitionAfterFilterDataStream.map(value -> value).setParallelism(2);

	// partition transformation with PIPELINED shuffle mode between map and print; it is built on
	// the map output (not the filter output) so the job really is the linear chain described above
	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
		mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED));
	partitionAfterMapDataStream.print().setParallelism(1);

	StreamGraph streamGraph = env.getStreamGraph();
	streamGraph.setBlockingConnectionsBetweenChains(true);
	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	JobVertex sourceVertex = verticesSorted.get(0);
	// still can be chained
	JobVertex filterAndMapVertex = verticesSorted.get(1);
	JobVertex printVertex = verticesSorted.get(2);

	// the edge with undefined shuffle mode is translated into BLOCKING
	assertEquals(ResultPartitionType.BLOCKING, sourceVertex.getProducedDataSets().get(0).getResultType());
	// the edge with PIPELINED shuffle mode is translated into PIPELINED_BOUNDED
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, filterAndMapVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, printVertex.getInputs().get(0).getSource().getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#UNDEFINED}: source and map are expected to share
 * one vertex, and the outgoing edge is expected to be PIPELINED_BOUNDED.
 */
@Test
public void testShuffleModeUndefined() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> input = environment.fromElements(1, 2, 3);

	// source --(forward, UNDEFINED)--> map
	PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			input.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED);
	DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	DataStream<Integer> mapped = afterSource.map(value -> value).setParallelism(1);

	// map --(rescale, UNDEFINED)--> print
	PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED);
	DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, sortedVertices.size());

	// it can be chained with UNDEFINED shuffle mode
	JobVertex sourceAndMapVertex = sortedVertices.get(0);

	// UNDEFINED shuffle mode is translated into PIPELINED_BOUNDED result partition by default
	assertEquals(
		ResultPartitionType.PIPELINED_BOUNDED,
		sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}: no operators are expected to be chained,
 * and both edges are expected to be BLOCKING.
 */
@Test
public void testShuffleModeBatch() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> input = environment.fromElements(1, 2, 3);

	// source --(forward, BATCH)--> map
	PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			input.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH);
	DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	DataStream<Integer> mapped = afterSource.map(value -> value).setParallelism(1);

	// map --(rescale, BATCH)--> print
	PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH);
	DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, sortedVertices.size());

	// it can not be chained with BATCH shuffle mode
	JobVertex sourceVertex = sortedVertices.get(0);
	JobVertex mapVertex = sortedVertices.get(1);

	// BATCH shuffle mode is translated into BLOCKING result partition
	assertEquals(
		ResultPartitionType.BLOCKING,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(
		ResultPartitionType.BLOCKING,
		mapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test setting shuffle mode to {@link ShuffleMode#PIPELINED}: source and map are expected to share
 * one vertex, and the outgoing edge is expected to be PIPELINED_BOUNDED.
 */
@Test
public void testShuffleModePipelined() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> input = environment.fromElements(1, 2, 3);

	// source --(forward, PIPELINED)--> map
	PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			input.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED);
	DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	DataStream<Integer> mapped = afterSource.map(value -> value).setParallelism(1);

	// map --(rescale, PIPELINED)--> print
	PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED);
	DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, sortedVertices.size());

	// it can be chained with PIPELINED shuffle mode
	JobVertex sourceAndMapVertex = sortedVertices.get(0);

	// PIPELINED shuffle mode is translated into PIPELINED_BOUNDED result partition
	assertEquals(
		ResultPartitionType.PIPELINED_BOUNDED,
		sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}

Source File: StreamGraphGenerator.java From flink with Apache License 2.0

5 votes

/**
 * Checks that a split may be applied on top of {@code input}.
 *
 * <p>Union and partition transformations are looked through recursively; any other kind of
 * transformation ends the check successfully.
 *
 * @param input the transformation that feeds the split
 * @throws IllegalStateException if a select, split or side-output transformation is reachable
 *         from {@code input} through union/partition transformations only
 */
private <T> void validateSplitTransformation(Transformation<T> input) {
	if (input instanceof SelectTransformation || input instanceof SplitTransformation) {
		throw new IllegalStateException("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");
	}
	if (input instanceof SideOutputTransformation) {
		throw new IllegalStateException("Split after side-outputs are not supported. Splits are deprecated. Please use side-outputs.");
	}

	if (input instanceof UnionTransformation) {
		// every branch of the union has to be valid
		for (Transformation<T> transformation : ((UnionTransformation<T>) input).getInputs()) {
			validateSplitTransformation(transformation);
		}
	} else if (input instanceof PartitionTransformation) {
		// parameterized cast (instead of the raw type) keeps the recursive call type-checked
		validateSplitTransformation(((PartitionTransformation<T>) input).getInput());
	}
	// any other transformation is fine as the input of a split
}

Source File: StreamGraphGenerator.java From flink with Apache License 2.0

5 votes

/**
 * Transforms a {@code PartitionTransformation}.
 *
 * <p>The input is transformed first; then, for every resulting node id, a virtual partition node
 * carrying the partitioner and shuffle mode is registered in the {@code StreamGraph}.
 * @see StreamGraphGenerator
 *
 * @param partition the partition transformation to translate
 * @return the ids of the virtual nodes that were created, one per transformed input id
 */
private <T> Collection<Integer> transformPartition(PartitionTransformation<T> partition) {
	final Collection<Integer> upstreamIds = transform(partition.getInput());

	final List<Integer> virtualIds = new ArrayList<>();
	for (Integer upstreamId : upstreamIds) {
		final int virtualNodeId = Transformation.getNewNodeId();
		streamGraph.addVirtualPartitionNode(
				upstreamId,
				virtualNodeId,
				partition.getPartitioner(),
				partition.getShuffleMode());
		virtualIds.add(virtualNodeId);
	}
	return virtualIds;
}

Source File: KeyedStream.java From flink with Apache License 2.0

5 votes

/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * <p>Delegates to the constructor that takes an explicit {@link PartitionTransformation}, passing a
 * transformation built on a {@code KeyGroupStreamPartitioner}.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 * @param keyType
 *            Defines the type of the extracted keys
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector, TypeInformation<KEY> keyType) {
	this(
		dataStream,
		// partition the base stream's transformation by key group
		new PartitionTransformation<>(
			dataStream.getTransformation(),
			new KeyGroupStreamPartitioner<>(keySelector, StreamGraphGenerator.DEFAULT_LOWER_BOUND_MAX_PARALLELISM)),
		keySelector,
		keyType);
}

Source File: StreamGraphGenerator.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Checks that a split may be applied on top of {@code input}.
 *
 * <p>Union and partition transformations are looked through recursively; any other kind of
 * transformation ends the check successfully.
 *
 * @param input the transformation that feeds the split
 * @throws IllegalStateException if a select, split or side-output transformation is reachable
 *         from {@code input} through union/partition transformations only
 */
private <T> void validateSplitTransformation(StreamTransformation<T> input) {
	if (input instanceof SelectTransformation || input instanceof SplitTransformation) {
		throw new IllegalStateException("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");
	}
	if (input instanceof SideOutputTransformation) {
		throw new IllegalStateException("Split after side-outputs are not supported. Splits are deprecated. Please use side-outputs.");
	}

	if (input instanceof UnionTransformation) {
		// every branch of the union has to be valid
		for (StreamTransformation<T> transformation : ((UnionTransformation<T>) input).getInputs()) {
			validateSplitTransformation(transformation);
		}
	} else if (input instanceof PartitionTransformation) {
		// parameterized cast (instead of the raw type) keeps the recursive call type-checked
		validateSplitTransformation(((PartitionTransformation<T>) input).getInput());
	}
	// any other transformation is fine as the input of a split
}

Source File: StreamGraphGenerator.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Transforms a {@code PartitionTransformation}.
 *
 * <p>The input is transformed first; then, for every resulting node id, a virtual partition node
 * carrying the partitioner is registered in the {@code StreamGraph}.
 * @see StreamGraphGenerator
 *
 * @param partition the partition transformation to translate
 * @return the ids of the virtual nodes that were created, one per transformed input id
 */
private <T> Collection<Integer> transformPartition(PartitionTransformation<T> partition) {
	final Collection<Integer> upstreamIds = transform(partition.getInput());

	final List<Integer> virtualIds = new ArrayList<>();
	for (Integer upstreamId : upstreamIds) {
		final int virtualNodeId = StreamTransformation.getNewNodeId();
		streamGraph.addVirtualPartitionNode(upstreamId, virtualNodeId, partition.getPartitioner());
		virtualIds.add(virtualNodeId);
	}
	return virtualIds;
}

Source File: KeyedStream.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * <p>Delegates to the constructor that takes an explicit {@link PartitionTransformation}, passing a
 * transformation built on a {@code KeyGroupStreamPartitioner}.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 * @param keyType
 *            Defines the type of the extracted keys
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector, TypeInformation<KEY> keyType) {
	this(
		dataStream,
		// partition the base stream's transformation by key group
		new PartitionTransformation<>(
			dataStream.getTransformation(),
			new KeyGroupStreamPartitioner<>(keySelector, StreamGraphGenerator.DEFAULT_LOWER_BOUND_MAX_PARALLELISM)),
		keySelector,
		keyType);
}

Source File: KeyedStream.java From flink with Apache License 2.0

3 votes

/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector} and {@link TypeInformation}
 * to partition operator state by key, where the partitioning is defined by a {@link PartitionTransformation}.
 *
 * <p>The new stream is created on the execution environment of {@code stream}, with
 * {@code partitionTransformation} as its transformation.
 *
 * @param stream
 *            Base stream of data
 * @param partitionTransformation
 *            Transformation that determines how the keys are distributed to downstream operator(s)
 * @param keySelector
 *            Function to extract keys from the base stream
 * @param keyType
 *            Defines the type of the extracted keys
 */
@Internal
KeyedStream(
	DataStream<T> stream,
	PartitionTransformation<T> partitionTransformation,
	KeySelector<T, KEY> keySelector,
	TypeInformation<KEY> keyType) {

	super(stream.getExecutionEnvironment(), partitionTransformation);
	// NOTE(review): clean(...) and validateKeyType(...) are defined outside this snippet;
	// the fields store whatever those helpers return, not necessarily the raw arguments.
	this.keySelector = clean(keySelector);
	this.keyType = validateKeyType(keyType);
}

Source File: KeyedStream.java From flink with Apache License 2.0

3 votes

/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector} and {@link TypeInformation}
 * to partition operator state by key, where the partitioning is defined by a {@link PartitionTransformation}.
 *
 * <p>The new stream is created on the execution environment of {@code stream}, with
 * {@code partitionTransformation} as its transformation.
 *
 * @param stream
 *            Base stream of data
 * @param partitionTransformation
 *            Transformation that determines how the keys are distributed to downstream operator(s)
 * @param keySelector
 *            Function to extract keys from the base stream
 * @param keyType
 *            Defines the type of the extracted keys
 */
@Internal
KeyedStream(
	DataStream<T> stream,
	PartitionTransformation<T> partitionTransformation,
	KeySelector<T, KEY> keySelector,
	TypeInformation<KEY> keyType) {

	super(stream.getExecutionEnvironment(), partitionTransformation);
	// NOTE(review): clean(...) and validateKeyType(...) are defined outside this snippet;
	// the fields store whatever those helpers return, not necessarily the raw arguments.
	this.keySelector = clean(keySelector);
	this.keyType = validateKeyType(keyType);
}

Source File: KeyedStream.java From Flink-CEPplus with Apache License 2.0

3 votes

/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector} and {@link TypeInformation}
 * to partition operator state by key, where the partitioning is defined by a {@link PartitionTransformation}.
 *
 * <p>The new stream is created on the execution environment of {@code stream}, with
 * {@code partitionTransformation} as its transformation.
 *
 * @param stream
 *            Base stream of data
 * @param partitionTransformation
 *            Transformation that determines how the keys are distributed to downstream operator(s)
 * @param keySelector
 *            Function to extract keys from the base stream
 * @param keyType
 *            Defines the type of the extracted keys
 */
@Internal
KeyedStream(
	DataStream<T> stream,
	PartitionTransformation<T> partitionTransformation,
	KeySelector<T, KEY> keySelector,
	TypeInformation<KEY> keyType) {

	super(stream.getExecutionEnvironment(), partitionTransformation);
	// NOTE(review): clean(...) and validateKeyType(...) are defined outside this snippet;
	// the fields store whatever those helpers return, not necessarily the raw arguments.
	this.keySelector = clean(keySelector);
	this.keyType = validateKeyType(keyType);
}

Source File: DataStream.java From flink with Apache License 2.0

2 votes

/**
 * Internal function for setting the partitioner for the DataStream.
 *
 * <p>This stream is left untouched: a new DataStream is returned, in the same execution
 * environment, whose transformation wraps this stream's transformation with the partitioner.
 *
 * @param partitioner
 *            Partitioner to set.
 * @return A new DataStream that uses the given partitioner.
 */
protected DataStream<T> setConnectionType(StreamPartitioner<T> partitioner) {
	return new DataStream<>(
		this.getExecutionEnvironment(),
		new PartitionTransformation<>(
			this.getTransformation(),
			partitioner));
}

Source File: DataStream.java From flink with Apache License 2.0

2 votes

/**
 * Internal function for setting the partitioner for the DataStream.
 *
 * <p>This stream is left untouched: a new DataStream is returned, in the same execution
 * environment, whose transformation wraps this stream's transformation with the partitioner.
 *
 * @param partitioner
 *            Partitioner to set.
 * @return A new DataStream that uses the given partitioner.
 */
protected DataStream<T> setConnectionType(StreamPartitioner<T> partitioner) {
	return new DataStream<>(
		this.getExecutionEnvironment(),
		new PartitionTransformation<>(
			this.getTransformation(),
			partitioner));
}

org.apache.flink.streaming.api.transformations.PartitionTransformation Java Examples