org.apache.flink.api.dag.Transformation Java Examples
The following examples show how to use
org.apache.flink.api.dag.Transformation.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage examples in the sidebar.
Example #1
Source File: TableEnvironmentImpl.java From flink with Apache License 2.0 | 6 votes |
@Override public TableResult executeInternal(List<ModifyOperation> operations) { List<Transformation<?>> transformations = translate(operations); List<String> sinkIdentifierNames = extractSinkIdentifierNames(operations); String jobName = "insert-into_" + String.join(",", sinkIdentifierNames); Pipeline pipeline = execEnv.createPipeline(transformations, tableConfig, jobName); try { JobClient jobClient = execEnv.executeAsync(pipeline); TableSchema.Builder builder = TableSchema.builder(); Object[] affectedRowCounts = new Long[operations.size()]; for (int i = 0; i < operations.size(); ++i) { // use sink identifier name as field name builder.field(sinkIdentifierNames.get(i), DataTypes.BIGINT()); affectedRowCounts[i] = -1L; } return TableResultImpl.builder() .jobClient(jobClient) .resultKind(ResultKind.SUCCESS_WITH_CONTENT) .tableSchema(builder.build()) .data(Collections.singletonList(Row.of(affectedRowCounts))) .build(); } catch (Exception e) { throw new TableException("Failed to execute sql", e); } }
Example #2
Source File: BatchExecutor.java From flink with Apache License 2.0 | 6 votes |
@Override public StreamGraph generateStreamGraph(List<Transformation<?>> transformations, String jobName) { StreamExecutionEnvironment execEnv = getExecutionEnvironment(); setBatchProperties(execEnv); transformations.forEach(execEnv::addOperator); StreamGraph streamGraph; streamGraph = execEnv.getStreamGraph(getNonEmptyJobName(jobName)); // All transformations should set managed memory size. ResourceSpec managedResourceSpec = NodeResourceUtil.fromManagedMem(0); streamGraph.getStreamNodes().forEach(sn -> { if (sn.getMinResources().equals(ResourceSpec.DEFAULT)) { sn.setResources(managedResourceSpec, managedResourceSpec); } }); streamGraph.setChaining(true); streamGraph.setScheduleMode(ScheduleMode.LAZY_FROM_SOURCES_WITH_BATCH_SLOT_REQUEST); streamGraph.setStateBackend(null); if (streamGraph.getCheckpointConfig().isCheckpointingEnabled()) { throw new IllegalArgumentException("Checkpoint is not supported for batch jobs."); } if (isShuffleModeAllBatch()) { streamGraph.setBlockingConnectionsBetweenChains(true); } return streamGraph; }
Example #3
Source File: StreamGraphGenerator.java From flink with Apache License 2.0 | 6 votes |
/** * Transforms a {@code SelectTransformation}. * * <p>For this we create a virtual node in the {@code StreamGraph} holds the selected names. * * @see org.apache.flink.streaming.api.graph.StreamGraphGenerator */ private <T> Collection<Integer> transformSelect(SelectTransformation<T> select) { Transformation<T> input = select.getInput(); Collection<Integer> resultIds = transform(input); // the recursive transform might have already transformed this if (alreadyTransformed.containsKey(select)) { return alreadyTransformed.get(select); } List<Integer> virtualResultIds = new ArrayList<>(); for (int inputId : resultIds) { int virtualId = Transformation.getNewNodeId(); streamGraph.addVirtualSelectNode(inputId, virtualId, select.getSelectedNames()); virtualResultIds.add(virtualId); } return virtualResultIds; }
Example #4
Source File: StreamGraphGenerator.java From flink with Apache License 2.0 | 6 votes |
/** * Transforms a {@code SideOutputTransformation}. * * <p>For this we create a virtual node in the {@code StreamGraph} that holds the side-output * {@link org.apache.flink.util.OutputTag}. * * @see org.apache.flink.streaming.api.graph.StreamGraphGenerator */ private <T> Collection<Integer> transformSideOutput(SideOutputTransformation<T> sideOutput) { Transformation<?> input = sideOutput.getInput(); Collection<Integer> resultIds = transform(input); // the recursive transform might have already transformed this if (alreadyTransformed.containsKey(sideOutput)) { return alreadyTransformed.get(sideOutput); } List<Integer> virtualResultIds = new ArrayList<>(); for (int inputId : resultIds) { int virtualId = Transformation.getNewNodeId(); streamGraph.addVirtualSideOutputNode(inputId, virtualId, sideOutput.getOutputTag()); virtualResultIds.add(virtualId); } return virtualResultIds; }
Example #5
Source File: StreamGraphGenerator.java From flink with Apache License 2.0 | 6 votes |
/** * Transforms a {@code SplitTransformation}. * * <p>We add the output selector to previously transformed nodes. */ private <T> Collection<Integer> transformSplit(SplitTransformation<T> split) { Transformation<T> input = split.getInput(); Collection<Integer> resultIds = transform(input); validateSplitTransformation(input); // the recursive transform call might have transformed this already if (alreadyTransformed.containsKey(split)) { return alreadyTransformed.get(split); } for (int inputId : resultIds) { streamGraph.addOutputSelector(inputId, split.getOutputSelector()); } return resultIds; }
Example #6
Source File: StreamGraphGenerator.java From flink with Apache License 2.0 | 6 votes |
/**
 * Generates the {@link StreamGraph} for the registered transformations.
 *
 * <p>After generation all generator-held references to the graph and the memoization map
 * are dropped, so this generator instance cannot be reused.
 *
 * @return the generated stream graph
 */
public StreamGraph generate() {
    streamGraph = new StreamGraph(executionConfig, checkpointConfig, savepointRestoreSettings);
    streamGraph.setStateBackend(stateBackend);
    streamGraph.setChaining(chaining);
    streamGraph.setScheduleMode(scheduleMode);
    streamGraph.setUserArtifacts(userArtifacts);
    streamGraph.setTimeCharacteristic(timeCharacteristic);
    streamGraph.setJobName(jobName);
    streamGraph.setGlobalDataExchangeMode(globalDataExchangeMode);

    alreadyTransformed = new HashMap<>();
    for (Transformation<?> transformation : transformations) {
        transform(transformation);
    }

    // hand the graph to the caller and clear internal state so the generator is single-use
    final StreamGraph result = streamGraph;
    alreadyTransformed.clear();
    alreadyTransformed = null;
    streamGraph = null;
    return result;
}
Example #7
Source File: TableEnvironmentImpl.java From flink with Apache License 2.0 | 6 votes |
/**
 * Executes the given query as a "collect" job and exposes the rows through the
 * returned {@link TableResult}'s iterator.
 *
 * @param operation the query operation to execute
 * @return a result whose data iterator streams the collected rows
 * @throws TableException if submitting the job fails
 */
@Override
public TableResult executeInternal(QueryOperation operation) {
    final SelectSinkOperation sinkOperation = new SelectSinkOperation(operation);
    final List<Transformation<?>> transformations =
            translate(Collections.singletonList(sinkOperation));
    final Pipeline pipeline = execEnv.createPipeline(transformations, tableConfig, "collect");
    try {
        final JobClient jobClient = execEnv.executeAsync(pipeline);
        final SelectResultProvider resultProvider = sinkOperation.getSelectResultProvider();
        resultProvider.setJobClient(jobClient);
        return TableResultImpl.builder()
                .jobClient(jobClient)
                .resultKind(ResultKind.SUCCESS_WITH_CONTENT)
                .tableSchema(operation.getTableSchema())
                .data(resultProvider.getResultIterator())
                .setPrintStyle(TableResultImpl.PrintStyle.tableau(
                        PrintUtils.MAX_COLUMN_WIDTH,
                        PrintUtils.NULL_COLUMN,
                        true,
                        isStreamingMode))
                .build();
    } catch (Exception e) {
        throw new TableException("Failed to execute sql", e);
    }
}
Example #8
Source File: DataStream.java From flink with Apache License 2.0 | 6 votes |
/**
 * Creates a new {@link DataStream} by merging {@link DataStream} outputs of
 * the same type with each other. The DataStreams merged using this operator
 * will be transformed simultaneously.
 *
 * @param streams The DataStreams to union output with.
 * @return The {@link DataStream}.
 * @throws IllegalArgumentException if any given stream's type differs from this stream's type
 */
@SafeVarargs
public final DataStream<T> union(DataStream<T>... streams) {
    final List<Transformation<T>> unionedTransforms = new ArrayList<>();
    unionedTransforms.add(this.transformation);
    for (DataStream<T> other : streams) {
        // all merged streams must agree on the output type
        if (!getType().equals(other.getType())) {
            throw new IllegalArgumentException("Cannot union streams of different types: "
                    + getType() + " and " + other.getType());
        }
        unionedTransforms.add(other.getTransformation());
    }
    return new DataStream<>(this.environment, new UnionTransformation<>(unionedTransforms));
}
Example #9
Source File: StreamGraphGenerator.java From flink with Apache License 2.0 | 6 votes |
/**
 * Generates the {@link StreamGraph} for the registered transformations.
 *
 * <p>After generation all generator-held references to the graph and the memoization map
 * are dropped, so this generator instance cannot be reused.
 *
 * @return the generated stream graph
 */
public StreamGraph generate() {
    streamGraph = new StreamGraph(executionConfig, checkpointConfig);
    streamGraph.setStateBackend(stateBackend);
    streamGraph.setChaining(chaining);
    streamGraph.setScheduleMode(scheduleMode);
    streamGraph.setUserArtifacts(userArtifacts);
    streamGraph.setTimeCharacteristic(timeCharacteristic);
    streamGraph.setJobName(jobName);
    streamGraph.setBlockingConnectionsBetweenChains(blockingConnectionsBetweenChains);

    alreadyTransformed = new HashMap<>();
    for (Transformation<?> transformation : transformations) {
        transform(transformation);
    }

    // hand the graph to the caller and clear internal state so the generator is single-use
    final StreamGraph result = streamGraph;
    alreadyTransformed.clear();
    alreadyTransformed = null;
    streamGraph = null;
    return result;
}
Example #10
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0 | 6 votes |
/** * Test slot sharing is enabled. */ @Test public void testEnableSlotSharing() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3); DataStream<Integer> mapDataStream = sourceDataStream.map(x -> x + 1); final List<Transformation<?>> transformations = new ArrayList<>(); transformations.add(sourceDataStream.getTransformation()); transformations.add(mapDataStream.getTransformation()); // all stream nodes share default group by default StreamGraph streamGraph = new StreamGraphGenerator( transformations, env.getConfig(), env.getCheckpointConfig()) .generate(); Collection<StreamNode> streamNodes = streamGraph.getStreamNodes(); for (StreamNode streamNode : streamNodes) { assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, streamNode.getSlotSharingGroup()); } }
Example #11
Source File: StreamGraphGenerator.java From flink with Apache License 2.0 | 6 votes |
/** * Transforms a {@code SideOutputTransformation}. * * <p>For this we create a virtual node in the {@code StreamGraph} that holds the side-output * {@link org.apache.flink.util.OutputTag}. * * @see org.apache.flink.streaming.api.graph.StreamGraphGenerator */ private <T> Collection<Integer> transformSideOutput(SideOutputTransformation<T> sideOutput) { Transformation<?> input = sideOutput.getInput(); Collection<Integer> resultIds = transform(input); // the recursive transform might have already transformed this if (alreadyTransformed.containsKey(sideOutput)) { return alreadyTransformed.get(sideOutput); } List<Integer> virtualResultIds = new ArrayList<>(); for (int inputId : resultIds) { int virtualId = Transformation.getNewNodeId(); streamGraph.addVirtualSideOutputNode(inputId, virtualId, sideOutput.getOutputTag()); virtualResultIds.add(virtualId); } return virtualResultIds; }
Example #12
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests an iteration job with slot sharing disabled: each iteration source/sink pair must
 * still share the same slot sharing group and be co-located.
 */
@Test
public void testIterationWithSlotSharingDisabled() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    final IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    final DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    final DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    final List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source.getTransformation());
    transformations.add(iteration.getTransformation());
    transformations.add(map.getTransformation());
    transformations.add(filter.getTransformation());

    final StreamGraphGenerator generator =
            new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
    generator.setSlotSharingEnabled(false);
    final StreamGraph streamGraph = generator.generate();

    for (Tuple2<StreamNode, StreamNode> iterationPair
            : streamGraph.getIterationSourceSinkPairs()) {
        assertNotNull(iterationPair.f0.getCoLocationGroup());
        assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());
        assertNotNull(iterationPair.f0.getSlotSharingGroup());
        assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());
    }
}
Example #13
Source File: StreamGraphGenerator.java From flink with Apache License 2.0 | 5 votes |
/**
 * Transforms a {@code UnionTransformation}.
 *
 * <p>This is easy, we only have to transform the inputs and return all the IDs in a list so
 * that downstream operations can connect to all upstream nodes.
 */
private <T> Collection<Integer> transformUnion(UnionTransformation<T> union) {
    final List<Integer> resultIds = new ArrayList<>();
    for (Transformation<T> input : union.getInputs()) {
        resultIds.addAll(transform(input));
    }
    return resultIds;
}
Example #14
Source File: StreamTableEnvironmentImpl.java From flink with Apache License 2.0 | 5 votes |
/**
 * Extracts the single transformation produced by translating the given table's query.
 *
 * @param table the table whose query produced the transformations (used for the error message)
 * @param transformations the translation result; exactly one element is expected
 * @return the sole transformation, cast to the caller's expected element type
 * @throws TableException if translation did not yield exactly one transformation
 */
@SuppressWarnings("unchecked")
private <T> Transformation<T> getTransformation(
        Table table, List<Transformation<?>> transformations) {
    if (transformations.size() != 1) {
        throw new TableException(String.format(
                "Expected a single transformation for query: %s\n Got: %s",
                table.getQueryOperation().asSummaryString(), transformations));
    }
    // safe by convention: the single transformation carries the query's result type T
    return (Transformation<T>) transformations.get(0);
}
Example #15
Source File: CoFeedbackTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Adds a feedback edge. The parallelism of the {@code Transformation} must match
 * the parallelism of the input {@code Transformation} of the upstream
 * {@code Transformation}.
 *
 * @param transform The new feedback {@code Transformation}.
 * @throws UnsupportedOperationException if the parallelisms do not match
 */
public void addFeedbackEdge(Transformation<F> transform) {
    final int expectedParallelism = this.getParallelism();
    if (transform.getParallelism() != expectedParallelism) {
        throw new UnsupportedOperationException(
                "Parallelism of the feedback stream must match the parallelism of the original"
                        + " stream. Parallelism of original stream: " + expectedParallelism
                        + "; parallelism of feedback stream: " + transform.getParallelism());
    }
    feedbackEdges.add(transform);
}
Example #16
Source File: UnionTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a new {@code UnionTransformation} from the given input {@code Transformations}.
 *
 * <p>The input {@code Transformations} must all have the same type.
 *
 * @param inputs The list of input {@code Transformations}; must contain at least one element
 * @throws IllegalArgumentException if {@code inputs} is empty
 * @throws UnsupportedOperationException if the inputs do not all share the same output type
 */
public UnionTransformation(List<Transformation<T>> inputs) {
    // requireNonEmpty guards the get(0) calls below with a clear message
    super("Union", requireNonEmpty(inputs).get(0).getOutputType(), inputs.get(0).getParallelism());

    for (Transformation<T> input : inputs) {
        if (!input.getOutputType().equals(getOutputType())) {
            throw new UnsupportedOperationException("Type mismatch in input " + input);
        }
    }
    this.inputs = Lists.newArrayList(inputs);
}

/** Fails fast with a descriptive message instead of an {@code IndexOutOfBoundsException}. */
private static <T> List<Transformation<T>> requireNonEmpty(List<Transformation<T>> inputs) {
    if (inputs.isEmpty()) {
        throw new IllegalArgumentException(
                "UnionTransformation requires at least one input transformation");
    }
    return inputs;
}
Example #17
Source File: StreamGraphGenerator.java From flink with Apache License 2.0 | 5 votes |
/**
 * Transforms a {@code UnionTransformation}.
 *
 * <p>This is easy, we only have to transform the inputs and return all the IDs in a list so
 * that downstream operations can connect to all upstream nodes.
 */
private <T> Collection<Integer> transformUnion(UnionTransformation<T> union) {
    final List<Integer> resultIds = new ArrayList<>();
    for (Transformation<T> input : union.getInputs()) {
        resultIds.addAll(transform(input));
    }
    return resultIds;
}
Example #18
Source File: FeedbackTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns this transformation followed by all transitive predecessors of its input.
 */
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
    final List<Transformation<?>> predecessors = Lists.<Transformation<?>>newArrayList(this);
    predecessors.addAll(input.getTransitivePredecessors());
    return predecessors;
}
Example #19
Source File: IterativeStream.java From flink with Apache License 2.0 | 5 votes |
/**
 * Closes the iteration. This method defines the end of the iterative
 * program part that will be fed back to the start of the iteration as
 * the second input in the {@link ConnectedStreams}.
 *
 * @param feedbackStream {@link DataStream} that will be used as second input to
 *     the iteration head.
 * @return The feedback stream.
 * @throws UnsupportedOperationException if the feedback stream does not originate from this
 *     iteration
 */
public DataStream<F> closeWith(DataStream<F> feedbackStream) {
    final Collection<Transformation<?>> predecessors =
            feedbackStream.getTransformation().getTransitivePredecessors();
    // the feedback stream must be (transitively) derived from this iteration's head
    if (!predecessors.contains(this.coFeedbackTransformation)) {
        throw new UnsupportedOperationException(
                "Cannot close an iteration with a feedback DataStream that does not originate from said iteration.");
    }
    coFeedbackTransformation.addFeedbackEdge(feedbackStream.getTransformation());
    return feedbackStream;
}
Example #20
Source File: FeedbackTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Adds a feedback edge. The parallelism of the {@code Transformation} must match
 * the parallelism of the input {@code Transformation} of this
 * {@code FeedbackTransformation}
 *
 * @param transform The new feedback {@code Transformation}.
 * @throws UnsupportedOperationException if the parallelisms do not match
 */
public void addFeedbackEdge(Transformation<T> transform) {
    final int expectedParallelism = this.getParallelism();
    if (transform.getParallelism() != expectedParallelism) {
        throw new UnsupportedOperationException(
                "Parallelism of the feedback stream must match the parallelism of the original"
                        + " stream. Parallelism of original stream: " + expectedParallelism
                        + "; parallelism of feedback stream: " + transform.getParallelism()
                        + ". Parallelism can be modified using DataStream#setParallelism() method");
    }
    feedbackEdges.add(transform);
}
Example #21
Source File: SinkTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns this transformation followed by all transitive predecessors of its input.
 */
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
    final List<Transformation<?>> predecessors = Lists.<Transformation<?>>newArrayList(this);
    predecessors.addAll(input.getTransitivePredecessors());
    return predecessors;
}
Example #22
Source File: IterativeStream.java From flink with Apache License 2.0 | 5 votes |
/**
 * Closes the iteration. This method defines the end of the iterative
 * program part that will be fed back to the start of the iteration as
 * the second input in the {@link ConnectedStreams}.
 *
 * @param feedbackStream {@link DataStream} that will be used as second input to
 *     the iteration head.
 * @return The feedback stream.
 * @throws UnsupportedOperationException if the feedback stream does not originate from this
 *     iteration
 */
public DataStream<F> closeWith(DataStream<F> feedbackStream) {
    final Collection<Transformation<?>> predecessors =
            feedbackStream.getTransformation().getTransitivePredecessors();
    // the feedback stream must be (transitively) derived from this iteration's head
    if (!predecessors.contains(this.coFeedbackTransformation)) {
        throw new UnsupportedOperationException(
                "Cannot close an iteration with a feedback DataStream that does not originate from said iteration.");
    }
    coFeedbackTransformation.addFeedbackEdge(feedbackStream.getTransformation());
    return feedbackStream;
}
Example #23
Source File: SinkTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a new {@code SinkTransformation} for the given input.
 *
 * <p>The output type passed to the superclass is the type information for {@code Object},
 * as extracted via {@code TypeExtractor.getForClass(Object.class)}.
 *
 * @param input The input {@code Transformation} feeding this sink
 * @param name The name of this transformation
 * @param operatorFactory The factory for the sink's stream operator
 * @param parallelism The parallelism of this transformation
 */
public SinkTransformation(
        Transformation<T> input,
        String name,
        StreamOperatorFactory<Object> operatorFactory,
        int parallelism) {
    super(name, TypeExtractor.getForClass(Object.class), parallelism);
    this.input = input;
    this.operatorFactory = operatorFactory;
}
Example #24
Source File: SplitTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns this transformation followed by all transitive predecessors of its input.
 */
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
    final List<Transformation<?>> predecessors = Lists.<Transformation<?>>newArrayList(this);
    predecessors.addAll(input.getTransitivePredecessors());
    return predecessors;
}
Example #25
Source File: FeedbackTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Adds a feedback edge. The parallelism of the {@code Transformation} must match
 * the parallelism of the input {@code Transformation} of this
 * {@code FeedbackTransformation}
 *
 * @param transform The new feedback {@code Transformation}.
 * @throws UnsupportedOperationException if the parallelisms do not match
 */
public void addFeedbackEdge(Transformation<T> transform) {
    final int expectedParallelism = this.getParallelism();
    if (transform.getParallelism() != expectedParallelism) {
        throw new UnsupportedOperationException(
                "Parallelism of the feedback stream must match the parallelism of the original"
                        + " stream. Parallelism of original stream: " + expectedParallelism
                        + "; parallelism of feedback stream: " + transform.getParallelism()
                        + ". Parallelism can be modified using DataStream#setParallelism() method");
    }
    feedbackEdges.add(transform);
}
Example #26
Source File: OneInputTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a new {@code OneInputTransformation} from the given input.
 *
 * @param input The input {@code Transformation}
 * @param name The name of this transformation
 * @param operatorFactory The factory for the stream operator applied to the input
 * @param outputType The type information of the elements produced by this transformation
 * @param parallelism The parallelism of this transformation
 */
public OneInputTransformation(
        Transformation<IN> input,
        String name,
        StreamOperatorFactory<OUT> operatorFactory,
        TypeInformation<OUT> outputType,
        int parallelism) {
    super(name, outputType, parallelism);
    this.input = input;
    this.operatorFactory = operatorFactory;
}
Example #27
Source File: SideOutputTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns this transformation followed by all transitive predecessors of its input.
 */
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
    final List<Transformation<?>> predecessors = Lists.<Transformation<?>>newArrayList(this);
    predecessors.addAll(input.getTransitivePredecessors());
    return predecessors;
}
Example #28
Source File: SplitTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a new {@code SplitTransformation} from the given input and {@code OutputSelector}.
 *
 * <p>The output type and parallelism are taken from the input transformation.
 *
 * @param input The input {@code Transformation}
 * @param outputSelector The output selector deciding which named outputs each element goes to
 */
public SplitTransformation(
        Transformation<T> input,
        OutputSelector<T> outputSelector) {
    super("Split", input.getOutputType(), input.getParallelism());
    this.input = input;
    this.outputSelector = outputSelector;
}
Example #29
Source File: UnionTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a new {@code UnionTransformation} from the given input {@code Transformations}.
 *
 * <p>The input {@code Transformations} must all have the same type.
 *
 * @param inputs The list of input {@code Transformations}; must contain at least one element
 * @throws IllegalArgumentException if {@code inputs} is empty
 * @throws UnsupportedOperationException if the inputs do not all share the same output type
 */
public UnionTransformation(List<Transformation<T>> inputs) {
    // requireNonEmpty guards the get(0) calls below with a clear message
    super("Union", requireNonEmpty(inputs).get(0).getOutputType(), inputs.get(0).getParallelism());

    for (Transformation<T> input : inputs) {
        if (!input.getOutputType().equals(getOutputType())) {
            throw new UnsupportedOperationException("Type mismatch in input " + input);
        }
    }
    this.inputs = Lists.newArrayList(inputs);
}

/** Fails fast with a descriptive message instead of an {@code IndexOutOfBoundsException}. */
private static <T> List<Transformation<T>> requireNonEmpty(List<Transformation<T>> inputs) {
    if (inputs.isEmpty()) {
        throw new IllegalArgumentException(
                "UnionTransformation requires at least one input transformation");
    }
    return inputs;
}
Example #30
Source File: SplitTransformation.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns this transformation followed by all transitive predecessors of its input.
 */
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
    final List<Transformation<?>> predecessors = Lists.<Transformation<?>>newArrayList(this);
    predecessors.addAll(input.getTransitivePredecessors());
    return predecessors;
}