Java Code Examples for org.apache.flink.optimizer.plan.PlanNode#getInputs()
The following examples show how to use org.apache.flink.optimizer.plan.PlanNode#getInputs(). Each example is taken from an open-source project; the source file and license are noted above the code.
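Most of the examples below share one traversal idiom: walk the optimized plan backwards, from the data sinks toward the sources, following each node's input channels. As a minimal sketch of that pattern (the class and method names here are illustrative, not from the examples), assuming an already-compiled OptimizedPlan:

import java.util.ArrayList;
import java.util.List;

import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plan.PlanNode;

public final class PlanWalk {

    /** Walks an optimized plan from sinks to sources, printing each node. */
    public static void walkPlan(OptimizedPlan optimizedPlan) {
        // Seed the work list with the plan's sinks (SinkPlanNode extends PlanNode).
        List<PlanNode> queue = new ArrayList<>(optimizedPlan.getDataSinks());

        while (!queue.isEmpty()) {
            PlanNode node = queue.remove(queue.size() - 1);
            System.out.println(node.getNodeName() + ", parallelism " + node.getParallelism());

            // getInputs() returns the node's input Channels; each channel's
            // source is the upstream PlanNode, so this steps toward the sources.
            for (Channel channel : node.getInputs()) {
                queue.add(channel.getSource());
            }
        }
    }
}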
Example 1
Source File: TestUtils.java From Flink-CEPplus with Apache License 2.0
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(),
            node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
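For context, a hypothetical invocation of this helper might look like the following sketch; the identity mapper and the expected parallelism of 4 are illustrative values, not taken from the original tests:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// Build a small pipeline whose operators are explicitly capped at
// parallelism 4 (the fromElements source is non-parallel and stays at 1).
env.fromElements(1, 2, 3)
    .map(new MapFunction<Integer, Integer>() {
        @Override
        public Integer map(Integer value) {
            return value;
        }
    })
    .setParallelism(4)
    .output(new DiscardingOutputFormat<Integer>())
    .setParallelism(4);

// The helper raises the default parallelism to 8, compiles the plan, and
// asserts that no plan node ends up with parallelism greater than 4.
TestUtils.verifyParallelism(env, 4);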
Example 2
Source File: TestUtils.java From flink with Apache License 2.0
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
    env.setParallelism(2 * expectedParallelism);

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // Data sources may have parallelism of 1, so simply check that the node
        // parallelism has not been increased by setting the default parallelism
        assertTrue("Wrong parallelism for " + node.toString(),
            node.getParallelism() <= expectedParallelism);

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
Example 3
Source File: RangePartitionRewriter.java From Flink-CEPplus with Apache License 2.0
@Override
public void postVisit(PlanNode node) {
    if (node instanceof IterationPlanNode) {
        IterationPlanNode iNode = (IterationPlanNode) node;
        if (!visitedIterationNodes.contains(iNode)) {
            visitedIterationNodes.add(iNode);
            iNode.acceptForStepFunction(this);
        }
    }

    final Iterable<Channel> inputChannels = node.getInputs();
    for (Channel channel : inputChannels) {
        ShipStrategyType shipStrategy = channel.getShipStrategy();
        // Make sure we only optimize the DAG for range partitioning, and do not optimize multiple times.
        if (shipStrategy == ShipStrategyType.PARTITION_RANGE) {
            if (channel.getDataDistribution() == null) {
                if (node.isOnDynamicPath()) {
                    throw new InvalidProgramException(
                        "Range Partitioning not supported within iterations if users do not supply the data distribution.");
                }

                PlanNode channelSource = channel.getSource();
                List<Channel> newSourceOutputChannels = rewriteRangePartitionChannel(channel);
                channelSource.getOutgoingChannels().remove(channel);
                channelSource.getOutgoingChannels().addAll(newSourceOutputChannels);
            }
        }
    }
}
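For context, a visitor like this one is not invoked in isolation: the optimizer drives it over the compiled plan, and postVisit(PlanNode) fires for each node on the way back up, after the node's inputs have been visited. A rough sketch of that wiring, mirroring the call made inside Optimizer#compile and shown here purely for illustration:

// Compile the program plan, then let the rewriter visit every node.
Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
OptimizedPlan plan = compiler.compile(env.createProgramPlan());
plan.accept(new RangePartitionRewriter(plan));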
Example 4
Source File: RangePartitionRewriter.java From flink with Apache License 2.0
@Override
public void postVisit(PlanNode node) {
    if (node instanceof IterationPlanNode) {
        IterationPlanNode iNode = (IterationPlanNode) node;
        if (!visitedIterationNodes.contains(iNode)) {
            visitedIterationNodes.add(iNode);
            iNode.acceptForStepFunction(this);
        }
    }

    final Iterable<Channel> inputChannels = node.getInputs();
    for (Channel channel : inputChannels) {
        ShipStrategyType shipStrategy = channel.getShipStrategy();
        // Make sure we only optimize the DAG for range partitioning, and do not optimize multiple times.
        if (shipStrategy == ShipStrategyType.PARTITION_RANGE) {
            if (channel.getDataDistribution() == null) {
                if (node.isOnDynamicPath()) {
                    throw new InvalidProgramException(
                        "Range Partitioning not supported within iterations if users do not supply the data distribution.");
                }

                PlanNode channelSource = channel.getSource();
                List<Channel> newSourceOutputChannels = rewriteRangePartitionChannel(channel);
                channelSource.getOutgoingChannels().remove(channel);
                channelSource.getOutgoingChannels().addAll(newSourceOutputChannels);
            }
        }
    }
}
Example 5
Source File: TestUtils.java From Flink-CEPplus with Apache License 2.0
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
    // set a reduced parallelism for the algorithm runner
    final int parallelism = 8;
    arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

    // configure the runner but do not execute
    Runner runner = new Runner(arguments).run();

    // we cannot use the actual DataSink since DataSet#writeAsCsv also
    // executes the program; instead, we receive the DataSet and configure
    // with a DiscardingOutputFormat
    DataSet result = runner.getResult();
    if (result != null) {
        result.output(new DiscardingOutputFormat());
    }

    // set the default parallelism higher than the expected parallelism
    ExecutionEnvironment env = runner.getExecutionEnvironment();
    env.setParallelism(2 * parallelism);

    // add default regex exclusions for the added DiscardingOutputFormat
    // and also for any preceding GraphKeyTypeTransform
    List<Pattern> patterns = new ArrayList<>();
    patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
    patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

    // add user regex patterns
    for (String largeOperatorName : fullParallelismOperatorNames) {
        patterns.add(Pattern.compile(largeOperatorName));
    }

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    // walk the job plan from sinks to sources
    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // skip operators matching an exclusion pattern; these are the
        // large-scale operators which run at full parallelism
        boolean matched = false;
        for (Pattern pattern : patterns) {
            matched |= pattern.matcher(node.getNodeName()).matches();
        }

        if (!matched) {
            // Data sources may have parallelism of 1, so simply check that the node
            // parallelism has not been increased by setting the default parallelism
            assertTrue("Wrong parallelism for " + node.toString(),
                node.getParallelism() <= parallelism);
        }

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
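A hypothetical call to this variant might look like the sketch below; the driver arguments and operator-name regexes are illustrative placeholders, not values taken from the original test suite:

// Check the plan of an illustrative algorithm driver, excluding two
// operators that are expected to run at full parallelism.
String[] arguments = new String[] {
    "--algorithm", "PageRank",
    "--input", "RMatGraph", "--scale", "10",
    "--output", "hash"
};

verifyParallelism(arguments, "PageRank", "Sum of scores");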
Example 6
Source File: TestUtils.java From flink with Apache License 2.0
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
    // set a reduced parallelism for the algorithm runner
    final int parallelism = 8;
    arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

    // configure the runner but do not execute
    Runner runner = new Runner(arguments).run();

    // we cannot use the actual DataSink since DataSet#writeAsCsv also
    // executes the program; instead, we receive the DataSet and configure
    // with a DiscardingOutputFormat
    DataSet result = runner.getResult();
    if (result != null) {
        result.output(new DiscardingOutputFormat());
    }

    // set the default parallelism higher than the expected parallelism
    ExecutionEnvironment env = runner.getExecutionEnvironment();
    env.setParallelism(2 * parallelism);

    // add default regex exclusions for the added DiscardingOutputFormat
    // and also for any preceding GraphKeyTypeTransform
    List<Pattern> patterns = new ArrayList<>();
    patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
    patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

    // add user regex patterns
    for (String largeOperatorName : fullParallelismOperatorNames) {
        patterns.add(Pattern.compile(largeOperatorName));
    }

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    // walk the job plan from sinks to sources
    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // skip operators matching an exclusion pattern; these are the
        // large-scale operators which run at full parallelism
        boolean matched = false;
        for (Pattern pattern : patterns) {
            matched |= pattern.matcher(node.getNodeName()).matches();
        }

        if (!matched) {
            // Data sources may have parallelism of 1, so simply check that the node
            // parallelism has not been increased by setting the default parallelism
            assertTrue("Wrong parallelism for " + node.toString(),
                node.getParallelism() <= parallelism);
        }

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}