Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#setParallelism()
The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#setParallelism(). They are drawn from open-source projects; the source file and originating project are noted above each example.
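Before the project-specific tests below, here is a minimal, self-contained sketch of how the method is typically used. The class name and parallelism values are illustrative assumptions, not taken from any of the examples: env.setParallelism() sets the job-wide default, while setParallelism() on an individual operator overrides it for that operator only.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class SetParallelismSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Job-wide default: operators without their own setting run with 4 parallel subtasks.
        // (The value 4 is an arbitrary choice for this sketch.)
        env.setParallelism(4);

        DataSet<Long> numbers = env.generateSequence(1, 100);

        numbers
            .map(new MapFunction<Long, Long>() {
                @Override
                public Long map(Long value) {
                    return value * 2;
                }
            })
            // Per-operator setting overrides the environment-wide default for this map only.
            .setParallelism(2)
            .print(); // print() triggers execution of the batch job
    }
}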
Example 1
Source File: SortPartitionITCase.java From flink with Apache License 2.0
@Test
public void testSortPartitionByNestedFieldExpression() throws Exception {
    /*
     * Test sort partition on nested field expressions
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
    List<Tuple1<Boolean>> result = ds
        .map(new IdMapper<Tuple2<Tuple2<Integer, Integer>, String>>()).setParallelism(3) // parallelize input
        .sortPartition("f0.f1", Order.ASCENDING)
        .sortPartition("f1", Order.DESCENDING)
        .mapPartition(new OrderCheckMapper<>(new NestedTupleChecker()))
        .distinct().collect();

    String expected = "(true)\n";

    compareResultAsText(result, expected);
}
Example 2
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source behind rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .rebalance()
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
}
Example 3
Source File: BootstrapTransformationTest.java From flink with Apache License 2.0
@Test
public void testOperatorSpecificMaxParallelismRespected() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSource<Integer> input = env.fromElements(0);

    BootstrapTransformation<Integer> transformation = OperatorTransformation
        .bootstrapWith(input)
        .setMaxParallelism(1)
        .transform(new ExampleStateBootstrapFunction());

    int maxParallelism = transformation.getMaxParallelism(4);
    DataSet<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(
        OperatorIDGenerator.fromUid("uid"),
        new MemoryStateBackend(),
        new Path(),
        maxParallelism
    );

    Assert.assertEquals(
        "The parallelism of a data set should be constrained by the savepoint max parallelism",
        1,
        getParallelism(result));
}
Example 4
Source File: SortPartitionITCase.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testSortPartitionByFieldExpression() throws Exception {
    /*
     * Test sort partition on field expression
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    List<Tuple1<Boolean>> result = ds
        .map(new IdMapper()).setParallelism(4) // parallelize input
        .sortPartition("f1", Order.DESCENDING)
        .mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
        .distinct().collect();

    String expected = "(true)\n";

    compareResultAsText(result, expected);
}
Example 5
Source File: SortPartitionITCase.java From flink with Apache License 2.0
@Test
public void testSortPartitionByTwoFieldExpressions() throws Exception {
    /*
     * Test sort partition on two field expressions
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
    List<Tuple1<Boolean>> result = ds
        .map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>()).setParallelism(2) // parallelize input
        .sortPartition("f4", Order.ASCENDING)
        .sortPartition("f2", Order.DESCENDING)
        .mapPartition(new OrderCheckMapper<>(new Tuple5Checker()))
        .distinct().collect();

    String expected = "(true)\n";

    compareResultAsText(result, expected);
}
Example 6
Source File: GroupReduceITCase.java From flink with Apache License 2.0
@Test
public void testTupleKeySelectorGroupSort() throws Exception {
    /*
     * check correctness of sorted groupReduce on tuples with keyselector sorting
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> reduceDs = ds
        .groupBy(new LongFieldExtractor<Tuple3<Integer, Long, String>>(1))
        .sortGroup(new StringFieldExtractor<Tuple3<Integer, Long, String>>(2), Order.DESCENDING)
        .reduceGroup(new Tuple3SortedGroupReduce());

    List<Tuple3<Integer, Long, String>> result = reduceDs.collect();

    String expected = "1,1,Hi\n"
        + "5,2,Hello world-Hello\n"
        + "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n"
        + "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n"
        + "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n"
        + "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";

    compareResultAsTuples(result, expected);
}
Example 7
Source File: IPv6HostnamesITCase.java From flink with Apache License 2.0
@Test
public void testClusterWithIPv6host() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);
        env.getConfig().disableSysoutLogging();

        // get input data
        DataSet<String> text = env.fromElements(WordCountData.TEXT.split("\n"));

        DataSet<Tuple2<String, Integer>> counts = text
            .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                    for (String token : value.toLowerCase().split("\\W+")) {
                        if (token.length() > 0) {
                            out.collect(new Tuple2<String, Integer>(token, 1));
                        }
                    }
                }
            })
            .groupBy(0).sum(1);

        List<Tuple2<String, Integer>> result = counts.collect();

        TestBaseUtils.compareResultAsText(result, WordCountData.COUNTS_AS_TUPLES);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 8
Source File: FlinkTableITCase.java From flink-connectors with Apache License 2.0
@Test
public void testBatchTableSinkUsingDescriptor() throws Exception {
    // create a Pravega stream for test purposes
    Stream stream = Stream.of(setupUtils.getScope(), "testBatchTableSinkUsingDescriptor");
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    // create a Flink Table environment
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
        .withRoutingKeyField("category")
        .forStream(stream)
        .withPravegaConfig(setupUtils.getPravegaConfig());

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
        .withFormat(new Json().failOnMissingField(true))
        .withSchema(new Schema()
            .field("category", DataTypes.STRING())
            .field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
        .createBatchTableSink(propertiesMap);

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogSinkTable = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
        ObjectPath.fromString(tableSinkPath),
        connectorCatalogSinkTable,
        false);

    table.insertInto("PravegaSink");
    env.execute();
}
Example 9
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests join program with replicated data source behind filter.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFilter() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .filter(new NoFilter())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 10
Source File: BatchPojoExample.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    List<CustomCassandraAnnotatedPojo> customCassandraAnnotatedPojos = IntStream.range(0, 20)
        .mapToObj(x -> new CustomCassandraAnnotatedPojo(UUID.randomUUID().toString(), x, 0))
        .collect(Collectors.toList());

    DataSet<CustomCassandraAnnotatedPojo> dataSet = env.fromCollection(customCassandraAnnotatedPojos);

    ClusterBuilder clusterBuilder = new ClusterBuilder() {
        private static final long serialVersionUID = -1754532803757154795L;

        @Override
        protected Cluster buildCluster(Cluster.Builder builder) {
            return builder.addContactPoints("127.0.0.1").build();
        }
    };

    dataSet.output(new CassandraPojoOutputFormat<>(clusterBuilder, CustomCassandraAnnotatedPojo.class,
        () -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)}));

    env.execute("Write");

    /*
     * This is for the purpose of showing an example of creating a DataSet using CassandraPojoInputFormat.
     */
    DataSet<CustomCassandraAnnotatedPojo> inputDS = env
        .createInput(new CassandraPojoInputFormat<>(
            SELECT_QUERY,
            clusterBuilder,
            CustomCassandraAnnotatedPojo.class,
            () -> new Mapper.Option[]{Mapper.Option.consistencyLevel(ConsistencyLevel.ANY)}
        ));

    inputDS.print();
}
Example 11
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedSource1() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 12
Source File: JoinITCase.java From flink with Apache License 2.0
@Test
public void testJoinWithRangePartitioning() throws Exception {
    /*
     * Test Join on tuples with multiple key field positions and same customized distribution
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);

    env.setParallelism(4);
    TestDistribution testDis = new TestDistribution();
    DataSet<Tuple2<String, String>> joinDs =
        DataSetUtils.partitionByRange(ds1, testDis, 0, 1)
            .join(DataSetUtils.partitionByRange(ds2, testDis, 0, 4))
            .where(0, 1)
            .equalTo(0, 4)
            .with(new T3T5FlatJoin());

    List<Tuple2<String, String>> result = joinDs.collect();

    String expected = "Hi,Hallo\n"
        + "Hello,Hallo Welt\n"
        + "Hello world,Hallo Welt wie gehts?\n"
        + "Hello world,ABC\n"
        + "I am fine.,HIJ\n"
        + "I am fine.,IJK\n";

    compareResultAsTuples(result, expected);
}
Example 13
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests join program with replicated data source behind map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .map(new IdMap())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 14
Source File: ParallelismChangeTest.java From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
    final int p = DEFAULT_PARALLELISM * 2;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
        .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Map2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
        .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
    // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
    // mapper respectively reducer
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertTrue("Invalid ship strategy for an operator.",
        (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) ||
        (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
Example 15
Source File: BatchPojoExample.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    List<CustomCassandraAnnotatedPojo> customCassandraAnnotatedPojos = IntStream.range(0, 20)
        .mapToObj(x -> new CustomCassandraAnnotatedPojo(UUID.randomUUID().toString(), x, 0))
        .collect(Collectors.toList());

    DataSet<CustomCassandraAnnotatedPojo> dataSet = env.fromCollection(customCassandraAnnotatedPojos);

    ClusterBuilder clusterBuilder = new ClusterBuilder() {
        private static final long serialVersionUID = -1754532803757154795L;

        @Override
        protected Cluster buildCluster(Cluster.Builder builder) {
            return builder.addContactPoints("127.0.0.1").build();
        }
    };

    dataSet.output(new CassandraPojoOutputFormat<>(clusterBuilder, CustomCassandraAnnotatedPojo.class,
        () -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)}));

    env.execute("zhisheng");

    /*
     * This is for the purpose of showing an example of creating a DataSet using CassandraPojoInputFormat.
     */
    DataSet<CustomCassandraAnnotatedPojo> inputDS = env
        .createInput(new CassandraPojoInputFormat<>(
            SELECT_QUERY,
            clusterBuilder,
            CustomCassandraAnnotatedPojo.class,
            () -> new Mapper.Option[]{Mapper.Option.consistencyLevel(ConsistencyLevel.ANY)}
        ));

    inputDS.print();
}
Example 16
Source File: CoGroupITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testCoGroupWithRangePartitioning() throws Exception {
    /*
     * Test coGroup on tuples with multiple key field positions and same customized distribution
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);

    env.setParallelism(4);
    TestDistribution testDis = new TestDistribution();
    DataSet<Tuple3<Integer, Long, String>> coGrouped =
        DataSetUtils.partitionByRange(ds1, testDis, 0, 4)
            .coGroup(DataSetUtils.partitionByRange(ds2, testDis, 0, 1))
            .where(0, 4)
            .equalTo(0, 1)
            .with(new Tuple5Tuple3CoGroup());

    List<Tuple3<Integer, Long, String>> result = coGrouped.collect();

    String expected = "1,1,Hallo\n"
        + "2,2,Hallo Welt\n"
        + "3,2,Hallo Welt wie gehts?\n"
        + "3,2,ABC\n"
        + "5,3,HIJ\n"
        + "5,3,IJK\n";

    compareResultAsTuples(result, expected);
}
Example 17
Source File: ReduceCompilationTest.java From flink with Apache License 2.0
/**
 * Test program compilation when the Reduce's combiner has been excluded
 * by setting {@code CombineHint.NONE}.
 */
@Test
public void testGroupedReduceWithoutCombiner() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);

    DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);

    data
        .groupBy(0)
        .reduce(new RichReduceFunction<Tuple2<String, Double>>() {
            @Override
            public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                return null;
            }
        }).setCombineHint(CombineHint.NONE).name("reducer")
        .output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);

    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

    // get the original nodes
    SourcePlanNode sourceNode = resolver.getNode("source");
    SingleInputPlanNode reduceNode = resolver.getNode("reducer");
    SinkPlanNode sinkNode = resolver.getNode("sink");

    // check wiring
    assertEquals(sourceNode, reduceNode.getInput().getSource());

    // check the strategies
    assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());

    // check the keys
    assertEquals(new FieldList(0), reduceNode.getKeys(0));
    assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

    // check parallelism
    assertEquals(6, sourceNode.getParallelism());
    assertEquals(8, reduceNode.getParallelism());
    assertEquals(8, sinkNode.getParallelism());
}
Example 18
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *                           SINK
 *                            |
 *                         COGROUP
 *                  +---/          \----+
 *                 /                     \
 *                /                    MATCH10
 *               /                      |    \
 *              /                       |   MATCH9
 *          MATCH5                      |    |   \
 *           |   \                      |    |  MATCH8
 *           |  MATCH4                  |    |   |   \
 *           |   |   \                  |    |   |  MATCH7
 *           |   |  MATCH3              |    |   |   |   \
 *           |   |   |   \              |    |   |   |  MATCH6
 *           |   |   |  MATCH2          |    |   |   |   |   |
 *           |   |   |   |   \          +--+--+--+--+--+
 *           |   |   |   |  MATCH1              MAP
 *            \  |   |   |   |   |    /-----------/
 *              (DATA SOURCE ONE)
 * </pre>
 */
@Test
public void testBranchingSourceMultipleTimes() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Tuple2<Long, Long>> source = env.generateSequence(1, 10000000)
            .map(new Duplicator<Long>());

        DataSet<Tuple2<Long, Long>> joined1 = source.join(source).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined2 = source.join(joined1).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined3 = source.join(joined2).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined4 = source.join(joined3).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined5 = source.join(joined4).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> mapped = source.map(
            new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
                    return null;
                }
            });

        DataSet<Tuple2<Long, Long>> joined6 = mapped.join(mapped).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined7 = mapped.join(joined6).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined8 = mapped.join(joined7).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined9 = mapped.join(joined8).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined10 = mapped.join(joined9).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        joined5.coGroup(joined10)
            .where(1).equalTo(1)
            .with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>())
            .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testTwoIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);

        DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);

        depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        BulkIterationPlanNode bipn = (BulkIterationPlanNode) wipn.getInput1().getSource();

        // the hash partitioning has been pushed out of the delta iteration into the bulk iteration
        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

        // the input of the root step function is the last operator of the step function
        // since the work has been pushed out of the bulk iteration, it has to guarantee the hash partitioning
        for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }

        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());

        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 20
Source File: dVMPTest.java From toolbox with Apache License 2.0
public void testingMLParallelWasteHidden() throws IOException, ClassNotFoundException {
    // Set-up Flink session.
    Configuration conf = new Configuration();
    conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
    final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
    env.getConfig().disableSysoutLogging();
    env.setParallelism(Main.PARALLELISM);

    // load the true WasteIncinerator Bayesian network
    BayesianNetwork wasteIncinerator = BayesianNetworkLoader.loadFromFile("../networks/simulated/WasteIncinerator.bn");
    wasteIncinerator.randomInitialization(new Random(0));
    if (Main.VERBOSE) System.out.println("\nAsia network \n ");
    //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString());
    if (Main.VERBOSE) System.out.println(wasteIncinerator.toString());

    // Sampling from WasteIncinerator BN
    BayesianNetworkSampler sampler = new BayesianNetworkSampler(wasteIncinerator);
    sampler.setSeed(0);

    // Load the sampled data
    DataStream<DataInstance> data = sampler.sampleToDataStream(1000);
    sampler.setHiddenVar(wasteIncinerator.getVariables().getVariableById(6));
    DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff");

    // We load the data
    DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false);

    // ParallelVB is defined
    dVMP parallelVB = new dVMP();
    parallelVB.setOutput(true);
    parallelVB.setSeed(5);
    parallelVB.setBatchSize(100);
    parallelVB.setLocalThreshold(0.001);
    parallelVB.setGlobalThreshold(0.001);
    parallelVB.setMaximumLocalIterations(100);
    parallelVB.setMaximumGlobalIterations(100);

    // Setting DAG
    parallelVB.setDAG(wasteIncinerator.getDAG());

    // Setting the distributed data source
    parallelVB.initLearning();
    parallelVB.updateModel(dataFlink);
    BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork();

    if (Main.VERBOSE) System.out.println(bnet.toString());
}