Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#setParallelism()
The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#setParallelism(). They are drawn from open-source projects; the source file and originating project are noted above each example.
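Before the project-specific tests below, here is a minimal, self-contained sketch of how the method is typically used. The class name and parallelism values are illustrative assumptions, not taken from any of the examples: env.setParallelism() sets the job-wide default, while setParallelism() on an individual operator overrides it for that operator only.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class SetParallelismSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Job-wide default: operators without their own setting run with 4 parallel subtasks.
        // (The value 4 is an arbitrary choice for this sketch.)
        env.setParallelism(4);

        DataSet<Long> numbers = env.generateSequence(1, 100);

        numbers
            .map(new MapFunction<Long, Long>() {
                @Override
                public Long map(Long value) {
                    return value * 2;
                }
            })
            // Per-operator setting overrides the environment-wide default for this map only.
            .setParallelism(2)
            .print(); // print() triggers execution of the batch job
    }
}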
Example 1
Source File: SortPartitionITCase.java From flink with Apache License 2.0
@Test
public void testSortPartitionByNestedFieldExpression() throws Exception {
    /*
     * Test sort partition on nested field expressions
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
    List<Tuple1<Boolean>> result = ds
        .map(new IdMapper<Tuple2<Tuple2<Integer, Integer>, String>>()).setParallelism(3) // parallelize input
        .sortPartition("f0.f1", Order.ASCENDING)
        .sortPartition("f1", Order.DESCENDING)
        .mapPartition(new OrderCheckMapper<>(new NestedTupleChecker()))
        .distinct().collect();

    String expected = "(true)\n";

    compareResultAsText(result, expected);
}
Example 2
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source behind rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .rebalance()
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
}
Example 3
Source File: BootstrapTransformationTest.java From flink with Apache License 2.0
@Test
public void testOperatorSpecificMaxParallelismRespected() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSource<Integer> input = env.fromElements(0);

    BootstrapTransformation<Integer> transformation = OperatorTransformation
        .bootstrapWith(input)
        .setMaxParallelism(1)
        .transform(new ExampleStateBootstrapFunction());

    int maxParallelism = transformation.getMaxParallelism(4);
    DataSet<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(
        OperatorIDGenerator.fromUid("uid"),
        new MemoryStateBackend(),
        new Path(),
        maxParallelism
    );

    Assert.assertEquals(
        "The parallelism of a data set should be constrained by the savepoint max parallelism",
        1,
        getParallelism(result));
}
Example 4
Source File: SortPartitionITCase.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testSortPartitionByFieldExpression() throws Exception {
    /*
     * Test sort partition on field expression
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    List<Tuple1<Boolean>> result = ds
        .map(new IdMapper()).setParallelism(4) // parallelize input
        .sortPartition("f1", Order.DESCENDING)
        .mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
        .distinct().collect();

    String expected = "(true)\n";

    compareResultAsText(result, expected);
}
Example 5
Source File: SortPartitionITCase.java From flink with Apache License 2.0
@Test
public void testSortPartitionByTwoFieldExpressions() throws Exception {
    /*
     * Test sort partition on two field expressions
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
    List<Tuple1<Boolean>> result = ds
        .map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>()).setParallelism(2) // parallelize input
        .sortPartition("f4", Order.ASCENDING)
        .sortPartition("f2", Order.DESCENDING)
        .mapPartition(new OrderCheckMapper<>(new Tuple5Checker()))
        .distinct().collect();

    String expected = "(true)\n";

    compareResultAsText(result, expected);
}
Example 6
Source File: GroupReduceITCase.java From flink with Apache License 2.0
@Test
public void testTupleKeySelectorGroupSort() throws Exception {
    /*
     * check correctness of sorted groupReduce on tuples with keyselector sorting
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> reduceDs = ds
        .groupBy(new LongFieldExtractor<Tuple3<Integer, Long, String>>(1))
        .sortGroup(new StringFieldExtractor<Tuple3<Integer, Long, String>>(2), Order.DESCENDING)
        .reduceGroup(new Tuple3SortedGroupReduce());

    List<Tuple3<Integer, Long, String>> result = reduceDs.collect();

    String expected = "1,1,Hi\n"
        + "5,2,Hello world-Hello\n"
        + "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n"
        + "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n"
        + "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n"
        + "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";

    compareResultAsTuples(result, expected);
}
Example 7
Source File: IPv6HostnamesITCase.java From flink with Apache License 2.0
@Test
public void testClusterWithIPv6host() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);
        env.getConfig().disableSysoutLogging();

        // get input data
        DataSet<String> text = env.fromElements(WordCountData.TEXT.split("\n"));

        DataSet<Tuple2<String, Integer>> counts = text
            .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                    for (String token : value.toLowerCase().split("\\W+")) {
                        if (token.length() > 0) {
                            out.collect(new Tuple2<String, Integer>(token, 1));
                        }
                    }
                }
            })
            .groupBy(0).sum(1);

        List<Tuple2<String, Integer>> result = counts.collect();

        TestBaseUtils.compareResultAsText(result, WordCountData.COUNTS_AS_TUPLES);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 8
Source File: FlinkTableITCase.java From flink-connectors with Apache License 2.0
@Test
public void testBatchTableSinkUsingDescriptor() throws Exception {
    // create a Pravega stream for test purposes
    Stream stream = Stream.of(setupUtils.getScope(), "testBatchTableSinkUsingDescriptor");
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    // create a Flink Table environment
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
        .withRoutingKeyField("category")
        .forStream(stream)
        .withPravegaConfig(setupUtils.getPravegaConfig());

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
        .withFormat(new Json().failOnMissingField(true))
        .withSchema(new Schema()
            .field("category", DataTypes.STRING())
            .field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
        .createBatchTableSink(propertiesMap);

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogSinkTable = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
        ObjectPath.fromString(tableSinkPath),
        connectorCatalogSinkTable,
        false);

    table.insertInto("PravegaSink");
    env.execute();
}
Example 9
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0
/**
 * Tests join program with replicated data source behind filter.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFilter() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .filter(new NoFilter())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 10
Source File: BatchPojoExample.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    List<CustomCassandraAnnotatedPojo> customCassandraAnnotatedPojos = IntStream.range(0, 20)
        .mapToObj(x -> new CustomCassandraAnnotatedPojo(UUID.randomUUID().toString(), x, 0))
        .collect(Collectors.toList());

    DataSet<CustomCassandraAnnotatedPojo> dataSet = env.fromCollection(customCassandraAnnotatedPojos);

    ClusterBuilder clusterBuilder = new ClusterBuilder() {
        private static final long serialVersionUID = -1754532803757154795L;

        @Override
        protected Cluster buildCluster(Cluster.Builder builder) {
            return builder.addContactPoints("127.0.0.1").build();
        }
    };

    dataSet.output(new CassandraPojoOutputFormat<>(clusterBuilder, CustomCassandraAnnotatedPojo.class,
        () -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)}));

    env.execute("Write");

    /*
     * This is for the purpose of showing an example of creating a DataSet using CassandraPojoInputFormat.
     */
    DataSet<CustomCassandraAnnotatedPojo> inputDS = env
        .createInput(new CassandraPojoInputFormat<>(
            SELECT_QUERY,
            clusterBuilder,
            CustomCassandraAnnotatedPojo.class,
            () -> new Mapper.Option[]{Mapper.Option.consistencyLevel(ConsistencyLevel.ANY)}
        ));

    inputDS.print();
}
Example 11
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedSource1() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 12
Source File: JoinITCase.java From flink with Apache License 2.0
@Test
public void testJoinWithRangePartitioning() throws Exception {
    /*
     * Test Join on tuples with multiple key field positions and same customized distribution
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);

    env.setParallelism(4);
    TestDistribution testDis = new TestDistribution();
    DataSet<Tuple2<String, String>> joinDs =
        DataSetUtils.partitionByRange(ds1, testDis, 0, 1)
            .join(DataSetUtils.partitionByRange(ds2, testDis, 0, 4))
            .where(0, 1)
            .equalTo(0, 4)
            .with(new T3T5FlatJoin());

    List<Tuple2<String, String>> result = joinDs.collect();

    String expected = "Hi,Hallo\n"
        + "Hello,Hallo Welt\n"
        + "Hello world,Hallo Welt wie gehts?\n"
        + "Hello world,ABC\n"
        + "I am fine.,HIJ\n"
        + "I am fine.,IJK\n";

    compareResultAsTuples(result, expected);
}
Example 13
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests join program with replicated data source behind map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .map(new IdMap())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 14
Source File: ParallelismChangeTest.java From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
    final int p = DEFAULT_PARALLELISM * 2;

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p).name("Reduce1")
        .map(new IdentityMapper<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Map2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
        .output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
    // because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
    // mapper respectively reducer
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    Assert.assertTrue("Invalid ship strategy for an operator.",
        (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) ||
        (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
Example 15
Source File: BatchPojoExample.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    List<CustomCassandraAnnotatedPojo> customCassandraAnnotatedPojos = IntStream.range(0, 20)
        .mapToObj(x -> new CustomCassandraAnnotatedPojo(UUID.randomUUID().toString(), x, 0))
        .collect(Collectors.toList());

    DataSet<CustomCassandraAnnotatedPojo> dataSet = env.fromCollection(customCassandraAnnotatedPojos);

    ClusterBuilder clusterBuilder = new ClusterBuilder() {
        private static final long serialVersionUID = -1754532803757154795L;

        @Override
        protected Cluster buildCluster(Cluster.Builder builder) {
            return builder.addContactPoints("127.0.0.1").build();
        }
    };

    dataSet.output(new CassandraPojoOutputFormat<>(clusterBuilder, CustomCassandraAnnotatedPojo.class,
        () -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)}));

    env.execute("zhisheng");

    /*
     * This is for the purpose of showing an example of creating a DataSet using CassandraPojoInputFormat.
     */
    DataSet<CustomCassandraAnnotatedPojo> inputDS = env
        .createInput(new CassandraPojoInputFormat<>(
            SELECT_QUERY,
            clusterBuilder,
            CustomCassandraAnnotatedPojo.class,
            () -> new Mapper.Option[]{Mapper.Option.consistencyLevel(ConsistencyLevel.ANY)}
        ));

    inputDS.print();
}
Example 16
Source File: CoGroupITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testCoGroupWithRangePartitioning() throws Exception {
    /*
     * Test coGroup on tuples with multiple key field positions and same customized distribution
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);

    env.setParallelism(4);
    TestDistribution testDis = new TestDistribution();
    DataSet<Tuple3<Integer, Long, String>> coGrouped =
        DataSetUtils.partitionByRange(ds1, testDis, 0, 4)
            .coGroup(DataSetUtils.partitionByRange(ds2, testDis, 0, 1))
            .where(0, 4)
            .equalTo(0, 1)
            .with(new Tuple5Tuple3CoGroup());

    List<Tuple3<Integer, Long, String>> result = coGrouped.collect();

    String expected = "1,1,Hallo\n"
        + "2,2,Hallo Welt\n"
        + "3,2,Hallo Welt wie gehts?\n"
        + "3,2,ABC\n"
        + "5,3,HIJ\n"
        + "5,3,IJK\n";

    compareResultAsTuples(result, expected);
}
Example 17
Source File: ReduceCompilationTest.java From flink with Apache License 2.0
/**
 * Test program compilation when the Reduce's combiner has been excluded
 * by setting {@code CombineHint.NONE}.
 */
@Test
public void testGroupedReduceWithoutCombiner() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);

    DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);

    data
        .groupBy(0)
        .reduce(new RichReduceFunction<Tuple2<String, Double>>() {
            @Override
            public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                return null;
            }
        }).setCombineHint(CombineHint.NONE).name("reducer")
        .output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);

    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

    // get the original nodes
    SourcePlanNode sourceNode = resolver.getNode("source");
    SingleInputPlanNode reduceNode = resolver.getNode("reducer");
    SinkPlanNode sinkNode = resolver.getNode("sink");

    // check wiring
    assertEquals(sourceNode, reduceNode.getInput().getSource());

    // check the strategies
    assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());

    // check the keys
    assertEquals(new FieldList(0), reduceNode.getKeys(0));
    assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

    // check parallelism
    assertEquals(6, sourceNode.getParallelism());
    assertEquals(8, reduceNode.getParallelism());
    assertEquals(8, sinkNode.getParallelism());
}
Example 18
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *                           SINK
 *                            |
 *                         COGROUP
 *                  +---/          \----+
 *                 /                     \
 *                /                    MATCH10
 *               /                      |    \
 *              /                       |   MATCH9
 *          MATCH5                      |    |   \
 *           |   \                      |    |  MATCH8
 *           |  MATCH4                  |    |   |   \
 *           |   |   \                  |    |   |  MATCH7
 *           |   |  MATCH3              |    |   |   |   \
 *           |   |   |   \              |    |   |   |  MATCH6
 *           |   |   |  MATCH2          |    |   |   |   |   |
 *           |   |   |   |   \          +--+--+--+--+--+
 *           |   |   |   |  MATCH1              MAP
 *            \  |   |   |   |   |    /-----------/
 *              (DATA SOURCE ONE)
 * </pre>
 */
@Test
public void testBranchingSourceMultipleTimes() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Tuple2<Long, Long>> source = env.generateSequence(1, 10000000)
            .map(new Duplicator<Long>());

        DataSet<Tuple2<Long, Long>> joined1 = source.join(source).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined2 = source.join(joined1).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined3 = source.join(joined2).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined4 = source.join(joined3).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined5 = source.join(joined4).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> mapped = source.map(
            new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
                    return null;
                }
            });

        DataSet<Tuple2<Long, Long>> joined6 = mapped.join(mapped).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined7 = mapped.join(joined6).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined8 = mapped.join(joined7).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined9 = mapped.join(joined8).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined10 = mapped.join(joined9).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        joined5.coGroup(joined10)
            .where(1).equalTo(1)
            .with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>())
            .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testTwoIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);

        DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);

        depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        BulkIterationPlanNode bipn = (BulkIterationPlanNode) wipn.getInput1().getSource();

        // the hash partitioning has been pushed out of the delta iteration into the bulk iteration
        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

        // the input of the root step function is the last operator of the step function
        // since the work has been pushed out of the bulk iteration, it has to guarantee the hash partitioning
        for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }

        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());

        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 20
Source File: dVMPTest.java From toolbox with Apache License 2.0
public void testingMLParallelWasteHidden() throws IOException, ClassNotFoundException {
    // Set-up Flink session.
    Configuration conf = new Configuration();
    conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
    final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
    env.getConfig().disableSysoutLogging();
    env.setParallelism(Main.PARALLELISM);

    // load the true WasteIncinerator Bayesian network
    BayesianNetwork wasteIncinerator = BayesianNetworkLoader.loadFromFile("../networks/simulated/WasteIncinerator.bn");
    wasteIncinerator.randomInitialization(new Random(0));
    if (Main.VERBOSE) System.out.println("\nAsia network \n ");
    //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString());
    if (Main.VERBOSE) System.out.println(wasteIncinerator.toString());

    // Sampling from WasteIncinerator BN
    BayesianNetworkSampler sampler = new BayesianNetworkSampler(wasteIncinerator);
    sampler.setSeed(0);

    // Load the sampled data
    DataStream<DataInstance> data = sampler.sampleToDataStream(1000);
    sampler.setHiddenVar(wasteIncinerator.getVariables().getVariableById(6));
    DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff");

    // We load the data
    DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false);

    // ParallelVB is defined
    dVMP parallelVB = new dVMP();
    parallelVB.setOutput(true);
    parallelVB.setSeed(5);
    parallelVB.setBatchSize(100);
    parallelVB.setLocalThreshold(0.001);
    parallelVB.setGlobalThreshold(0.001);
    parallelVB.setMaximumLocalIterations(100);
    parallelVB.setMaximumGlobalIterations(100);

    // Setting DAG
    parallelVB.setDAG(wasteIncinerator.getDAG());

    // Setting the distributed data source
    parallelVB.initLearning();
    parallelVB.updateModel(dataFlink);
    BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork();

    if (Main.VERBOSE) System.out.println(bnet.toString());
}