Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#generateSequence()
The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#generateSequence().
Each example notes its source file, the project it comes from, and its license.
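Before the project examples, here is a minimal, self-contained sketch of the method itself. It is an illustration written for this page rather than code from any of the projects below, and the class name GenerateSequenceExample is a placeholder. generateSequence(from, to) creates a DataSet<Long> containing the numbers between the two arguments, both endpoints inclusive; the data set is created in parallel, so there is no guarantee about the order of its elements.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class GenerateSequenceExample {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Create a parallel DataSet<Long> holding the numbers 1 to 100 (both inclusive).
        DataSet<Long> numbers = env.generateSequence(1, 100);

        // Sum the sequence. collect() triggers execution of the program,
        // so no explicit env.execute() call is needed here.
        long sum = numbers.reduce((a, b) -> a + b).collect().get(0);
        System.out.println(sum); // prints 5050
    }
}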
Example 1
Source File: JoinDeadlockITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> longs = env.generateSequence(0, 100000);

    DataSet<Tuple1<Long>> longT1 = longs.map(new TupleWrapper());
    DataSet<Tuple1<Long>> longT2 = longT1.project(0);
    DataSet<Tuple1<Long>> longT3 = longs.map(new TupleWrapper());

    longT2.join(longT3).where(0).equalTo(0).projectFirst(0)
        .join(longT1).where(0).equalTo(0).projectFirst(0)
        .writeAsText(resultPath);

    env.execute();
}
Example 2
Source File: HardPlansCompilationTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 *  |--------------------------/                  /
 *  |--------------------------------------------/
 *
 * First cross has SameKeyFirst output contract.
 */
@Test
public void testTicket158() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);

    set1.map(new IdentityMapper<Long>()).name("Map1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
        .cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
        .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);
}
Example 3
Source File: ConnectedComponentsWithRandomisedEdgesITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
    DataSet<String> edgeString = env.fromElements(
        ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));

    DataSet<Edge<Long, NullValue>> edges = edgeString.map(new EdgeParser());

    DataSet<Vertex<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());

    Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

    DataSet<Vertex<Long, Long>> result = graph.run(new ConnectedComponents<>(100));

    result.writeAsCsv(resultPath, "\n", " ");
    env.execute();
}
Example 4
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testBranchesOnlyInBCVariables1() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Long> input = env.generateSequence(1, 10);
        DataSet<Long> bc_input = env.generateSequence(1, 10);

        input
            .map(new IdentityMapper<Long>()).withBroadcastSet(bc_input, "name1")
            .map(new IdentityMapper<Long>()).withBroadcastSet(bc_input, "name2")
            .output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 5
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0
/**
 * <pre>
 *   (SINK 3) (SINK 1)   (SINK 2) (SINK 4)
 *       \      /            \      /
 *       (SRC A)             (SRC B)
 * </pre>
 *
 * NOTE: this case is currently not caught by the compiler. we should enable the test once it is caught.
 */
@Test
public void testBranchingDisjointPlan() {
    // construct the plan
    final String out1Path = "file:///test/1";
    final String out2Path = "file:///test/2";
    final String out3Path = "file:///test/3";
    final String out4Path = "file:///test/4";

    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> sourceA = env.generateSequence(0, 1);
    DataSet<Long> sourceB = env.generateSequence(0, 1);

    sourceA.writeAsText(out1Path);
    sourceB.writeAsText(out2Path);
    sourceA.writeAsText(out3Path);
    sourceB.writeAsText(out4Path);

    Plan plan = env.createProgramPlan();
    compileNoStats(plan);
}
Example 6
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0
@Test
public void testBranchAfterIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> sourceA = env.generateSequence(0, 1);

    IterativeDataSet<Long> loopHead = sourceA.iterate(10);
    DataSet<Long> loopTail = loopHead.map(new IdentityMapper<Long>()).name("Mapper");
    DataSet<Long> loopRes = loopHead.closeWith(loopTail);

    loopRes.output(new DiscardingOutputFormat<Long>());
    loopRes.map(new IdentityMapper<Long>())
        .output(new DiscardingOutputFormat<Long>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 7
Source File: BroadcastUnionITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<Long> input = env.generateSequence(1, 10);
    DataSet<Long> bc1 = env.generateSequence(1, 5);
    DataSet<Long> bc2 = env.generateSequence(6, 10);

    List<Long> result = input
        .map(new Mapper())
        .withBroadcastSet(bc1.union(bc2), BC_NAME)
        .reduce(new Reducer())
        .collect();

    Assert.assertEquals(Long.valueOf(3025), result.get(0));
}
Example 8
Source File: ReduceAllTest.java From flink with Apache License 2.0
@Test
public void testReduce() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);

    set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
        .output(new DiscardingOutputFormat<Long>()).name("Sink");

    Plan plan = env.createProgramPlan();

    try {
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();
        jobGen.compileJobGraph(oPlan);
    }
    catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly");
    }
}
Example 9
Source File: PartitionITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testRangePartitionerOnSequenceData() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> dataSource = env.generateSequence(0, 10000);
    KeySelector<Long, Long> keyExtractor = new ObjectSelfKeySelector();

    MapPartitionFunction<Long, Tuple2<Long, Long>> minMaxSelector =
        new MinMaxSelector<>(new LongComparator(true));

    Comparator<Tuple2<Long, Long>> tuple2Comparator = new Tuple2Comparator(new LongComparator(true));

    List<Tuple2<Long, Long>> collected =
        dataSource.partitionByRange(keyExtractor).mapPartition(minMaxSelector).collect();
    Collections.sort(collected, tuple2Comparator);

    long previousMax = -1;
    for (Tuple2<Long, Long> tuple2 : collected) {
        if (previousMax == -1) {
            previousMax = tuple2.f1;
        } else {
            long currentMin = tuple2.f0;
            assertTrue(tuple2.f0 < tuple2.f1);
            assertEquals(previousMax + 1, currentMin);
            previousMax = tuple2.f1;
        }
    }
}
Example 10
Source File: PartitionITCase.java From Flink-CEPplus with Apache License 2.0
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {

    // does not apply for collection execution
    if (super.mode == TestExecutionMode.COLLECTION) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> source = env.generateSequence(0, 10000);

    DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {
        @Override
        public Tuple2<Long, String> map(Long v) throws Exception {
            return new Tuple2<>(v, Long.toString(v));
        }
    });

    DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);
    DataSet<Tuple2<Long, String>> body = it.getWorkset()
        .partitionByRange(1) // Verify that range partition is not allowed in iteration
        .join(it.getSolutionSet())
        .where(0).equalTo(0).projectFirst(0).projectSecond(1);
    DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);

    result.collect(); // should fail
}
Example 11
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0
@Test
public void testBranchingUnion() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Long> source1 = env.generateSequence(0, 1);
        DataSet<Long> source2 = env.generateSequence(0, 1);

        DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).name("Join 1");

        DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");
        DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");
        DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");

        DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");
        DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

        DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
            .join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).name("Join 2");

        join2.output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        JobGraphGenerator jobGen = new JobGraphGenerator();

        // Compile plan to verify that no error is thrown
        jobGen.compileJobGraph(oPlan);
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 12
Source File: AdditionalOperatorsTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testCrossWithLarge() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);
    DataSet<Long> set2 = env.generateSequence(0, 1);

    set1.crossWithHuge(set2).name("Cross")
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    try {
        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);

        DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
        Channel in1 = crossPlanNode.getInput1();
        Channel in2 = crossPlanNode.getInput2();

        assertEquals(ShipStrategyType.BROADCAST, in1.getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, in2.getShipStrategy());
    }
    catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
    }
}
Example 13
Source File: DataSinkTest.java From Flink-CEPplus with Apache License 2.0
@Test(expected = InvalidProgramException.class)
public void testFailPrimitiveOrder1() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> longDs = env.generateSequence(0, 2);

    // must not work
    longDs.writeAsText("/tmp/willNotHappen")
        .sortLocalOutput(0, Order.ASCENDING);
}
Example 14
Source File: PartitionITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testForcedRebalancing() throws Exception {
    /*
     * Test forced rebalancing
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // generate some numbers in parallel
    DataSet<Long> ds = env.generateSequence(1, 3000);
    DataSet<Tuple2<Integer, Integer>> uniqLongs = ds
        // introduce some partition skew by filtering
        .filter(new Filter1())
        // rebalance
        .rebalance()
        // count values in each partition
        .map(new PartitionIndexMapper())
        .groupBy(0)
        .reduce(new Reducer1())
        // round counts to mitigate runtime scheduling effects (lazy split assignment)
        .map(new Mapper1());

    List<Tuple2<Integer, Integer>> result = uniqLongs.collect();

    StringBuilder expected = new StringBuilder();
    int numPerPartition = 2220 / env.getParallelism() / 10;
    for (int i = 0; i < env.getParallelism(); i++) {
        expected.append('(').append(i).append(',')
            .append(numPerPartition).append(")\n");
    }

    compareResultAsText(result, expected.toString());
}
Example 15
Source File: NestedIterationsTest.java From flink with Apache License 2.0
@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);

        IterativeDataSet<Long> firstIteration = data1.iterate(100);
        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));

        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
        DataSet<Long> joined = mainIteration.join(firstResult)
            .where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
            .with(new DummyFlatJoinFunction<Long>());

        DataSet<Long> mainResult = mainIteration.closeWith(joined);
        mainResult.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 16
Source File: AdditionalOperatorsTest.java From flink with Apache License 2.0
@Test
public void testCrossWithSmall() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);
    DataSet<Long> set2 = env.generateSequence(0, 1);

    set1.crossWithTiny(set2).name("Cross")
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    try {
        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileWithStats(plan);
        OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);

        DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
        Channel in1 = crossPlanNode.getInput1();
        Channel in2 = crossPlanNode.getInput2();

        assertEquals(ShipStrategyType.FORWARD, in1.getShipStrategy());
        assertEquals(ShipStrategyType.BROADCAST, in2.getShipStrategy());
    }
    catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The Flink optimizer is unable to compile this plan correctly.");
    }
}
Example 17
Source File: AdditionalOperatorsTest.java From flink with Apache License 2.0
@Test
public void testCrossWithLarge() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1);
    DataSet<Long> set2 = env.generateSequence(0, 1);

    set1.crossWithHuge(set2).name("Cross")
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    try {
        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);

        DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
        Channel in1 = crossPlanNode.getInput1();
        Channel in2 = crossPlanNode.getInput2();

        assertEquals(ShipStrategyType.BROADCAST, in1.getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, in2.getShipStrategy());
    }
    catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
    }
}
Example 18
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *             (SRC A)
 *                |
 *             (MAP A)
 *             /     \
 *       (MAP B)    (MAP C)
 *        /          /    \
 *  (SINK A)  (SINK B)  (SINK C)
 * </pre>
 */
@SuppressWarnings("unchecked")
@Test
public void testBranchingWithMultipleDataSinks2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source = env.generateSequence(1, 10000);

        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedB = mappedA.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = mappedA.map(new IdentityMapper<Long>());

        mappedB.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        Set<Operator<?>> sinks = new HashSet<Operator<?>>(plan.getDataSinks());

        OptimizedPlan oPlan = compileNoStats(plan);

        // ---------- check the optimizer plan ----------

        // number of sinks
        assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());

        // remove matching sinks to check relation
        for (SinkPlanNode sink : oPlan.getDataSinks()) {
            assertTrue(sinks.remove(sink.getProgramOperator()));
        }
        assertTrue(sinks.isEmpty());

        new JobGraphGenerator().compileJobGraph(oPlan);
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: PipelineBreakerTest.java From flink with Apache License 2.0
/**
 * <pre>
 *                                +----------- ITERATION ---------+
 *                                |                               |
 *                               +--+                         +----+
 *  (source 1) ----------------->|PS| ------------ +     +-->|next|---> (sink)
 *                               +--+        (BC)  |     |   +----+
 *                                                 V     |
 *  (source 2) --> (map) --+------|-----------> (MAPPER) ---+
 *                         |      |                ^
 *                         |      |                | (BC)
 *                         |      +----------------|--------------+
 *                         |                       |
 *                         +--(map) --> (reduce) --+
 * </pre>
 */
@Test
public void testPipelineBreakerBroadcastedPartialSolution() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
        env.setParallelism(64);

        DataSet<Long> initialSource = env.generateSequence(1, 10);
        IterativeDataSet<Long> iteration = initialSource.iterate(100);

        DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());

        DataSet<Long> bcInput1 = sourceWithMapper
            .map(new IdentityMapper<Long>())
            .reduce(new SelectOneReducer<Long>());

        DataSet<Long> result = sourceWithMapper
            .map(new IdentityMapper<Long>())
                .withBroadcastSet(iteration, "bc2")
                .withBroadcastSet(bcInput1, "bc1");

        iteration.closeWith(result).output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        BulkIterationPlanNode iterationPlanNode = (BulkIterationPlanNode) sink.getInput().getSource();
        SingleInputPlanNode mapper = (SingleInputPlanNode) iterationPlanNode.getRootOfStepFunction();

        assertEquals(TempMode.CACHED, mapper.getInput().getTempMode());
        assertEquals(DataExchangeMode.BATCH, mapper.getInput().getDataExchangeMode());
    }
    catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}