org.apache.flink.streaming.api.datastream.IterativeStream Java Examples
The following examples show how to use
org.apache.flink.streaming.api.datastream.IterativeStream.
Each example notes the open source project and source file it was extracted from.
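Before the extracted examples, here is a minimal, self-contained sketch of the pattern they all share: open a loop with DataStream#iterate(), apply a step function, feed part of the result back into the loop with IterativeStream#closeWith(), and let the rest flow downstream. The API calls are the real ones used throughout the examples below; the input values, the decrement step, the filter conditions, and the class name are illustrative assumptions only.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.IterativeStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MinimalIterationSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // The feedback edge must run at the same parallelism as the iteration head,
        // so keep a single parallelism everywhere (compare testIncorrectParallelism below).
        env.setParallelism(1);

        // Illustrative input: a few positive integers.
        DataStream<Integer> input = env.fromElements(5, 8, 13);

        // Open the loop. The argument (5000 ms) is how long the iteration head
        // waits for feedback data before it shuts the loop down.
        IterativeStream<Integer> iteration = input.iterate(5000);

        // Step function: decrement each element once per pass through the loop.
        DataStream<Integer> minusOne = iteration.map(new MapFunction<Integer, Integer>() {
            @Override
            public Integer map(Integer value) {
                return value - 1;
            }
        });

        // Elements that are still positive are fed back for another pass...
        DataStream<Integer> stillPositive = minusOne.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return value > 0;
            }
        });
        iteration.closeWith(stillPositive);

        // ...and elements that have reached zero leave the loop as the result.
        minusOne.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return value <= 0;
            }
        }).print();

        env.execute("Minimal IterativeStream sketch");
    }
}

The same head/feedback shape appears in every example that follows; the variations are in the feedback type (withFeedbackType, which yields ConnectedIterativeStreams), the timeout passed to iterate(), and how the loop output is routed back versus emitted.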
Example #1
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@Test
public void testImmutabilityWithCoiteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); // for rebalance

    IterativeStream<Integer> iter1 = source.iterate();
    // Calling withFeedbackType should create a new iteration
    ConnectedIterativeStreams<Integer, String> iter2 = iter1.withFeedbackType(String.class);

    iter1.closeWith(iter1.map(noOpIntMap)).print();
    iter2.closeWith(iter2.map(noOpCoMap)).print();

    StreamGraph graph = env.getStreamGraph();

    assertEquals(2, graph.getIterationSourceSinkPairs().size());

    for (Tuple2<StreamNode, StreamNode> sourceSinkPair : graph.getIterationSourceSinkPairs()) {
        assertEquals(
            graph.getTargetVertex(sourceSinkPair.f0.getOutEdges().get(0)),
            graph.getSourceVertex(sourceSinkPair.f1.getInEdges().get(0)));
    }
}
Example #2
Source File: FlinkTopology.java From incubator-samoa with Apache License 2.0

private void initializeCycle(int cycleID) {
    // get the head and tail of cycle
    FlinkProcessingItem tail = cycles.get(cycleID).get(0);
    FlinkProcessingItem head = cycles.get(cycleID).get(cycles.get(cycleID).size() - 1);

    // initialise source stream of the iteration, so as to use it for the iteration starting point
    if (!head.isInitialised()) {
        head.setOnIteration(true);
        head.initialise();
        head.initialiseStreams();
    }

    // initialise all nodes after head
    for (int node = cycles.get(cycleID).size() - 2; node >= 0; node--) {
        FlinkProcessingItem processingItem = cycles.get(cycleID).get(node);
        processingItem.initialise();
        processingItem.initialiseStreams();
    }

    SingleOutputStreamOperator backedge = (SingleOutputStreamOperator) head
        .getInputStreamBySourceID(tail.getComponentId()).getOutStream();
    backedge.setParallelism(head.getParallelism());
    ((IterativeStream) head.getDataStream()).closeWith(backedge);
}
Example #3
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test iteration job, check slot sharing group and co-location group.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    StreamGraph streamGraph = env.getStreamGraph();
    for (Tuple2<StreamNode, StreamNode> iterationPair : streamGraph.getIterationSourceSinkPairs()) {
        assertNotNull(iterationPair.f0.getCoLocationGroup());
        assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());

        assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, iterationPair.f0.getSlotSharingGroup());
        assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());
    }
}
Example #4
Source File: IterateITCase.java From flink with Apache License 2.0

@Test
public void testImmutabilityWithCoiteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); // for rebalance

    IterativeStream<Integer> iter1 = source.iterate();
    // Calling withFeedbackType should create a new iteration
    ConnectedIterativeStreams<Integer, String> iter2 = iter1.withFeedbackType(String.class);

    iter1.closeWith(iter1.map(noOpIntMap)).print();
    iter2.closeWith(iter2.map(noOpCoMap)).print();

    StreamGraph graph = env.getStreamGraph();

    assertEquals(2, graph.getIterationSourceSinkPairs().size());

    for (Tuple2<StreamNode, StreamNode> sourceSinkPair : graph.getIterationSourceSinkPairs()) {
        assertEquals(
            graph.getTargetVertex(sourceSinkPair.f0.getOutEdges().get(0)),
            graph.getSourceVertex(sourceSinkPair.f1.getInEdges().get(0)));
    }
}
Example #5
Source File: IterativeConnectedComponents.java From gelly-streaming with Apache License 2.0

public static void main(String[] args) throws Exception {
    // Set up the environment
    if (!parseParameters(args)) {
        return;
    }
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Long, Long>> edges = getEdgesDataSet(env);

    IterativeStream<Tuple2<Long, Long>> iteration = edges.iterate();
    DataStream<Tuple2<Long, Long>> result = iteration.closeWith(
        iteration.keyBy(0).flatMap(new AssignComponents()));

    // Emit the results
    result.print();

    env.execute("Streaming Connected Components");
}
Example #6
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test iteration job, check slot sharing group and co-location group.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    final ResourceSpec resources = ResourceSpec.newBuilder(1.0, 100).build();
    iteration.getTransformation().setResources(resources, resources);

    StreamGraph streamGraph = env.getStreamGraph();
    for (Tuple2<StreamNode, StreamNode> iterationPair : streamGraph.getIterationSourceSinkPairs()) {
        assertNotNull(iterationPair.f0.getCoLocationGroup());
        assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());

        assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, iterationPair.f0.getSlotSharingGroup());
        assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());

        final ResourceSpec sourceMinResources = iterationPair.f0.getMinResources();
        final ResourceSpec sinkMinResources = iterationPair.f1.getMinResources();
        final ResourceSpec iterationResources = sourceMinResources.merge(sinkMinResources);
        assertThat(iterationResources, equalsResourceSpec(resources));
    }
}
Example #7
Source File: IterateITCase.java From flink with Apache License 2.0

@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
    int numRetries = 5;
    int timeoutScale = 1;

    for (int numRetry = 0; numRetry < numRetries; numRetry++) {
        try {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            iterated = new boolean[parallelism];

            DataStream<Boolean> source = env
                .fromCollection(Collections.nCopies(parallelism * 2, false))
                .map(noOpBoolMap).name("ParallelizeMap");

            IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale);

            DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap);

            iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

            iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink());

            env.execute();

            for (boolean iter : iterated) {
                assertTrue(iter);
            }

            break; // success
        } catch (Throwable t) {
            LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t);

            if (numRetry >= numRetries - 1) {
                throw t;
            } else {
                timeoutScale *= 2;
            }
        }
    }
}
Example #8
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
    int numRetries = 5;
    int timeoutScale = 1;

    for (int numRetry = 0; numRetry < numRetries; numRetry++) {
        try {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            iterated = new boolean[parallelism];

            DataStream<Boolean> source = env
                .fromCollection(Collections.nCopies(parallelism * 2, false))
                .map(noOpBoolMap).name("ParallelizeMap");

            IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale);

            DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap);

            iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

            iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink());

            env.execute();

            for (boolean iter : iterated) {
                assertTrue(iter);
            }

            break; // success
        } catch (Throwable t) {
            LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t);

            if (numRetry >= numRetries - 1) {
                throw t;
            } else {
                timeoutScale *= 2;
            }
        }
    }
}
Example #9
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 10);
    IterativeStream<Integer> iter1 = source.iterate();

    SingleOutputStreamOperator<Integer> map1 = iter1.map(noOpIntMap);
    iter1.closeWith(map1).print();
}
Example #10
Source File: IterateITCase.java From flink with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 10);
    IterativeStream<Integer> iter1 = source.iterate();

    SingleOutputStreamOperator<Integer> map1 = iter1.map(noOpIntMap);
    iter1.closeWith(map1).print();
}
Example #11
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test iteration job when disable slot sharing, check slot sharing group and co-location group.
 */
@Test
public void testIterationWithSlotSharingDisabled() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source.getTransformation());
    transformations.add(iteration.getTransformation());
    transformations.add(map.getTransformation());
    transformations.add(filter.getTransformation());

    StreamGraphGenerator generator = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
    generator.setSlotSharingEnabled(false);
    StreamGraph streamGraph = generator.generate();

    for (Tuple2<StreamNode, StreamNode> iterationPair : streamGraph.getIterationSourceSinkPairs()) {
        assertNotNull(iterationPair.f0.getCoLocationGroup());
        assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());

        assertNotNull(iterationPair.f0.getSlotSharingGroup());
        assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());
    }
}
Example #12
Source File: IterateExample.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
        .setBufferTimeout(1);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
        .iterate(5000L);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
        .split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
        .map(new OutputMap());

    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
}
Example #13
Source File: IterateExample.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
        .setBufferTimeout(1);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
        .iterate(5000);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
        .split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
        .map(new OutputMap());

    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
}
Example #14
Source File: IterateExample.java From flink-learning with Apache License 2.0

public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().setBufferTimeout(1);
    env.getConfig().setGlobalJobParameters(params);

    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = env.addSource(new RandomFibonacciSource())
        .map(new InputMap())
        .iterate(5000);

    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
        .split(new MySelector());

    it.closeWith(step.select("iterate"));

    step.select("output")
        .map(new OutputMap())
        .print();

    env.execute("Streaming Iteration Example");
}
Example #15
Source File: FtrlTrainStreamOp.java From Alink with Apache License 2.0

@Override
public FtrlTrainStreamOp linkFrom(StreamOperator<?>... inputs) {
    checkOpSize(1, inputs);

    int vectorSize = getVectorSize();
    boolean hasInterceptItem = getWithIntercept();
    int vectorTrainIdx = getVectorCol() != null
        ? TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), getVectorCol()) : -1;
    int labelIdx = TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), getLabelCol());

    String[] featureCols = getFeatureCols();
    int[] featureIdx = null;
    int featureColLength = -1;
    if (vectorTrainIdx == -1) {
        featureIdx = new int[featureCols.length];
        for (int i = 0; i < featureCols.length; ++i) {
            featureIdx[i] = TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), featureCols[i]);
        }
        featureColLength = featureCols.length;
    }
    final TypeInformation labelType = inputs[0].getColTypes()[labelIdx];
    int parallelism = MLEnvironmentFactory.get(getMLEnvironmentId())
        .getStreamExecutionEnvironment().getParallelism();
    int featureSize = vectorTrainIdx != -1 ? vectorSize : featureColLength;
    final int[] splitInfo = getSplitInfo(featureSize, hasInterceptItem, parallelism);

    DataStream<Row> initData = inputs[0].getDataStream();

    // Tuple5<SampleId, taskId, numSubVec, SubVec, label>
    DataStream<Tuple5<Long, Integer, Integer, Vector, Object>> input = initData
        .flatMap(new SplitVector(splitInfo, hasInterceptItem, vectorSize, vectorTrainIdx, featureIdx, labelIdx))
        .partitionCustom(new CustomBlockPartitioner(), 1);

    // train data format = <sampleId, subSampleTaskId, subNum, SparseVector(subSample), label>
    // feedback format = Tuple7<sampleId, subSampleTaskId, subNum, SparseVector(subSample), label, wx, timeStamps>
    IterativeStream.ConnectedIterativeStreams<Tuple5<Long, Integer, Integer, Vector, Object>,
        Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>> iteration = input.iterate(Long.MAX_VALUE)
        .withFeedbackType(TypeInformation
            .of(new TypeHint<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {}));

    DataStream iterativeBody = iteration.flatMap(new CalcTask(dataBridge, splitInfo, getParams()))
        .keyBy(0)
        .flatMap(new ReduceTask(parallelism, splitInfo))
        .partitionCustom(new CustomBlockPartitioner(), 1);

    DataStream<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>> result = iterativeBody.filter(
        new FilterFunction<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {
            @Override
            public boolean filter(Tuple7<Long, Integer, Integer, Vector, Object, Double, Long> t3) throws Exception {
                // if t3.f0 > 0 && t3.f2 > 0 then feedback
                return (t3.f0 > 0 && t3.f2 > 0);
            }
        });

    DataStream<Row> output = iterativeBody.filter(
        new FilterFunction<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {
            @Override
            public boolean filter(Tuple7<Long, Integer, Integer, Vector, Object, Double, Long> value) throws Exception {
                /* if value.f0 is smaller than 0, then output */
                return value.f0 < 0;
            }
        }).flatMap(new WriteModel(labelType, getVectorCol(), featureCols, hasInterceptItem));

    iteration.closeWith(result);

    TableSchema schema = new LinearModelDataConverter(labelType).getModelSchema();

    TypeInformation[] types = new TypeInformation[schema.getFieldTypes().length + 2];
    String[] names = new String[schema.getFieldTypes().length + 2];
    names[0] = "bid";
    names[1] = "ntab";
    types[0] = Types.LONG;
    types[1] = Types.LONG;
    for (int i = 0; i < schema.getFieldTypes().length; ++i) {
        types[i + 2] = schema.getFieldTypes()[i];
        names[i + 2] = schema.getFieldNames()[i];
    }

    this.setOutput(output, names, types);
    return this;
}
Example #16
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test iteration job, check slot sharing group and co-location group.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

    SlotSharingGroup slotSharingGroup = jobGraph.getVerticesAsArray()[0].getSlotSharingGroup();
    assertNotNull(slotSharingGroup);

    CoLocationGroup iterationSourceCoLocationGroup = null;
    CoLocationGroup iterationSinkCoLocationGroup = null;

    for (JobVertex jobVertex : jobGraph.getVertices()) {
        // all vertices have same slot sharing group by default
        assertEquals(slotSharingGroup, jobVertex.getSlotSharingGroup());

        // all iteration vertices have same co-location group,
        // others have no co-location group by default
        if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
            iterationSourceCoLocationGroup = jobVertex.getCoLocationGroup();
            assertTrue(iterationSourceCoLocationGroup.getVertices().contains(jobVertex));
        } else if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
            iterationSinkCoLocationGroup = jobVertex.getCoLocationGroup();
            assertTrue(iterationSinkCoLocationGroup.getVertices().contains(jobVertex));
        } else {
            assertNull(jobVertex.getCoLocationGroup());
        }
    }

    assertNotNull(iterationSourceCoLocationGroup);
    assertNotNull(iterationSinkCoLocationGroup);
    assertEquals(iterationSourceCoLocationGroup, iterationSinkCoLocationGroup);
}
Example #17
Source File: IterateITCase.java From flink with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testClosingFromOutOfLoop() throws Exception {
    // this test verifies that we cannot close an iteration with a DataStream that does not
    // have the iteration in its predecessors
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();
    IterativeStream<Integer> iter2 = source.iterate();

    iter2.closeWith(iter1.map(noOpIntMap));
}
Example #18
Source File: IterateITCase.java From flink with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
    // this test verifies that we cannot close an iteration with a DataStream that does not
    // have the iteration in its predecessors
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();
    ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType(Integer.class);

    coIter.closeWith(iter1.map(noOpIntMap));
}
Example #19
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testClosingFromOutOfLoop() throws Exception {
    // this test verifies that we cannot close an iteration with a DataStream that does not
    // have the iteration in its predecessors
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();
    IterativeStream<Integer> iter2 = source.iterate();

    iter2.closeWith(iter1.map(noOpIntMap));
}
Example #20
Source File: IterateExample.java From Flink-CEPplus with Apache License 2.0

public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
        .setBufferTimeout(1);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
        .iterate(5000);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
        .split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
        .map(new OutputMap());

    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
}
Example #21
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test slot sharing group is enabled or disabled for iteration.
 */
@Test
public void testDisableSlotSharingForIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source.getTransformation());
    transformations.add(iteration.getTransformation());
    transformations.add(map.getTransformation());
    transformations.add(filter.getTransformation());

    // when slot sharing group is disabled
    // all job vertices except iteration vertex would have no slot sharing group
    // iteration vertices would be set slot sharing group automatically
    StreamGraphGenerator generator = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
    generator.setSlotSharingEnabled(false);
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(generator.generate());

    SlotSharingGroup iterationSourceSlotSharingGroup = null;
    SlotSharingGroup iterationSinkSlotSharingGroup = null;

    CoLocationGroup iterationSourceCoLocationGroup = null;
    CoLocationGroup iterationSinkCoLocationGroup = null;

    for (JobVertex jobVertex : jobGraph.getVertices()) {
        if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
            iterationSourceSlotSharingGroup = jobVertex.getSlotSharingGroup();
            iterationSourceCoLocationGroup = jobVertex.getCoLocationGroup();
        } else if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
            iterationSinkSlotSharingGroup = jobVertex.getSlotSharingGroup();
            iterationSinkCoLocationGroup = jobVertex.getCoLocationGroup();
        } else {
            assertNull(jobVertex.getSlotSharingGroup());
        }
    }

    assertNotNull(iterationSourceSlotSharingGroup);
    assertNotNull(iterationSinkSlotSharingGroup);
    assertEquals(iterationSourceSlotSharingGroup, iterationSinkSlotSharingGroup);

    assertNotNull(iterationSourceCoLocationGroup);
    assertNotNull(iterationSinkCoLocationGroup);
    assertEquals(iterationSourceCoLocationGroup, iterationSinkCoLocationGroup);
}
Example #22
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
    // this test verifies that we cannot close an iteration with a DataStream that does not
    // have the iteration in its predecessors
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();
    ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType(Integer.class);

    coIter.closeWith(iter1.map(noOpIntMap));
}
Example #23
Source File: PythonIterativeStream.java From Flink-CEPplus with Apache License 2.0

PythonIterativeStream(IterativeStream<PyObject> iterativeStream) {
    super(iterativeStream);
}