Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#iterate()
The following examples show how to use
org.apache.flink.streaming.api.datastream.DataStream#iterate().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds a small iterative job and verifies, for every iteration source/sink pair of the
 * generated stream graph, that both nodes share a non-null co-location group and are placed
 * in the default slot sharing group.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> input = environment.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> loop = input.iterate(3000);
    loop.name("iteration").setParallelism(2);

    // The filter predicate is constantly false, so it passes no element on to the feedback edge.
    DataStream<Integer> incremented = loop.map(value -> value + 1).name("map").setParallelism(2);
    DataStream<Integer> feedback = incremented.filter(value -> false).name("filter").setParallelism(2);
    loop.closeWith(feedback).print();

    StreamGraph graph = environment.getStreamGraph();
    for (Tuple2<StreamNode, StreamNode> pair : graph.getIterationSourceSinkPairs()) {
        StreamNode head = pair.f0;
        StreamNode tail = pair.f1;

        // co-location: present on the source and identical on the sink
        assertNotNull(head.getCoLocationGroup());
        assertEquals(head.getCoLocationGroup(), tail.getCoLocationGroup());

        // slot sharing: default group on the source and identical on the sink
        assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, head.getSlotSharingGroup());
        assertEquals(head.getSlotSharingGroup(), tail.getSlotSharingGroup());
    }
}
Example 2
Source File: IterativeConnectedComponents.java From gelly-streaming with Apache License 2.0 | 6 votes |
/**
 * Entry point: wires an iterative streaming job over the edge stream, prints its result and
 * runs it under the job name "Streaming Connected Components".
 *
 * @param args command-line arguments, handed to {@code parseParameters}
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    // Nothing to do when the arguments are rejected.
    final boolean argsAccepted = parseParameters(args);
    if (!argsAccepted) {
        return;
    }

    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Long, Long>> edgeStream = getEdgesDataSet(environment);

    // Open the loop, key it by the first tuple field and feed the flat-mapped stream back.
    IterativeStream<Tuple2<Long, Long>> loop = edgeStream.iterate();
    DataStream<Tuple2<Long, Long>> feedback = loop.keyBy(0).flatMap(new AssignComponents());
    DataStream<Tuple2<Long, Long>> components = loop.closeWith(feedback);

    // Emit the results
    components.print();

    environment.execute("Streaming Connected Components");
}
Example 3
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds a small iterative job with explicit resources on the iteration transformation and
 * verifies, for every iteration source/sink pair of the generated stream graph, the
 * co-location group, the slot sharing group, and that the merged minimum resources of source
 * and sink match the configured resource spec.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> input = environment.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> loop = input.iterate(3000);
    loop.name("iteration").setParallelism(2);

    // The filter predicate is constantly false, so it passes no element on to the feedback edge.
    DataStream<Integer> incremented = loop.map(value -> value + 1).name("map").setParallelism(2);
    DataStream<Integer> feedback = incremented.filter(value -> false).name("filter").setParallelism(2);
    loop.closeWith(feedback).print();

    // The same spec is passed for both arguments of setResources.
    final ResourceSpec expectedSpec = ResourceSpec.newBuilder(1.0, 100).build();
    loop.getTransformation().setResources(expectedSpec, expectedSpec);

    StreamGraph graph = environment.getStreamGraph();
    for (Tuple2<StreamNode, StreamNode> pair : graph.getIterationSourceSinkPairs()) {
        StreamNode head = pair.f0;
        StreamNode tail = pair.f1;

        // co-location: present on the source and identical on the sink
        assertNotNull(head.getCoLocationGroup());
        assertEquals(head.getCoLocationGroup(), tail.getCoLocationGroup());

        // slot sharing: default group on the source and identical on the sink
        assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, head.getSlotSharingGroup());
        assertEquals(head.getSlotSharingGroup(), tail.getSlotSharingGroup());

        // resources: source and sink minimum resources merged must equal the configured spec
        final ResourceSpec mergedSpec = head.getMinResources().merge(tail.getMinResources());
        assertThat(mergedSpec, equalsResourceSpec(expectedSpec));
    }
}
Example 4
Source File: IterateITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Derives a co-iteration from a plain iteration via {@code withFeedbackType}, closes both,
 * and checks that the stream graph contains two iteration source/sink pairs, each of which
 * is connected through the same vertex.
 */
@Test
public void testImmutabilityWithCoiteration() {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap); // for rebalance

    IterativeStream<Integer> plainLoop = input.iterate();
    // Calling withFeedbackType should create a new iteration
    ConnectedIterativeStreams<Integer, String> coLoop = plainLoop.withFeedbackType(String.class);

    plainLoop.closeWith(plainLoop.map(noOpIntMap)).print();
    coLoop.closeWith(coLoop.map(noOpCoMap)).print();

    StreamGraph streamGraph = environment.getStreamGraph();
    assertEquals(2, streamGraph.getIterationSourceSinkPairs().size());

    for (Tuple2<StreamNode, StreamNode> pair : streamGraph.getIterationSourceSinkPairs()) {
        StreamNode head = pair.f0;
        StreamNode tail = pair.f1;
        // The target of the source's first out-edge must be the origin of the sink's first in-edge.
        assertEquals(
            streamGraph.getTargetVertex(head.getOutEdges().get(0)),
            streamGraph.getSourceVertex(tail.getInEdges().get(0)));
    }
}
Example 5
Source File: IterateITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Runs a simple iterative job and asserts that every entry of the {@code iterated} array has
 * been set. The job is attempted up to five times; after each failed attempt the scale factor
 * applied to the value passed to {@code iterate} is doubled, and the failure of the final
 * attempt is rethrown.
 */
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
    final int maxAttempts = 5;
    int scale = 1;

    for (int attempt = 0; attempt < maxAttempts; attempt++) {
        try {
            StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
            iterated = new boolean[parallelism];

            DataStream<Boolean> input = environment
                .fromCollection(Collections.nCopies(parallelism * 2, false))
                .map(noOpBoolMap)
                .name("ParallelizeMap");

            IterativeStream<Boolean> loop = input.iterate(3000 * scale);
            DataStream<Boolean> feedback = loop.flatMap(new IterationHead()).map(noOpBoolMap);

            loop.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());
            loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink());

            environment.execute();

            for (boolean flag : iterated) {
                assertTrue(flag);
            }

            break; // success
        } catch (Throwable failure) {
            LOG.info("Run " + (attempt + 1) + "/" + maxAttempts + " failed", failure);

            boolean attemptsLeft = attempt < maxAttempts - 1;
            if (attemptsLeft) {
                // Retry with a doubled scale factor.
                scale *= 2;
            } else {
                throw failure;
            }
        }
    }
}
Example 6
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Closes an iteration opened directly on a {@code fromElements} source with a mapped stream
 * and expects an {@link UnsupportedOperationException}.
 * NOTE(review): presumably triggered by a parallelism mismatch between the source and the
 * map, as the test name suggests; confirm.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    IterativeStream<Integer> loop = environment.fromElements(1, 10).iterate();

    loop.closeWith(loop.map(noOpIntMap)).print();
}
Example 7
Source File: IterateITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Closes an iteration opened directly on a {@code fromElements} source with a mapped stream
 * and expects an {@link UnsupportedOperationException}.
 * NOTE(review): presumably triggered by a parallelism mismatch between the source and the
 * map, as the test name suggests; confirm.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    IterativeStream<Integer> loop = environment.fromElements(1, 10).iterate();

    loop.closeWith(loop.map(noOpIntMap)).print();
}
Example 8
Source File: IterateITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Closes an iteration opened directly on a {@code fromElements} source with a mapped stream
 * and expects an {@link UnsupportedOperationException}.
 * NOTE(review): presumably triggered by a parallelism mismatch between the source and the
 * map, as the test name suggests; confirm.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    IterativeStream<Integer> loop = environment.fromElements(1, 10).iterate();

    loop.closeWith(loop.map(noOpIntMap)).print();
}
Example 9
Source File: IterateITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Runs a simple iterative job and asserts that every entry of the {@code iterated} array has
 * been set. The job is attempted up to five times; after each failed attempt the scale factor
 * applied to the value passed to {@code iterate} is doubled, and the failure of the final
 * attempt is rethrown.
 */
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
    final int maxAttempts = 5;
    int scale = 1;

    for (int attempt = 0; attempt < maxAttempts; attempt++) {
        try {
            StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
            iterated = new boolean[parallelism];

            DataStream<Boolean> input = environment
                .fromCollection(Collections.nCopies(parallelism * 2, false))
                .map(noOpBoolMap)
                .name("ParallelizeMap");

            IterativeStream<Boolean> loop = input.iterate(3000 * scale);
            DataStream<Boolean> feedback = loop.flatMap(new IterationHead()).map(noOpBoolMap);

            loop.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());
            loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink());

            environment.execute();

            for (boolean flag : iterated) {
                assertTrue(flag);
            }

            break; // success
        } catch (Throwable failure) {
            LOG.info("Run " + (attempt + 1) + "/" + maxAttempts + " failed", failure);

            boolean attemptsLeft = attempt < maxAttempts - 1;
            if (attemptsLeft) {
                // Retry with a doubled scale factor.
                scale *= 2;
            } else {
                throw failure;
            }
        }
    }
}
Example 10
Source File: IterateITCase.java From flink with Apache License 2.0 | 4 votes |
@Test(expected = UnsupportedOperationException.class) public void testCoIterClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(iter1.map(noOpIntMap)); }
Example 11
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Builds a small iterative job, converts it to a job graph and verifies that all vertices
 * share one slot sharing group, that the iteration source and sink vertices belong to the same
 * co-location group, and that no other vertex has a co-location group.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> input = environment.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> loop = input.iterate(3000);
    loop.name("iteration").setParallelism(2);

    // The filter predicate is constantly false, so it passes no element on to the feedback edge.
    DataStream<Integer> incremented = loop.map(value -> value + 1).name("map").setParallelism(2);
    DataStream<Integer> feedback = incremented.filter(value -> false).name("filter").setParallelism(2);
    loop.closeWith(feedback).print();

    JobGraph graph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

    // Reference group taken from the first vertex; every vertex is compared against it below.
    SlotSharingGroup expectedSlotGroup = graph.getVerticesAsArray()[0].getSlotSharingGroup();
    assertNotNull(expectedSlotGroup);

    CoLocationGroup headCoLocation = null;
    CoLocationGroup tailCoLocation = null;

    for (JobVertex vertex : graph.getVertices()) {
        // all vertices have same slot sharing group by default
        assertEquals(expectedSlotGroup, vertex.getSlotSharingGroup());

        // all iteration vertices have same co-location group,
        // others have no co-location group by default
        String vertexName = vertex.getName();
        if (vertexName.startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
            headCoLocation = vertex.getCoLocationGroup();
            assertTrue(headCoLocation.getVertices().contains(vertex));
        } else if (vertexName.startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
            tailCoLocation = vertex.getCoLocationGroup();
            assertTrue(tailCoLocation.getVertices().contains(vertex));
        } else {
            assertNull(vertex.getCoLocationGroup());
        }
    }

    // Both an iteration source and an iteration sink must have been seen, in the same group.
    assertNotNull(headCoLocation);
    assertNotNull(tailCoLocation);
    assertEquals(headCoLocation, tailCoLocation);
}
Example 12
Source File: IterateITCase.java From flink with Apache License 2.0 | 4 votes |
@Test(expected = UnsupportedOperationException.class) public void testCoIterClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(iter1.map(noOpIntMap)); }
Example 13
Source File: IterateITCase.java From flink with Apache License 2.0 | 4 votes |
@Test(expected = UnsupportedOperationException.class) public void testClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); IterativeStream<Integer> iter2 = source.iterate(); iter2.closeWith(iter1.map(noOpIntMap)); }
Example 14
Source File: IterateITCase.java From flink with Apache License 2.0 | 3 votes |
/**
 * Opens an iteration that is never closed with {@code closeWith}, executes the job and
 * expects an {@link IllegalStateException}.
 */
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    IterativeStream<Integer> openLoop = environment.fromElements(1, 10).map(noOpIntMap).iterate();

    // The loop body is only mapped and printed; no feedback stream is registered.
    openLoop.map(noOpIntMap).print();

    environment.execute();
}
Example 15
Source File: IterateITCase.java From flink with Apache License 2.0 | 3 votes |
/**
 * Closes an iteration with a feedback stream whose parallelism is set to
 * {@code parallelism / 2} and expects an {@link UnsupportedOperationException}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testDifferingParallelism() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);
    IterativeStream<Integer> loop = input.iterate();

    DataStream<Integer> halvedFeedback = loop.map(noOpIntMap).setParallelism(parallelism / 2);
    loop.closeWith(halvedFeedback);
}
Example 16
Source File: IterateITCase.java From flink with Apache License 2.0 | 3 votes |
/**
 * Closes the same iteration twice, each time with a freshly mapped feedback stream. The test
 * declares no expected exception, so it passes when both calls complete normally.
 */
@Test
public void testDoubleClosing() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);
    IterativeStream<Integer> loop = input.iterate();

    DataStream<Integer> firstFeedback = loop.map(noOpIntMap);
    loop.closeWith(firstFeedback);

    DataStream<Integer> secondFeedback = loop.map(noOpIntMap);
    loop.closeWith(secondFeedback);
}
Example 17
Source File: IterateITCase.java From flink with Apache License 2.0 | 3 votes |
/**
 * Opens an iteration that is never closed with {@code closeWith}, executes the job and
 * expects an {@link IllegalStateException}.
 */
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    IterativeStream<Integer> openLoop = environment.fromElements(1, 10).map(noOpIntMap).iterate();

    // The loop body is only mapped and printed; no feedback stream is registered.
    openLoop.map(noOpIntMap).print();

    environment.execute();
}
Example 18
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0 | 3 votes |
/**
 * Opens an iteration that is never closed with {@code closeWith}, executes the job and
 * expects an {@link IllegalStateException}.
 */
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    IterativeStream<Integer> openLoop = environment.fromElements(1, 10).map(noOpIntMap).iterate();

    // The loop body is only mapped and printed; no feedback stream is registered.
    openLoop.map(noOpIntMap).print();

    environment.execute();
}
Example 19
Source File: IterateITCase.java From flink with Apache License 2.0 | 3 votes |
/**
 * Enables checkpointing on the given environment and adds an iterative job to it: the loop's
 * flat-mapped output is fed back and also written to a {@code ReceiveCheckNoOpSink}.
 *
 * @param env the environment the job is built on
 * @param timeoutScale factor multiplied with 3000 to form the argument of {@code iterate}
 */
private void createIteration(StreamExecutionEnvironment env, int timeoutScale) {
    env.enableCheckpointing();

    DataStream<Boolean> input = env
        .fromCollection(Collections.nCopies(parallelism * 2, false))
        .map(noOpBoolMap)
        .name("ParallelizeMap");

    IterativeStream<Boolean> loop = input.iterate(3000 * timeoutScale);
    DataStream<Boolean> feedback = loop.flatMap(new IterationHead());

    loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink<Boolean>());
}
Example 20
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0 | 3 votes |
/**
 * Closes an iteration with a feedback stream whose parallelism is set to
 * {@code parallelism / 2} and expects an {@link UnsupportedOperationException}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testDifferingParallelism() throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);
    IterativeStream<Integer> loop = input.iterate();

    DataStream<Integer> halvedFeedback = loop.map(noOpIntMap).setParallelism(parallelism / 2);
    loop.closeWith(halvedFeedback);
}