org.apache.flink.streaming.api.datastream.IterativeStream Java Examples
The following examples show how to use
org.apache.flink.streaming.api.datastream.IterativeStream.
Each example notes the open source project and source file it was extracted from.
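Before the extracted examples, here is a minimal, self-contained sketch of the pattern they all share: open a loop with DataStream#iterate(), apply a step function, feed part of the result back into the loop with IterativeStream#closeWith(), and let the rest flow downstream. The API calls are the real ones used throughout the examples below; the input values, the decrement step, the filter conditions, and the class name are illustrative assumptions only.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.IterativeStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MinimalIterationSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // The feedback edge must run at the same parallelism as the iteration head,
        // so keep a single parallelism everywhere (compare testIncorrectParallelism below).
        env.setParallelism(1);

        // Illustrative input: a few positive integers.
        DataStream<Integer> input = env.fromElements(5, 8, 13);

        // Open the loop. The argument (5000 ms) is how long the iteration head
        // waits for feedback data before it shuts the loop down.
        IterativeStream<Integer> iteration = input.iterate(5000);

        // Step function: decrement each element once per pass through the loop.
        DataStream<Integer> minusOne = iteration.map(new MapFunction<Integer, Integer>() {
            @Override
            public Integer map(Integer value) {
                return value - 1;
            }
        });

        // Elements that are still positive are fed back for another pass...
        DataStream<Integer> stillPositive = minusOne.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return value > 0;
            }
        });
        iteration.closeWith(stillPositive);

        // ...and elements that have reached zero leave the loop as the result.
        minusOne.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return value <= 0;
            }
        }).print();

        env.execute("Minimal IterativeStream sketch");
    }
}

The same head/feedback shape appears in every example that follows; the variations are in the feedback type (withFeedbackType, which yields ConnectedIterativeStreams), the timeout passed to iterate(), and how the loop output is routed back versus emitted.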
Example #1
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@Test
public void testImmutabilityWithCoiteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); // for rebalance

    IterativeStream<Integer> iter1 = source.iterate();
    // Calling withFeedbackType should create a new iteration
    ConnectedIterativeStreams<Integer, String> iter2 = iter1.withFeedbackType(String.class);

    iter1.closeWith(iter1.map(noOpIntMap)).print();
    iter2.closeWith(iter2.map(noOpCoMap)).print();

    StreamGraph graph = env.getStreamGraph();

    assertEquals(2, graph.getIterationSourceSinkPairs().size());

    for (Tuple2<StreamNode, StreamNode> sourceSinkPair : graph.getIterationSourceSinkPairs()) {
        assertEquals(
            graph.getTargetVertex(sourceSinkPair.f0.getOutEdges().get(0)),
            graph.getSourceVertex(sourceSinkPair.f1.getInEdges().get(0)));
    }
}
Example #2
Source File: FlinkTopology.java From incubator-samoa with Apache License 2.0

private void initializeCycle(int cycleID) {
    // get the head and tail of cycle
    FlinkProcessingItem tail = cycles.get(cycleID).get(0);
    FlinkProcessingItem head = cycles.get(cycleID).get(cycles.get(cycleID).size() - 1);

    // initialise source stream of the iteration, so as to use it for the iteration starting point
    if (!head.isInitialised()) {
        head.setOnIteration(true);
        head.initialise();
        head.initialiseStreams();
    }

    // initialise all nodes after head
    for (int node = cycles.get(cycleID).size() - 2; node >= 0; node--) {
        FlinkProcessingItem processingItem = cycles.get(cycleID).get(node);
        processingItem.initialise();
        processingItem.initialiseStreams();
    }

    SingleOutputStreamOperator backedge = (SingleOutputStreamOperator) head
        .getInputStreamBySourceID(tail.getComponentId()).getOutStream();
    backedge.setParallelism(head.getParallelism());
    ((IterativeStream) head.getDataStream()).closeWith(backedge);
}
Example #3
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test iteration job, check slot sharing group and co-location group.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    StreamGraph streamGraph = env.getStreamGraph();
    for (Tuple2<StreamNode, StreamNode> iterationPair : streamGraph.getIterationSourceSinkPairs()) {
        assertNotNull(iterationPair.f0.getCoLocationGroup());
        assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());

        assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, iterationPair.f0.getSlotSharingGroup());
        assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());
    }
}
Example #4
Source File: IterateITCase.java From flink with Apache License 2.0

@Test
public void testImmutabilityWithCoiteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); // for rebalance

    IterativeStream<Integer> iter1 = source.iterate();
    // Calling withFeedbackType should create a new iteration
    ConnectedIterativeStreams<Integer, String> iter2 = iter1.withFeedbackType(String.class);

    iter1.closeWith(iter1.map(noOpIntMap)).print();
    iter2.closeWith(iter2.map(noOpCoMap)).print();

    StreamGraph graph = env.getStreamGraph();

    assertEquals(2, graph.getIterationSourceSinkPairs().size());

    for (Tuple2<StreamNode, StreamNode> sourceSinkPair : graph.getIterationSourceSinkPairs()) {
        assertEquals(
            graph.getTargetVertex(sourceSinkPair.f0.getOutEdges().get(0)),
            graph.getSourceVertex(sourceSinkPair.f1.getInEdges().get(0)));
    }
}
Example #5
Source File: IterativeConnectedComponents.java From gelly-streaming with Apache License 2.0

public static void main(String[] args) throws Exception {
    // Set up the environment
    if (!parseParameters(args)) {
        return;
    }
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Long, Long>> edges = getEdgesDataSet(env);

    IterativeStream<Tuple2<Long, Long>> iteration = edges.iterate();
    DataStream<Tuple2<Long, Long>> result = iteration.closeWith(
        iteration.keyBy(0).flatMap(new AssignComponents()));

    // Emit the results
    result.print();

    env.execute("Streaming Connected Components");
}
Example #6
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test iteration job, check slot sharing group and co-location group.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    final ResourceSpec resources = ResourceSpec.newBuilder(1.0, 100).build();
    iteration.getTransformation().setResources(resources, resources);

    StreamGraph streamGraph = env.getStreamGraph();
    for (Tuple2<StreamNode, StreamNode> iterationPair : streamGraph.getIterationSourceSinkPairs()) {
        assertNotNull(iterationPair.f0.getCoLocationGroup());
        assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());

        assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, iterationPair.f0.getSlotSharingGroup());
        assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());

        final ResourceSpec sourceMinResources = iterationPair.f0.getMinResources();
        final ResourceSpec sinkMinResources = iterationPair.f1.getMinResources();
        final ResourceSpec iterationResources = sourceMinResources.merge(sinkMinResources);
        assertThat(iterationResources, equalsResourceSpec(resources));
    }
}
Example #7
Source File: IterateITCase.java From flink with Apache License 2.0

@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
    int numRetries = 5;
    int timeoutScale = 1;

    for (int numRetry = 0; numRetry < numRetries; numRetry++) {
        try {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            iterated = new boolean[parallelism];

            DataStream<Boolean> source = env
                .fromCollection(Collections.nCopies(parallelism * 2, false))
                .map(noOpBoolMap).name("ParallelizeMap");

            IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale);

            DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap);

            iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

            iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink());

            env.execute();

            for (boolean iter : iterated) {
                assertTrue(iter);
            }

            break; // success
        } catch (Throwable t) {
            LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t);

            if (numRetry >= numRetries - 1) {
                throw t;
            } else {
                timeoutScale *= 2;
            }
        }
    }
}
Example #8
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
    int numRetries = 5;
    int timeoutScale = 1;

    for (int numRetry = 0; numRetry < numRetries; numRetry++) {
        try {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            iterated = new boolean[parallelism];

            DataStream<Boolean> source = env
                .fromCollection(Collections.nCopies(parallelism * 2, false))
                .map(noOpBoolMap).name("ParallelizeMap");

            IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale);

            DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap);

            iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

            iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink());

            env.execute();

            for (boolean iter : iterated) {
                assertTrue(iter);
            }

            break; // success
        } catch (Throwable t) {
            LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t);

            if (numRetry >= numRetries - 1) {
                throw t;
            } else {
                timeoutScale *= 2;
            }
        }
    }
}
Example #9
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 10);
    IterativeStream<Integer> iter1 = source.iterate();

    SingleOutputStreamOperator<Integer> map1 = iter1.map(noOpIntMap);
    iter1.closeWith(map1).print();
}
Example #10
Source File: IterateITCase.java From flink with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 10);
    IterativeStream<Integer> iter1 = source.iterate();

    SingleOutputStreamOperator<Integer> map1 = iter1.map(noOpIntMap);
    iter1.closeWith(map1).print();
}
Example #11
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test iteration job when disable slot sharing, check slot sharing group and co-location group.
 */
@Test
public void testIterationWithSlotSharingDisabled() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source.getTransformation());
    transformations.add(iteration.getTransformation());
    transformations.add(map.getTransformation());
    transformations.add(filter.getTransformation());

    StreamGraphGenerator generator = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
    generator.setSlotSharingEnabled(false);
    StreamGraph streamGraph = generator.generate();

    for (Tuple2<StreamNode, StreamNode> iterationPair : streamGraph.getIterationSourceSinkPairs()) {
        assertNotNull(iterationPair.f0.getCoLocationGroup());
        assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());

        assertNotNull(iterationPair.f0.getSlotSharingGroup());
        assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());
    }
}
Example #12
Source File: IterateExample.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
        .setBufferTimeout(1);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
        .iterate(5000L);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
        .split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
        .map(new OutputMap());

    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
}
Example #13
Source File: IterateExample.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
        .setBufferTimeout(1);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
        .iterate(5000);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
        .split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
        .map(new OutputMap());

    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
}
Example #14
Source File: IterateExample.java From flink-learning with Apache License 2.0

public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().setBufferTimeout(1);
    env.getConfig().setGlobalJobParameters(params);

    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = env.addSource(new RandomFibonacciSource())
        .map(new InputMap())
        .iterate(5000);

    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
        .split(new MySelector());

    it.closeWith(step.select("iterate"));

    step.select("output")
        .map(new OutputMap())
        .print();

    env.execute("Streaming Iteration Example");
}
Example #15
Source File: FtrlTrainStreamOp.java From Alink with Apache License 2.0

@Override
public FtrlTrainStreamOp linkFrom(StreamOperator<?>... inputs) {
    checkOpSize(1, inputs);

    int vectorSize = getVectorSize();
    boolean hasInterceptItem = getWithIntercept();
    int vectorTrainIdx = getVectorCol() != null
        ? TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), getVectorCol()) : -1;
    int labelIdx = TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), getLabelCol());

    String[] featureCols = getFeatureCols();
    int[] featureIdx = null;
    int featureColLength = -1;
    if (vectorTrainIdx == -1) {
        featureIdx = new int[featureCols.length];
        for (int i = 0; i < featureCols.length; ++i) {
            featureIdx[i] = TableUtil.findColIndexWithAssertAndHint(inputs[0].getColNames(), featureCols[i]);
        }
        featureColLength = featureCols.length;
    }
    final TypeInformation labelType = inputs[0].getColTypes()[labelIdx];
    int parallelism = MLEnvironmentFactory.get(getMLEnvironmentId())
        .getStreamExecutionEnvironment().getParallelism();
    int featureSize = vectorTrainIdx != -1 ? vectorSize : featureColLength;
    final int[] splitInfo = getSplitInfo(featureSize, hasInterceptItem, parallelism);

    DataStream<Row> initData = inputs[0].getDataStream();

    // Tuple5<SampleId, taskId, numSubVec, SubVec, label>
    DataStream<Tuple5<Long, Integer, Integer, Vector, Object>> input = initData
        .flatMap(new SplitVector(splitInfo, hasInterceptItem, vectorSize, vectorTrainIdx, featureIdx, labelIdx))
        .partitionCustom(new CustomBlockPartitioner(), 1);

    // train data format = <sampleId, subSampleTaskId, subNum, SparseVector(subSample), label>
    // feedback format = Tuple7<sampleId, subSampleTaskId, subNum, SparseVector(subSample), label, wx, timeStamps>
    IterativeStream.ConnectedIterativeStreams<Tuple5<Long, Integer, Integer, Vector, Object>,
        Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>> iteration = input.iterate(Long.MAX_VALUE)
        .withFeedbackType(TypeInformation
            .of(new TypeHint<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {}));

    DataStream iterativeBody = iteration.flatMap(new CalcTask(dataBridge, splitInfo, getParams()))
        .keyBy(0)
        .flatMap(new ReduceTask(parallelism, splitInfo))
        .partitionCustom(new CustomBlockPartitioner(), 1);

    DataStream<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>> result = iterativeBody.filter(
        new FilterFunction<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {
            @Override
            public boolean filter(Tuple7<Long, Integer, Integer, Vector, Object, Double, Long> t3) throws Exception {
                // if t3.f0 > 0 && t3.f2 > 0 then feedback
                return (t3.f0 > 0 && t3.f2 > 0);
            }
        });

    DataStream<Row> output = iterativeBody.filter(
        new FilterFunction<Tuple7<Long, Integer, Integer, Vector, Object, Double, Long>>() {
            @Override
            public boolean filter(Tuple7<Long, Integer, Integer, Vector, Object, Double, Long> value) throws Exception {
                /* if value.f0 is smaller than 0, then output */
                return value.f0 < 0;
            }
        }).flatMap(new WriteModel(labelType, getVectorCol(), featureCols, hasInterceptItem));

    iteration.closeWith(result);

    TableSchema schema = new LinearModelDataConverter(labelType).getModelSchema();

    TypeInformation[] types = new TypeInformation[schema.getFieldTypes().length + 2];
    String[] names = new String[schema.getFieldTypes().length + 2];
    names[0] = "bid";
    names[1] = "ntab";
    types[0] = Types.LONG;
    types[1] = Types.LONG;
    for (int i = 0; i < schema.getFieldTypes().length; ++i) {
        types[i + 2] = schema.getFieldTypes()[i];
        names[i + 2] = schema.getFieldNames()[i];
    }

    this.setOutput(output, names, types);
    return this;
}
Example #16
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test iteration job, check slot sharing group and co-location group.
 */
@Test
public void testIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

    SlotSharingGroup slotSharingGroup = jobGraph.getVerticesAsArray()[0].getSlotSharingGroup();
    assertNotNull(slotSharingGroup);

    CoLocationGroup iterationSourceCoLocationGroup = null;
    CoLocationGroup iterationSinkCoLocationGroup = null;

    for (JobVertex jobVertex : jobGraph.getVertices()) {
        // all vertices have same slot sharing group by default
        assertEquals(slotSharingGroup, jobVertex.getSlotSharingGroup());

        // all iteration vertices have same co-location group,
        // others have no co-location group by default
        if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
            iterationSourceCoLocationGroup = jobVertex.getCoLocationGroup();
            assertTrue(iterationSourceCoLocationGroup.getVertices().contains(jobVertex));
        } else if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
            iterationSinkCoLocationGroup = jobVertex.getCoLocationGroup();
            assertTrue(iterationSinkCoLocationGroup.getVertices().contains(jobVertex));
        } else {
            assertNull(jobVertex.getCoLocationGroup());
        }
    }

    assertNotNull(iterationSourceCoLocationGroup);
    assertNotNull(iterationSinkCoLocationGroup);
    assertEquals(iterationSourceCoLocationGroup, iterationSinkCoLocationGroup);
}
Example #17
Source File: IterateITCase.java From flink with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testClosingFromOutOfLoop() throws Exception {
    // this test verifies that we cannot close an iteration with a DataStream that does not
    // have the iteration in its predecessors
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();
    IterativeStream<Integer> iter2 = source.iterate();

    iter2.closeWith(iter1.map(noOpIntMap));
}
Example #18
Source File: IterateITCase.java From flink with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
    // this test verifies that we cannot close an iteration with a DataStream that does not
    // have the iteration in its predecessors
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();
    ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType(Integer.class);

    coIter.closeWith(iter1.map(noOpIntMap));
}
Example #19
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testClosingFromOutOfLoop() throws Exception {
    // this test verifies that we cannot close an iteration with a DataStream that does not
    // have the iteration in its predecessors
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();
    IterativeStream<Integer> iter2 = source.iterate();

    iter2.closeWith(iter1.map(noOpIntMap));
}
Example #20
Source File: IterateExample.java From Flink-CEPplus with Apache License 2.0

public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up input for the stream of integer pairs

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment()
        .setBufferTimeout(1);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create input stream of integer pairs
    DataStream<Tuple2<Integer, Integer>> inputStream;
    if (params.has("input")) {
        inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
    } else {
        System.out.println("Executing Iterate example with default input data set.");
        System.out.println("Use --input to specify file input.");
        inputStream = env.addSource(new RandomFibonacciSource());
    }

    // create an iterative data stream from the input with 5 second timeout
    IterativeStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> it = inputStream.map(new InputMap())
        .iterate(5000);

    // apply the step function to get the next Fibonacci number
    // increment the counter and split the output with the output selector
    SplitStream<Tuple5<Integer, Integer, Integer, Integer, Integer>> step = it.map(new Step())
        .split(new MySelector());

    // close the iteration by selecting the tuples that were directed to the
    // 'iterate' channel in the output selector
    it.closeWith(step.select("iterate"));

    // to produce the final output select the tuples directed to the
    // 'output' channel then get the input pairs that have the greatest iteration counter
    // on a 1 second sliding window
    DataStream<Tuple2<Tuple2<Integer, Integer>, Integer>> numbers = step.select("output")
        .map(new OutputMap());

    // emit results
    if (params.has("output")) {
        numbers.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        numbers.print();
    }

    // execute the program
    env.execute("Streaming Iteration Example");
}
Example #21
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

/**
 * Test slot sharing group is enabled or disabled for iteration.
 */
@Test
public void testDisableSlotSharingForIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source.getTransformation());
    transformations.add(iteration.getTransformation());
    transformations.add(map.getTransformation());
    transformations.add(filter.getTransformation());

    // when slot sharing group is disabled
    // all job vertices except iteration vertex would have no slot sharing group
    // iteration vertices would be set slot sharing group automatically
    StreamGraphGenerator generator = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
    generator.setSlotSharingEnabled(false);
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(generator.generate());

    SlotSharingGroup iterationSourceSlotSharingGroup = null;
    SlotSharingGroup iterationSinkSlotSharingGroup = null;

    CoLocationGroup iterationSourceCoLocationGroup = null;
    CoLocationGroup iterationSinkCoLocationGroup = null;

    for (JobVertex jobVertex : jobGraph.getVertices()) {
        if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
            iterationSourceSlotSharingGroup = jobVertex.getSlotSharingGroup();
            iterationSourceCoLocationGroup = jobVertex.getCoLocationGroup();
        } else if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
            iterationSinkSlotSharingGroup = jobVertex.getSlotSharingGroup();
            iterationSinkCoLocationGroup = jobVertex.getCoLocationGroup();
        } else {
            assertNull(jobVertex.getSlotSharingGroup());
        }
    }

    assertNotNull(iterationSourceSlotSharingGroup);
    assertNotNull(iterationSinkSlotSharingGroup);
    assertEquals(iterationSourceSlotSharingGroup, iterationSinkSlotSharingGroup);

    assertNotNull(iterationSourceCoLocationGroup);
    assertNotNull(iterationSinkCoLocationGroup);
    assertEquals(iterationSourceCoLocationGroup, iterationSinkCoLocationGroup);
}
Example #22
Source File: IterateITCase.java From Flink-CEPplus with Apache License 2.0

@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
    // this test verifies that we cannot close an iteration with a DataStream that does not
    // have the iteration in its predecessors
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // introduce dummy mapper to get to correct parallelism
    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();
    ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType(Integer.class);

    coIter.closeWith(iter1.map(noOpIntMap));
}
Example #23
Source File: PythonIterativeStream.java From Flink-CEPplus with Apache License 2.0

PythonIterativeStream(IterativeStream<PyObject> iterativeStream) {
    super(iterativeStream);
}