org.apache.flink.streaming.api.datastream.DataStreamUtils Java Examples
The following examples show how to use
org.apache.flink.streaming.api.datastream.DataStreamUtils.
Each example is taken from an open-source project; the source file, project name, and license are noted above its code.
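Before the individual examples, here is a minimal sketch of the two DataStreamUtils entry points the examples below rely on: collect, which runs the pipeline and returns its elements as a local iterator, and reinterpretAsKeyedStream, which declares that a stream is already partitioned the way a keyBy would partition it, so keyed operations can follow without another shuffle. The class name, method names, and the parity key selector are illustrative placeholders, not taken from any project shown below.

import java.util.Iterator;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamUtils;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class DataStreamUtilsSketch {

    public static void main(String[] args) throws Exception {
        collectExample();
        reinterpretExample();
    }

    // collect(): run the pipeline and pull its elements back to the client as a local iterator.
    static void collectExample() throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStream<Long> numbers = env.generateSequence(1, 5);
        Iterator<Long> it = DataStreamUtils.collect(numbers);
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }

    // reinterpretAsKeyedStream(): declare that a stream is already partitioned by the given
    // key selector, so keyed operations can follow without another keyBy/shuffle. The contract
    // is that the stream really is partitioned exactly as keyBy(keySelector) would partition it.
    static void reinterpretExample() throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Long> prePartitioned = env.generateSequence(1, 5); // assumed pre-partitioned by parity
        KeyedStream<Long, Long> keyed = DataStreamUtils.reinterpretAsKeyedStream(
            prePartitioned,
            (KeySelector<Long, Long>) value -> value % 2,
            TypeInformation.of(Long.class));
        keyed.print();
        env.execute("reinterpret-sketch");
    }
}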
Example #1
Source File: CollectITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testCollect() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    final long n = 10;
    DataStream<Long> stream = env.generateSequence(1, n);

    long i = 1;
    for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
        long x = it.next();
        assertEquals("received wrong element", i, x);
        i++;
    }

    assertEquals("received wrong number of elements", n + 1, i);
}
Example #2
Source File: CollectITCase.java From flink with Apache License 2.0
@Test
public void testCollect() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    final long n = 10;
    DataStream<Long> stream = env.generateSequence(1, n);

    long i = 1;
    for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
        long x = it.next();
        assertEquals("received wrong element", i, x);
        i++;
    }

    assertEquals("received wrong number of elements", n + 1, i);
}
Example #3
Source File: CollectITCase.java From flink with Apache License 2.0
@Test
public void testCollect() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    final long n = 10;
    DataStream<Long> stream = env.generateSequence(1, n);

    long i = 1;
    for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
        long x = it.next();
        assertEquals("received wrong element", i, x);
        i++;
    }

    assertEquals("received wrong number of elements", n + 1, i);
}
Example #4
Source File: RegionFailoverITCase.java From flink with Apache License 2.0
private JobGraph createJobGraph() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(NUM_OF_REGIONS);
    env.setMaxParallelism(MAX_PARALLELISM);
    env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
    env.getCheckpointConfig().enableExternalizedCheckpoints(
        CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    env.disableOperatorChaining();
    env.getConfig().disableSysoutLogging();

    // Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions,
    // so this stream graph consists of NUM_OF_REGIONS individual regions.
    DataStreamUtils.reinterpretAsKeyedStream(
            env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
                .name(MULTI_REGION_SOURCE_NAME)
                .setParallelism(NUM_OF_REGIONS),
            (KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
            TypeInformation.of(Integer.class))
        .map(new FailingMapperFunction(NUM_OF_RESTARTS))
        .setParallelism(NUM_OF_REGIONS)
        .addSink(new ValidatingSink())
        .setParallelism(NUM_OF_REGIONS);

    // Another stream graph that is completely disconnected from the one above.
    env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
        .name(SINGLE_REGION_SOURCE_NAME)
        .setParallelism(1)
        .map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value)
        .setParallelism(1);

    return env.getStreamGraph().getJobGraph();
}
Example #5
Source File: FlinkUniverse.java From stateful-functions with Apache License 2.0
private SingleOutputStreamOperator<Message> functionOperator(
        DataStream<Message> input, Map<EgressIdentifier<?>, OutputTag<Object>> sideOutputs) {

    TypeInformation<Message> typeInfo = input.getType();
    FunctionGroupDispatchFactory operatorFactory = new FunctionGroupDispatchFactory(sideOutputs);

    return DataStreamUtils.reinterpretAsKeyedStream(input, new MessageKeySelector())
        .transform(StatefulFunctionsJobConstants.FUNCTION_OPERATOR_NAME, typeInfo, operatorFactory)
        .uid(StatefulFunctionsJobConstants.FUNCTION_OPERATOR_UID);
}
Example #6
Source File: FlinkUniverse.java From flink-statefun with Apache License 2.0
private SingleOutputStreamOperator<Message> functionOperator(
        DataStream<Message> input, Map<EgressIdentifier<?>, OutputTag<Object>> sideOutputs) {

    TypeInformation<Message> typeInfo = input.getType();
    FunctionGroupDispatchFactory operatorFactory = new FunctionGroupDispatchFactory(configuration, sideOutputs);

    return DataStreamUtils.reinterpretAsKeyedStream(input, new MessageKeySelector())
        .transform(StatefulFunctionsJobConstants.FUNCTION_OPERATOR_NAME, typeInfo, operatorFactory)
        .uid(StatefulFunctionsJobConstants.FUNCTION_OPERATOR_UID);
}
Example #7
Source File: FlinkTsFileStreamSource.java From incubator-iotdb with Apache License 2.0
public static void main(String[] args) throws IOException {
    String path = "test.tsfile";
    TsFileUtils.writeTsFile(path);
    new File(path).deleteOnExit();

    String[] fieldNames = {
        QueryConstant.RESERVED_TIME,
        "device_1.sensor_1",
        "device_1.sensor_2",
        "device_1.sensor_3",
        "device_2.sensor_1",
        "device_2.sensor_2",
        "device_2.sensor_3"
    };
    TypeInformation[] typeInformations = new TypeInformation[] {
        Types.LONG, Types.LONG, Types.LONG, Types.LONG, Types.LONG, Types.LONG, Types.LONG
    };
    List<Path> paths = Arrays.stream(fieldNames)
        .filter(s -> !s.equals(QueryConstant.RESERVED_TIME))
        .map(Path::new)
        .collect(Collectors.toList());

    RowTypeInfo rowTypeInfo = new RowTypeInfo(typeInformations, fieldNames);
    QueryExpression queryExpression = QueryExpression.create(paths, null);
    RowRowRecordParser parser =
        RowRowRecordParser.create(rowTypeInfo, queryExpression.getSelectedSeries());
    TsFileInputFormat<Row> inputFormat = new TsFileInputFormat<>(queryExpression, parser);

    StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment();
    inputFormat.setFilePath(path); // read back the TsFile written above
    DataStream<Row> source = senv.createInput(inputFormat);
    DataStream<String> rowString = source.map(Row::toString);
    Iterator<String> result = DataStreamUtils.collect(rowString);
    while (result.hasNext()) {
        System.out.println(result.next());
    }
}
Example #8
Source File: RowTsFileInputFormatITCase.java From incubator-iotdb with Apache License 2.0
@Test
public void testStreamExecution() throws Exception {
    // read files in a directory
    TsFileInputFormat<Row> inputFormat = prepareInputFormat(tmpDir);
    DataStream<Row> source = senv.createInput(inputFormat);
    Iterator<String> rowStringIterator = DataStreamUtils.collect(source.map(Row::toString));
    String[] result = StreamSupport.stream(
            Spliterators.spliteratorUnknownSize(rowStringIterator, 0), false)
        .sorted()
        .toArray(String[]::new);
    String[] expected = {
        "1,1.2,20,null,2.3,11,19",
        "10,null,20,50,25.4,10,21",
        "11,1.4,21,null,null,null,null",
        "12,1.2,20,51,null,null,null",
        "14,7.2,10,11,null,null,null",
        "15,6.2,20,21,null,null,null",
        "16,9.2,30,31,null,null,null",
        "2,null,20,50,25.4,10,21",
        "3,1.4,21,null,null,null,null",
        "4,1.2,20,51,null,null,null",
        "6,7.2,10,11,null,null,null",
        "7,6.2,20,21,null,null,null",
        "8,9.2,30,31,null,null,null",
        "9,1.2,20,null,2.3,11,19"
    };
    assertArrayEquals(expected, result);
}
Example #9
Source File: SimpleFlinkKafkaStreamingCounterDataSourceAVRO.java From jMetalSP with MIT License
@Override
public void run() {
    JMetalLogger.logger.info("Run Flink method in the streaming data source invoked");
    JMetalLogger.logger.info("TOPIC: " + topic);
    // environment.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    // environment.enableCheckpointing(10);

    DataStream<String> data = environment.addSource(
        new FlinkKafkaConsumer010<String>(topic, new SimpleStringSchema(), kafkaParams));

    /*
     * DataDeserializer<Counter> dataDeserializer = new DataDeserializer<>();
     * //Object o = dataDeserializer.deserialize(value.value(), "avsc/Counter.avsc");
     * //GenericData.Record rc = (GenericData.Record) o;
     * Counter counter = dataDeserializer.deserialize(value.value(), "avsc/Counter.avsc");
     */
    try {
        if (data != null) {
            Iterator<String> it = DataStreamUtils.collect(data);
            while (it.hasNext()) {
                byte[] bytes = it.next().getBytes();
                DataDeserializer<Counter> dataDeserializer = new DataDeserializer<>();
                Counter counter = dataDeserializer.deserialize(bytes, "avsc/Counter.avsc");
                Integer number = (Integer) counter.get(0);
                observable.setChanged();
                observable.notifyObservers(new ObservedValue<Integer>(number));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example #10
Source File: RegionFailoverITCase.java From flink with Apache License 2.0
private JobGraph createJobGraph() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(NUM_OF_REGIONS);
    env.setMaxParallelism(MAX_PARALLELISM);
    env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
    env.getCheckpointConfig().enableExternalizedCheckpoints(
        CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    env.disableOperatorChaining();

    // Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions,
    // so this stream graph consists of NUM_OF_REGIONS individual regions.
    DataStreamUtils.reinterpretAsKeyedStream(
            env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
                .name(MULTI_REGION_SOURCE_NAME)
                .setParallelism(NUM_OF_REGIONS),
            (KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
            TypeInformation.of(Integer.class))
        .map(new FailingMapperFunction(NUM_OF_RESTARTS))
        .setParallelism(NUM_OF_REGIONS)
        .addSink(new ValidatingSink())
        .setParallelism(NUM_OF_REGIONS);

    // Another stream graph that is completely disconnected from the one above.
    env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
        .name(SINGLE_REGION_SOURCE_NAME)
        .setParallelism(1)
        .map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value)
        .setParallelism(1);

    return env.getStreamGraph().getJobGraph();
}
Example #11
Source File: ReinterpretDataStreamAsKeyedStreamITCase.java From flink with Apache License 2.0
/**
 * This test checks that reinterpreting a data stream as a keyed stream works as expected. The test consists of
 * two jobs. The first job materializes a keyBy into files, one file per partition. The second job opens the
 * files created by the first job as sources (doing the correct assignment of files to partitions) and
 * reinterprets the sources as keyed, because we know they have been partitioned in a keyBy by the first job.
 */
@Test
public void testReinterpretAsKeyedStream() throws Exception {

    final int maxParallelism = 8;
    final int numEventsPerInstance = 100;
    final int parallelism = 3;
    final int numTotalEvents = numEventsPerInstance * parallelism;
    final int numUniqueKeys = 100;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.setMaxParallelism(maxParallelism);
    env.setParallelism(parallelism);
    env.enableCheckpointing(100);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));

    final List<File> partitionFiles = new ArrayList<>(parallelism);
    for (int i = 0; i < parallelism; ++i) {
        File partitionFile = temporaryFolder.newFile();
        partitionFiles.add(i, partitionFile);
    }

    env.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys))
        .keyBy(0)
        .addSink(new ToPartitionFileSink(partitionFiles));

    env.execute();

    DataStreamUtils.reinterpretAsKeyedStream(
            env.addSource(new FromPartitionFileSource(partitionFiles)),
            (KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
            TypeInformation.of(Integer.class))
        .timeWindow(Time.seconds(1)) // test that also timers and aggregated state work as expected
        .reduce((ReduceFunction<Tuple2<Integer, Integer>>) (value1, value2) ->
            new Tuple2<>(value1.f0, value1.f1 + value2.f1))
        .addSink(new ValidatingSink(numTotalEvents)).setParallelism(1);

    env.execute();
}
Example #12
Source File: DataStreamConversionUtilTest.java From flink with Apache License 2.0
@Test
public void testBasicConvert() throws Exception {
    StreamExecutionEnvironment env = MLEnvironmentFactory.getDefault().getStreamExecutionEnvironment();
    DataStream<Row> input = env.fromElements(Row.of("a"));

    Table table1 = DataStreamConversionUtil.toTable(
        MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID, input, new String[]{"word"});
    Assert.assertEquals(
        new TableSchema(new String[]{"word"}, new TypeInformation[]{TypeInformation.of(String.class)}),
        table1.getSchema()
    );

    DataStream<Row> rowDataStream =
        DataStreamConversionUtil.fromTable(MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID, table1);
    Iterator<Row> result = DataStreamUtils.collect(rowDataStream);
    Assert.assertEquals(Row.of("a"), result.next());
    Assert.assertFalse(result.hasNext());
}
Example #13
Source File: TestUserAgentAnalysisMapperClass.java From yauaa with Apache License 2.0
@Test
public void testClassDefinitionDataStream() throws Exception {
    StreamExecutionEnvironment environment = LocalStreamEnvironment.getExecutionEnvironment();

    DataStream<TestRecord> resultDataStream = environment
        .fromElements(
            "Mozilla/5.0 (X11; Linux x86_64) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/48.0.2564.82 Safari/537.36",
            "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/53.0.2785.124 Mobile Safari/537.36"
        )
        .map((MapFunction<String, TestRecord>) TestRecord::new)
        .map(new MyUserAgentAnalysisMapper());

    List<TestRecord> result = new ArrayList<>(5);
    DataStreamUtils
        .collect(resultDataStream)
        .forEachRemaining(result::add);

    assertEquals(2, result.size());

    assertThat(result, hasItems(
        new TestRecord(
            "Mozilla/5.0 (X11; Linux x86_64) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/48.0.2564.82 Safari/537.36",
            "Desktop",
            "Chrome 48.0.2564.82",
            null),
        new TestRecord(
            "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                "AppleWebKit/537.36 (KHTML, like Gecko) " +
                "Chrome/53.0.2785.124 Mobile Safari/537.36",
            "Phone",
            "Chrome 53.0.2785.124",
            null)
    ));
}
Example #14
Source File: FlinkKafkaShuffle.java From flink with Apache License 2.0
/**
 * The read side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>Each consumer task should read the Kafka partitions that correspond to the key group indices it is assigned.
 * The number of Kafka partitions is the maximum parallelism of the consumer.
 * This version only supports numberOfPartitions = consumerParallelism.
 * In the case of using {@link TimeCharacteristic#EventTime}, a consumer task is responsible for emitting
 * watermarks. Watermarks are read from the corresponding Kafka partitions. Notice that a consumer task only starts
 * to emit a watermark after receiving at least one watermark from each producer task, to make sure watermarks
 * are monotonically increasing. Hence a consumer task needs to know `producerParallelism` as well.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and are allowed to be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#writeKeyBy
 *
 * @param topic The topic of Kafka where data is persisted
 * @param env Execution environment. readKeyBy's environment can be different from writeKeyBy's
 * @param typeInformation Type information of the data persisted in Kafka
 * @param kafkaProperties Kafka properties for the Kafka consumer
 * @param keySelector Key selector to retrieve the key
 * @param <T> Schema type
 * @param <K> Key type
 * @return Keyed data stream
 */
public static <T, K> KeyedStream<T, K> readKeyBy(
        String topic,
        StreamExecutionEnvironment env,
        TypeInformation<T> typeInformation,
        Properties kafkaProperties,
        KeySelector<T, K> keySelector) {

    TypeSerializer<T> typeSerializer = typeInformation.createSerializer(env.getConfig());
    TypeInformationSerializationSchema<T> schema =
        new TypeInformationSerializationSchema<>(typeInformation, typeSerializer);

    SourceFunction<T> kafkaConsumer =
        new FlinkKafkaShuffleConsumer<>(topic, schema, typeSerializer, kafkaProperties);

    // TODO: consider situations where numberOfPartitions != consumerParallelism
    Preconditions.checkArgument(
        kafkaProperties.getProperty(PARTITION_NUMBER) != null,
        "Missing partition number for Kafka Shuffle");
    int numberOfPartitions = PropertiesUtil.getInt(kafkaProperties, PARTITION_NUMBER, Integer.MIN_VALUE);
    DataStream<T> outputDataStream = env.addSource(kafkaConsumer).setParallelism(numberOfPartitions);

    return DataStreamUtils.reinterpretAsKeyedStream(outputDataStream, keySelector);
}
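As a usage sketch of the contract described in the Javadoc above: the caller supplies kafkaProperties that explicitly carry the PRODUCER_PARALLELISM and PARTITION_NUMBER keys before calling readKeyBy. The broker address, topic name, parallelism values, and Tuple2 element type below are placeholders, and the sketch assumes the two property-key constants are accessible from the calling code; if they are not public, the call site would need to live in the same package.

import java.util.Properties;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

public class ReadKeyBySketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties kafkaProperties = new Properties();
        kafkaProperties.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker
        // Both keys must be present explicitly, as the Javadoc requires;
        // producer parallelism and partition number may be set independently.
        kafkaProperties.setProperty(FlinkKafkaShuffle.PRODUCER_PARALLELISM, "4");
        kafkaProperties.setProperty(FlinkKafkaShuffle.PARTITION_NUMBER, "8");

        KeyedStream<Tuple2<Integer, String>, Integer> keyed = FlinkKafkaShuffle.readKeyBy(
            "shuffle-topic", // placeholder topic
            env,
            TypeInformation.of(new TypeHint<Tuple2<Integer, String>>() {}),
            kafkaProperties,
            (KeySelector<Tuple2<Integer, String>, Integer>) value -> value.f0);

        keyed.print();
        env.execute("read-key-by-sketch");
    }
}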
Example #15
Source File: SimpleFlinkKafkaStreamingCounterDataSource.java From jMetalSP with MIT License
@Override
public void run() {
    JMetalLogger.logger.info("Run Flink method in the streaming data source invoked");
    JMetalLogger.logger.info("TOPIC: " + topic);
    // environment.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    // environment.enableCheckpointing(10);

    DataStream<String> data = environment.addSource(
        new FlinkKafkaConsumer010<String>(topic, new SimpleStringSchema(), kafkaParams));

    try {
        Iterator<String> it = DataStreamUtils.collect(data);
        while (it.hasNext()) {
            Integer number = Integer.parseInt(it.next());
            //Integer number = it.next();
            observable.setChanged();
            observable.notifyObservers(new ObservedValue<Integer>(number));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}