org.apache.flink.streaming.api.datastream.DataStream Java Examples
The following examples show how to use
org.apache.flink.streaming.api.datastream.DataStream.
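To set the stage, here is a minimal, self-contained sketch of the usual DataStream pattern (create an execution environment, define a source, transform it, attach a sink, call execute()). It is illustrative only; it reuses the core API calls (fromElements, flatMap, keyBy, sum, print) that recur in the examples below, and the class name DataStreamSketch is ours.

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class DataStreamSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // source: a fixed set of elements
        DataStream<String> lines = env.fromElements("hello flink", "hello stream");

        // transform: split into (word, 1) pairs, group by word, sum the counts
        lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
                        for (String word : line.split(" ")) {
                            out.collect(new Tuple2<>(word, 1));
                        }
                    }
                })
                .keyBy(0)   // key by the word field, as in the examples below
                .sum(1)     // running sum of the count field
                .print();   // sink: print to stdout

        // nothing runs until execute() is called
        env.execute("DataStream sketch");
    }
}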
Example #1
Source File: DataStreamTest.java From flink with Apache License 2.0
/**
 * Ensure that WatermarkStrategy is easy to use in the API, without superfluous generics.
 */
@Test
public void testErgonomicWatermarkStrategy() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<String> input = env.fromElements("bonjour");

    // as soon as you have a chain of methods the first call needs a generic
    input.assignTimestampsAndWatermarks(
            WatermarkStrategy
                    .forBoundedOutOfOrderness(Duration.ofMillis(10)));

    // as soon as you have a chain of methods the first call needs to specify the generic type
    input.assignTimestampsAndWatermarks(
            WatermarkStrategy
                    .<String>forBoundedOutOfOrderness(Duration.ofMillis(10))
                    .withTimestampAssigner((event, timestamp) -> 42L));
}
Example #2
Source File: Kafka011Example.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
            .addSource(
                new FlinkKafkaConsumer011<>(
                    parameterTool.getRequired("input-topic"),
                    new KafkaEventSchema(),
                    parameterTool.getProperties())
                .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer011<>(
            parameterTool.getRequired("output-topic"),
            new KafkaEventSchema(),
            parameterTool.getProperties()));

    env.execute("Kafka 0.11 Example");
}
Example #3
Source File: BucketingSinkFaultToleranceITCase.java From Flink-CEPplus with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {
    assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

    env.enableCheckpointing(20);
    env.setParallelism(12);
    env.disableOperatorChaining();

    DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

    DataStream<String> mapped = stream
            .map(new OnceFailingIdentityMapper(NUM_STRINGS));

    BucketingSink<String> sink = new BucketingSink<String>(outPath)
            .setBucketer(new BasePathBucketer<String>())
            .setBatchSize(10000)
            .setValidLengthPrefix("")
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix(PENDING_SUFFIX)
            .setInProgressSuffix(IN_PROGRESS_SUFFIX);

    mapped.addSink(sink);
}
Example #4
Source File: ReadTextFile.java From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set the parallelism to 1
    env.setParallelism(1);

    // use a txt file as the data source
    DataStream<String> textDataStream = env.readTextFile("file:///Users/zhaoqin/temp/202003/14/README.txt", "UTF-8");

    // count the words and print the result
    textDataStream
            .flatMap(new Splitter())
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : readTextFile");
}
Example #5
Source File: Union.java From da-streamingledger with Apache License 2.0
/**
 * Union differently typed {@link DataStream}s into a single {@code DataStream}.
 *
 * <p>The resulting {@code DataStream} is of type {@link TaggedElement} where
 * {@link TaggedElement#getDataStreamTag()} corresponds to the list position of the source {@code DataStream} in
 * {@code inputs} that produced that element, and {@link TaggedElement#getElement()} is the element produced.
 *
 * @param inputs the input data streams to union.
 * @return a {@code DataStream} that corresponds to the union of all the input {@link DataStream}s
 */
public static DataStream<TaggedElement> apply(List<DataStream<?>> inputs) {
    checkArgument(!inputs.isEmpty(), "union requires at least one input data stream.");
    List<DataStream<TaggedElement>> taggedInputs = tagInputStreams(inputs);
    if (taggedInputs.size() == 1) {
        return taggedInputs.get(0);
    }
    DataStream<TaggedElement> first = taggedInputs.get(0);
    List<DataStream<TaggedElement>> restList = taggedInputs.subList(1, taggedInputs.size());

    @SuppressWarnings({"unchecked", "raw"})
    DataStream<TaggedElement>[] restArray =
            (DataStream<TaggedElement>[]) new DataStream[restList.size()];
    DataStream<TaggedElement>[] rest = restList.toArray(restArray);

    return first.union(rest);
}
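The javadoc above fully specifies the contract, so a short hypothetical usage sketch may help; the env variable, the java.util.Arrays import, and the Union/TaggedElement imports from the da-streamingledger project are assumed here:

// Hypothetical usage of Union.apply, assuming a StreamExecutionEnvironment `env`
// and the da-streamingledger imports for Union and TaggedElement.
DataStream<String> names = env.fromElements("alice", "bob");
DataStream<Integer> counts = env.fromElements(1, 2);

DataStream<TaggedElement> union = Union.apply(Arrays.asList(names, counts));

union.map(tagged -> tagged.getDataStreamTag() == 0
        ? "name: " + tagged.getElement()     // produced by `names` (position 0 in the list)
        : "count: " + tagged.getElement());  // produced by `counts` (position 1)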
Example #6
Source File: AbstractNonKeyedOperatorRestoreTestBase.java From flink with Apache License 2.0
@Override
public void createMigrationJob(StreamExecutionEnvironment env) {
    /*
     * Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
     */
    DataStream<Integer> source = createSource(env, ExecutionMode.MIGRATE);

    SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.MIGRATE, source);
    first.startNewChain();

    SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.MIGRATE, first);
    second.startNewChain();

    SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

    SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.MIGRATE, stateless);
}
Example #7
Source File: GroupedProcessingTimeWindowExample.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource());

    stream
        .keyBy(0)
        .timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
        .reduce(new SummingReducer())

        // alternative: use an apply function which does not pre-aggregate
        // .keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>())
        // .window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
        // .apply(new SummingWindowFunction())

        .addSink(new SinkFunction<Tuple2<Long, Long>>() {
            @Override
            public void invoke(Tuple2<Long, Long> value) {
            }
        });

    env.execute();
}
Example #8
Source File: StreamWordCountExample.java From toolbox with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<String, Integer>> dataStream = env
            .fromElements("Who's there?", "I think I hear them. Stand, ho! Who's there?")
            //.socketTextStream("localhost", 9999)
            .flatMap(new Splitter())
            .keyBy(0)
            .sum(1);

    dataStream.print();

    env.execute();
    //env.execute("Socket Stream WordCount");
}
Example #9
Source File: IntervalJoinITCase.java From flink with Apache License 2.0
@Test(expected = UnsupportedTimeCharacteristicException.class)
public void testExecutionFailsInProcessingTime() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

    streamOne.keyBy(new Tuple2KeyExtractor())
        .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
        .between(Time.milliseconds(0), Time.milliseconds(0))
        .process(new ProcessJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
            @Override
            public void processElement(Tuple2<String, Integer> left,
                    Tuple2<String, Integer> right,
                    Context ctx,
                    Collector<String> out) throws Exception {
                out.collect(left + ":" + right);
            }
        });
}
Example #10
Source File: CollectITCase.java From flink with Apache License 2.0
@Test
public void testCollect() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    final long n = 10;
    DataStream<Long> stream = env.generateSequence(1, n);

    long i = 1;
    for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
        long x = it.next();
        assertEquals("received wrong element", i, x);
        i++;
    }

    assertEquals("received wrong number of elements", n + 1, i);
}
Example #11
Source File: WindowTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DataStream<Tuple2<String, Integer>> window1 = source
            .keyBy(new TupleKeySelector())
            .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
            .reduce(new DummyReducer());

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Example #12
Source File: StreamTaskTimerITCase.java From flink with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOperatorChainedToSource() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(timeCharacteristic);
    env.setParallelism(1);

    DataStream<String> source = env.addSource(new InfiniteTestSource());

    source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

    try {
        env.execute("Timer test");
    } catch (JobExecutionException e) {
        verifyJobExecutionException(e);
    }
}
Example #13
Source File: SiddhiCEPITCase.java From bahir-flink with Apache License 2.0
@Test
public void testUnboundedTupleSourceAndReturnTuple() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple4<Integer, String, Double, Long>> input = env
            .addSource(new RandomTupleSource(5).closeDelay(1500)).keyBy(1);

    DataStream<Tuple4<Long, Integer, String, Double>> output = SiddhiCEP
            .define("inputStream", input, "id", "name", "price", "timestamp")
            .cql("from inputStream select timestamp, id, name, price insert into outputStream")
            .returns("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
Example #14
Source File: FlinkPulsarITest.java From pulsar-flink with Apache License 2.0
@Test
public void testStartFromSpecific() throws Exception {
    String topic = newTopic();
    List<MessageId> mids = sendTypedMessages(topic, SchemaType.INT32, Arrays.asList(
            // 0, 1, 2, 3, 4, 5, 6, 7, 8
            -20, -21, -22, 1, 2, 3, 10, 11, 12), Optional.empty());

    Map<String, Set<Integer>> expectedData = new HashMap<>();
    expectedData.put(topic, new HashSet<>(Arrays.asList(2, 3, 10, 11, 12)));

    Map<String, MessageId> offset = new HashMap<>();
    offset.put(topic, mids.get(3));

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(1);

    Properties sourceProps = sourceProperties();
    sourceProps.setProperty(TOPIC_SINGLE_OPTION_KEY, topic);

    DataStream stream = see.addSource(
            new FlinkPulsarRowSource(serviceUrl, adminUrl, sourceProps).setStartFromSpecificOffsets(offset));
    stream.flatMap(new CheckAllMessageExist(expectedData, 5)).setParallelism(1);

    TestUtils.tryExecute(see, "start from specific");
}
Example #15
Source File: KafkaTestEnvironmentImpl.java From Flink-CEPplus with Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic,
        KeyedSerializationSchema<T> serSchema, Properties props,
        FlinkKafkaPartitioner<T> partitioner) {
    return stream.addSink(new FlinkKafkaProducer<T>(
            topic,
            serSchema,
            props,
            Optional.ofNullable(partitioner),
            producerSemantic,
            FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
Example #16
Source File: KafkaTopicValidator.java From yahoo-streaming-benchmark with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(parameterTool);
    DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
            parameterTool.getRequired("kafka.topic"),
            new SimpleStringSchema(),
            parameterTool.getProperties()));

    rawMessageStream.print();

    env.execute();
}
Example #17
Source File: KafkaTestEnvironmentImpl.java From flink with Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic,
        KeyedSerializationSchema<T> serSchema, Properties props,
        FlinkKafkaPartitioner<T> partitioner) {
    return stream.addSink(new FlinkKafkaProducer011<>(
            topic,
            serSchema,
            props,
            Optional.ofNullable(partitioner),
            producerSemantic,
            FlinkKafkaProducer011.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
Example #18
Source File: MyRetractStreamTableSink.java From flink-learning with Apache License 2.0
@Override
public DataStreamSink<?> consumeDataStream(DataStream<Tuple2<Boolean, Row>> dataStream) {
    return dataStream.addSink(new SinkFunction<Tuple2<Boolean, Row>>() {
        @Override
        public void invoke(Tuple2<Boolean, Row> value, Context context) throws Exception {
            // custom sink logic
            // f0 == true : insert a new record
            // f0 == false: retract (delete) an old record
            if (value.f0) {
                // could write to MySQL or Kafka, or send an HttpPost... implement per your use case
                System.out.println(value.f1);
            }
        }
    });
}
Example #19
Source File: KafkaTableSourceBase.java From flink with Apache License 2.0
/**
 * NOTE: This method is for internal use only for defining a TableSource.
 *       Do not use it in Table API programs.
 */
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
    DeserializationSchema<Row> deserializationSchema = getDeserializationSchema();
    // Version-specific Kafka consumer
    FlinkKafkaConsumerBase<Row> kafkaConsumer = getKafkaConsumer(topic, properties, deserializationSchema);
    return env.addSource(kafkaConsumer).name(explainSource());
}
Example #20
Source File: DataStreamConversionUtil.java From Alink with Apache License 2.0
/**
 * Convert the given DataStream to a Table with the specified colNames.
 *
 * @param session  the MLEnvironment used to convert the DataStream to a Table.
 * @param data     the DataStream to convert.
 * @param colNames the specified colNames.
 * @return the converted Table.
 */
public static Table toTable(MLEnvironment session, DataStream<Row> data, String[] colNames) {
    if (null == colNames || colNames.length == 0) {
        return session.getStreamTableEnvironment().fromDataStream(data);
    } else {
        StringBuilder sbd = new StringBuilder();
        sbd.append(colNames[0]);
        for (int i = 1; i < colNames.length; i++) {
            sbd.append(",").append(colNames[i]);
        }
        return session.getStreamTableEnvironment().fromDataStream(data, sbd.toString());
    }
}
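A hypothetical usage sketch of toTable; MLEnvironmentFactory.getDefault() and the explicit RowTypeInfo schema are our assumptions to keep the fragment self-contained:

// Hypothetical usage, assuming Alink's default MLEnvironment.
MLEnvironment session = MLEnvironmentFactory.getDefault();

DataStream<Row> rows = session.getStreamExecutionEnvironment()
        .fromElements(Row.of(1L, "alice"), Row.of(2L, "bob"))
        .returns(new RowTypeInfo(Types.LONG, Types.STRING));  // give the rows an explicit schema

// colNames are joined into "id,name" internally by the StringBuilder above
Table table = DataStreamConversionUtil.toTable(session, rows, new String[]{"id", "name"});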
Example #21
Source File: KafkaShuffleITCase.java From flink with Apache License 2.0
private Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> testKafkaShuffleProducer(
        String topic,
        StreamExecutionEnvironment env,
        int numberOfPartitions,
        int producerParallelism,
        int numElementsPerProducer,
        TimeCharacteristic timeCharacteristic) throws Exception {
    createTestTopic(topic, numberOfPartitions, 1);

    env.setParallelism(producerParallelism);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.setStreamTimeCharacteristic(timeCharacteristic);

    DataStream<Tuple3<Integer, Long, Integer>> source =
            env.addSource(new KafkaSourceFunction(numElementsPerProducer, false)).setParallelism(producerParallelism);
    DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime)
            ? source.assignTimestampsAndWatermarks(new PunctuatedExtractor()).setParallelism(producerParallelism)
            : source;

    Properties properties = kafkaServer.getStandardProperties();
    Properties kafkaProperties = PropertiesUtil.flatten(properties);

    kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
    kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));
    kafkaProperties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    kafkaProperties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    FlinkKafkaShuffle.writeKeyBy(input, topic, kafkaProperties, 0);

    env.execute("Write to " + topic);

    ImmutableMap.Builder<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results = ImmutableMap.builder();
    for (int p = 0; p < numberOfPartitions; p++) {
        results.put(p, kafkaServer.getAllRecordsFromTopic(kafkaProperties, topic, p, 5000));
    }

    deleteTestTopic(topic);

    return results.build();
}
Example #22
Source File: AllWindowTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testFoldWithProcessAllWindowFunctionEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DataStream<Tuple2<String, Integer>> window = source
            .windowAll(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
            .fold(new Tuple3<>("", "", 0), new DummyFolder(), new ProcessAllWindowFunction<Tuple3<String, String, Integer>, Tuple2<String, Integer>, TimeWindow>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void process(
                        Context ctx,
                        Iterable<Tuple3<String, String, Integer>> values,
                        Collector<Tuple2<String, Integer>> out) throws Exception {
                    for (Tuple3<String, String, Integer> in : values) {
                        out.collect(new Tuple2<>(in.f0, in.f2));
                    }
                }
            });

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof FoldingStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Example #23
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0
/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // fromElements -> Map -> Print
    DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

    DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
            sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH));
    DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

    DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
            mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH));
    partitionAfterMapDataStream.print().setParallelism(2);

    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

    List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertEquals(3, verticesSorted.size());

    // it can not be chained with BATCH shuffle mode
    JobVertex sourceVertex = verticesSorted.get(0);
    JobVertex mapVertex = verticesSorted.get(1);

    // BATCH shuffle mode is translated into BLOCKING result partition
    assertEquals(ResultPartitionType.BLOCKING,
            sourceVertex.getProducedDataSets().get(0).getResultType());
    assertEquals(ResultPartitionType.BLOCKING,
            mapVertex.getProducedDataSets().get(0).getResultType());
}
Example #24
Source File: WindowTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testProcessWithCustomTrigger() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DataStream<Tuple2<String, Integer>> window1 = source
            .keyBy(new TupleKeySelector())
            .window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
            .trigger(CountTrigger.of(1))
            .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void process(String key,
                        Context ctx,
                        Iterable<Tuple2<String, Integer>> values,
                        Collector<Tuple2<String, Integer>> out) throws Exception {
                    for (Tuple2<String, Integer> in : values) {
                        out.collect(in);
                    }
                }
            });

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof CountTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Example #25
Source File: SiddhiCEP.java From bahir-flink with Apache License 2.0
/**
 * Register a stream with a unique <code>streamId</code>, source <code>dataStream</code> and schema fields,
 * and select the registered stream as the initial stream to connect to the Siddhi Runtime.
 *
 * @see #registerStream(String, DataStream, String...)
 * @see #from(String)
 */
public <T> SiddhiStream.SingleSiddhiStream<T> from(String streamId, DataStream<T> dataStream, String... fieldNames) {
    Preconditions.checkNotNull(streamId, "streamId");
    Preconditions.checkNotNull(dataStream, "dataStream");
    Preconditions.checkNotNull(fieldNames, "fieldNames");
    this.registerStream(streamId, dataStream, fieldNames);
    return new SiddhiStream.SingleSiddhiStream<>(streamId, this);
}
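A usage sketch modeled on Example #13 above; obtaining the cep instance via SiddhiCEP.getSiddhiEnvironment(env) is an assumption about the bahir-flink API, not something shown in this section:

// Sketch only: register `input` under the id "inputStream" and query it with Siddhi CQL.
// SiddhiCEP.getSiddhiEnvironment(env) is assumed from the bahir-flink project.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
SiddhiCEP cep = SiddhiCEP.getSiddhiEnvironment(env);

DataStream<Tuple3<Integer, String, Double>> input = env.fromElements(
        Tuple3.of(1, "apple", 2.0),
        Tuple3.of(2, "pear", 3.0));

DataStream<Tuple2<String, Double>> output = cep
        .from("inputStream", input, "id", "name", "price")
        .cql("from inputStream select name, price insert into outputStream")
        .returns("outputStream");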
Example #26
Source File: InsideDataSource.java From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // use an array as the input source
    String[] elementInput = new String[]{"hello Flink", "Second Line"};
    DataStream<String> text = env.fromElements(elementInput);

    // use a List collection as the input source
    List<String> collectionInput = new ArrayList<>();
    collectionInput.add("hello Flink");
    DataStream<String> text2 = env.fromCollection(collectionInput);

    // use a socket as the input source
    // 4 parameters -> (hostname: IP address, port, delimiter, maxRetry: maximum number of retries)
    DataStream<String> text3 = env.socketTextStream("localhost", 9999, "\n", 4);

    // file sources
    // read a text file directly
    DataStream<String> text4 = env.readTextFile("/opt/history.log");

    // specify a CsvInputFormat and monitor the csv file (two available modes); the scan interval is 10 ms
    DataStream<String> text5 = env.readFile(new CsvInputFormat<String>(new Path("/opt/history.csv")) {
        @Override
        protected String fillRecord(String s, Object[] objects) {
            return null;
        }
    }, "/opt/history.csv", FileProcessingMode.PROCESS_CONTINUOUSLY, 10);

    text.print();
    env.execute("Inside DataSource Demo");
}
Example #27
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0
private JobGraph createJobGraphForManagedMemoryFractionTest(
        final List<ResourceSpec> resourceSpecs,
        @Nullable final List<Integer> managedMemoryWeights) throws Exception {

    final Method opMethod = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) {
        }

        @Override
        public void cancel() {
        }
    });
    opMethod.invoke(source, resourceSpecs.get(0));

    // CHAIN(source -> map1) in default slot sharing group
    final DataStream<Integer> map1 = source.map((MapFunction<Integer, Integer>) value -> value);
    opMethod.invoke(map1, resourceSpecs.get(1));

    // CHAIN(map2) in default slot sharing group
    final DataStream<Integer> map2 = map1.rebalance().map((MapFunction<Integer, Integer>) value -> value);
    opMethod.invoke(map2, resourceSpecs.get(2));

    // CHAIN(map3) in test slot sharing group
    final DataStream<Integer> map3 = map2.rebalance().map(value -> value).slotSharingGroup("test");
    opMethod.invoke(map3, resourceSpecs.get(3));

    if (managedMemoryWeights != null) {
        source.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(0));
        map1.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(1));
        map2.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(2));
        map3.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(3));
    }

    return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
Example #28
Source File: IterateITCase.java From flink with Apache License 2.0
@SuppressWarnings("rawtypes") @Test public void testSimpleIteration() throws Exception { int numRetries = 5; int timeoutScale = 1; for (int numRetry = 0; numRetry < numRetries; numRetry++) { try { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); iterated = new boolean[parallelism]; DataStream<Boolean> source = env.fromCollection(Collections.nCopies(parallelism * 2, false)) .map(noOpBoolMap).name("ParallelizeMap"); IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale); DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap); iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink()); iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink()); env.execute(); for (boolean iter : iterated) { assertTrue(iter); } break; // success } catch (Throwable t) { LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t); if (numRetry >= numRetries - 1) { throw t; } else { timeoutScale *= 2; } } } }
Example #29
Source File: StreamingOperatorsITCase.java From flink with Apache License 2.0
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    DataStream<Integer> input = env.fromElements(1, 2, 3);
    input.flatMap(new FlatMapFunction<Integer, Integer>() {
        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value << 1);
        }
    });
    env.execute();
}
Example #30
Source File: NumSeqSourceStreamOp.java From Alink with Apache License 2.0
public NumSeqSourceStreamOp(long from, long to, String colName, double timePerSample, Params params) {
    super(params);
    DataStreamSource<Long> seq = MLEnvironmentFactory.get(getMLEnvironmentId())
            .getStreamExecutionEnvironment()
            .generateSequence(from, to);
    DataStream<Long> data = seq.map(new transform(new Double[]{timePerSample}));
    this.setOutputTable(MLEnvironmentFactory.get(getMLEnvironmentId())
            .getStreamTableEnvironment()
            .fromDataStream(data, colName));
}