org.apache.flink.streaming.api.datastream.DataStream Java Examples
The following examples show how to use
org.apache.flink.streaming.api.datastream.DataStream.
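To set the stage, here is a minimal, self-contained sketch of the usual DataStream pattern (create an execution environment, define a source, transform it, attach a sink, call execute()). It is illustrative only; it reuses the core API calls (fromElements, flatMap, keyBy, sum, print) that recur in the examples below, and the class name DataStreamSketch is ours.

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class DataStreamSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // source: a fixed set of elements
        DataStream<String> lines = env.fromElements("hello flink", "hello stream");

        // transform: split into (word, 1) pairs, group by word, sum the counts
        lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
                        for (String word : line.split(" ")) {
                            out.collect(new Tuple2<>(word, 1));
                        }
                    }
                })
                .keyBy(0)   // key by the word field, as in the examples below
                .sum(1)     // running sum of the count field
                .print();   // sink: print to stdout

        // nothing runs until execute() is called
        env.execute("DataStream sketch");
    }
}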
Example #1
Source File: DataStreamTest.java From flink with Apache License 2.0
/**
 * Ensure that WatermarkStrategy is easy to use in the API, without superfluous generics.
 */
@Test
public void testErgonomicWatermarkStrategy() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<String> input = env.fromElements("bonjour");

    // as soon as you have a chain of methods the first call needs a generic
    input.assignTimestampsAndWatermarks(
            WatermarkStrategy
                    .forBoundedOutOfOrderness(Duration.ofMillis(10)));

    // as soon as you have a chain of methods the first call needs to specify the generic type
    input.assignTimestampsAndWatermarks(
            WatermarkStrategy
                    .<String>forBoundedOutOfOrderness(Duration.ofMillis(10))
                    .withTimestampAssigner((event, timestamp) -> 42L));
}
Example #2
Source File: Kafka011Example.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
            .addSource(
                new FlinkKafkaConsumer011<>(
                    parameterTool.getRequired("input-topic"),
                    new KafkaEventSchema(),
                    parameterTool.getProperties())
                .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer011<>(
            parameterTool.getRequired("output-topic"),
            new KafkaEventSchema(),
            parameterTool.getProperties()));

    env.execute("Kafka 0.11 Example");
}
Example #3
Source File: BucketingSinkFaultToleranceITCase.java From Flink-CEPplus with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {
    assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

    env.enableCheckpointing(20);
    env.setParallelism(12);
    env.disableOperatorChaining();

    DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

    DataStream<String> mapped = stream
            .map(new OnceFailingIdentityMapper(NUM_STRINGS));

    BucketingSink<String> sink = new BucketingSink<String>(outPath)
            .setBucketer(new BasePathBucketer<String>())
            .setBatchSize(10000)
            .setValidLengthPrefix("")
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix(PENDING_SUFFIX)
            .setInProgressSuffix(IN_PROGRESS_SUFFIX);

    mapped.addSink(sink);
}
Example #4
Source File: ReadTextFile.java From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set the parallelism to 1
    env.setParallelism(1);

    // use a txt file as the data source
    DataStream<String> textDataStream = env.readTextFile("file:///Users/zhaoqin/temp/202003/14/README.txt", "UTF-8");

    // count the words and print the result
    textDataStream
            .flatMap(new Splitter())
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : readTextFile");
}
Example #5
Source File: Union.java From da-streamingledger with Apache License 2.0
/**
 * Union differently typed {@link DataStream}s into a single {@code DataStream}.
 *
 * <p>The resulting {@code DataStream} is of type {@link TaggedElement} where
 * {@link TaggedElement#getDataStreamTag()} corresponds to the list position of the source {@code DataStream} in
 * {@code inputs} that produced that element, and {@link TaggedElement#getElement()} is the element produced.
 *
 * @param inputs the input data streams to union.
 * @return a {@code DataStream} that corresponds to the union of all the input {@link DataStream}s
 */
public static DataStream<TaggedElement> apply(List<DataStream<?>> inputs) {
    checkArgument(!inputs.isEmpty(), "union requires at least one input data stream.");
    List<DataStream<TaggedElement>> taggedInputs = tagInputStreams(inputs);
    if (taggedInputs.size() == 1) {
        return taggedInputs.get(0);
    }
    DataStream<TaggedElement> first = taggedInputs.get(0);
    List<DataStream<TaggedElement>> restList = taggedInputs.subList(1, taggedInputs.size());

    @SuppressWarnings({"unchecked", "raw"})
    DataStream<TaggedElement>[] restArray =
            (DataStream<TaggedElement>[]) new DataStream[restList.size()];
    DataStream<TaggedElement>[] rest = restList.toArray(restArray);

    return first.union(rest);
}
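The javadoc above fully specifies the contract, so a short hypothetical usage sketch may help; the env variable, the java.util.Arrays import, and the Union/TaggedElement imports from the da-streamingledger project are assumed here:

// Hypothetical usage of Union.apply, assuming a StreamExecutionEnvironment `env`
// and the da-streamingledger imports for Union and TaggedElement.
DataStream<String> names = env.fromElements("alice", "bob");
DataStream<Integer> counts = env.fromElements(1, 2);

DataStream<TaggedElement> union = Union.apply(Arrays.asList(names, counts));

union.map(tagged -> tagged.getDataStreamTag() == 0
        ? "name: " + tagged.getElement()     // produced by `names` (position 0 in the list)
        : "count: " + tagged.getElement());  // produced by `counts` (position 1)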
Example #6
Source File: AbstractNonKeyedOperatorRestoreTestBase.java From flink with Apache License 2.0
@Override
public void createMigrationJob(StreamExecutionEnvironment env) {
    /*
     * Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
     */
    DataStream<Integer> source = createSource(env, ExecutionMode.MIGRATE);

    SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.MIGRATE, source);
    first.startNewChain();

    SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.MIGRATE, first);
    second.startNewChain();

    SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

    SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.MIGRATE, stateless);
}
Example #7
Source File: GroupedProcessingTimeWindowExample.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource());

    stream
        .keyBy(0)
        .timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
        .reduce(new SummingReducer())

        // alternative: use an apply function which does not pre-aggregate
        // .keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>())
        // .window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
        // .apply(new SummingWindowFunction())

        .addSink(new SinkFunction<Tuple2<Long, Long>>() {
            @Override
            public void invoke(Tuple2<Long, Long> value) {
            }
        });

    env.execute();
}
Example #8
Source File: StreamWordCountExample.java From toolbox with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<String, Integer>> dataStream = env
            .fromElements("Who's there?", "I think I hear them. Stand, ho! Who's there?")
            //.socketTextStream("localhost", 9999)
            .flatMap(new Splitter())
            .keyBy(0)
            .sum(1);

    dataStream.print();

    env.execute();
    //env.execute("Socket Stream WordCount");
}
Example #9
Source File: IntervalJoinITCase.java From flink with Apache License 2.0
@Test(expected = UnsupportedTimeCharacteristicException.class)
public void testExecutionFailsInProcessingTime() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

    streamOne.keyBy(new Tuple2KeyExtractor())
        .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
        .between(Time.milliseconds(0), Time.milliseconds(0))
        .process(new ProcessJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
            @Override
            public void processElement(Tuple2<String, Integer> left,
                    Tuple2<String, Integer> right,
                    Context ctx,
                    Collector<String> out) throws Exception {
                out.collect(left + ":" + right);
            }
        });
}
Example #10
Source File: CollectITCase.java From flink with Apache License 2.0
@Test
public void testCollect() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    final long n = 10;
    DataStream<Long> stream = env.generateSequence(1, n);

    long i = 1;
    for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) {
        long x = it.next();
        assertEquals("received wrong element", i, x);
        i++;
    }

    assertEquals("received wrong number of elements", n + 1, i);
}
Example #11
Source File: WindowTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DataStream<Tuple2<String, Integer>> window1 = source
            .keyBy(new TupleKeySelector())
            .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
            .reduce(new DummyReducer());

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Example #12
Source File: StreamTaskTimerITCase.java From flink with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOperatorChainedToSource() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(timeCharacteristic);
    env.setParallelism(1);

    DataStream<String> source = env.addSource(new InfiniteTestSource());

    source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

    try {
        env.execute("Timer test");
    } catch (JobExecutionException e) {
        verifyJobExecutionException(e);
    }
}
Example #13
Source File: SiddhiCEPITCase.java From bahir-flink with Apache License 2.0
@Test
public void testUnboundedTupleSourceAndReturnTuple() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple4<Integer, String, Double, Long>> input = env
            .addSource(new RandomTupleSource(5).closeDelay(1500)).keyBy(1);

    DataStream<Tuple4<Long, Integer, String, Double>> output = SiddhiCEP
            .define("inputStream", input, "id", "name", "price", "timestamp")
            .cql("from inputStream select timestamp, id, name, price insert into outputStream")
            .returns("outputStream");

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
Example #14
Source File: FlinkPulsarITest.java From pulsar-flink with Apache License 2.0
@Test
public void testStartFromSpecific() throws Exception {
    String topic = newTopic();
    List<MessageId> mids = sendTypedMessages(topic, SchemaType.INT32, Arrays.asList(
            // 0, 1, 2, 3, 4, 5, 6, 7, 8
            -20, -21, -22, 1, 2, 3, 10, 11, 12), Optional.empty());

    Map<String, Set<Integer>> expectedData = new HashMap<>();
    expectedData.put(topic, new HashSet<>(Arrays.asList(2, 3, 10, 11, 12)));

    Map<String, MessageId> offset = new HashMap<>();
    offset.put(topic, mids.get(3));

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(1);

    Properties sourceProps = sourceProperties();
    sourceProps.setProperty(TOPIC_SINGLE_OPTION_KEY, topic);

    DataStream stream = see.addSource(
            new FlinkPulsarRowSource(serviceUrl, adminUrl, sourceProps).setStartFromSpecificOffsets(offset));
    stream.flatMap(new CheckAllMessageExist(expectedData, 5)).setParallelism(1);

    TestUtils.tryExecute(see, "start from specific");
}
Example #15
Source File: KafkaTestEnvironmentImpl.java From Flink-CEPplus with Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic,
        KeyedSerializationSchema<T> serSchema, Properties props,
        FlinkKafkaPartitioner<T> partitioner) {
    return stream.addSink(new FlinkKafkaProducer<T>(
            topic,
            serSchema,
            props,
            Optional.ofNullable(partitioner),
            producerSemantic,
            FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
Example #16
Source File: KafkaTopicValidator.java From yahoo-streaming-benchmark with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(parameterTool);
    DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
            parameterTool.getRequired("kafka.topic"),
            new SimpleStringSchema(),
            parameterTool.getProperties()));

    rawMessageStream.print();

    env.execute();
}
Example #17
Source File: KafkaTestEnvironmentImpl.java From flink with Apache License 2.0
@Override
public <T> DataStreamSink<T> produceIntoKafka(DataStream<T> stream, String topic,
        KeyedSerializationSchema<T> serSchema, Properties props,
        FlinkKafkaPartitioner<T> partitioner) {
    return stream.addSink(new FlinkKafkaProducer011<>(
            topic,
            serSchema,
            props,
            Optional.ofNullable(partitioner),
            producerSemantic,
            FlinkKafkaProducer011.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE));
}
Example #18
Source File: MyRetractStreamTableSink.java From flink-learning with Apache License 2.0
@Override
public DataStreamSink<?> consumeDataStream(DataStream<Tuple2<Boolean, Row>> dataStream) {
    return dataStream.addSink(new SinkFunction<Tuple2<Boolean, Row>>() {
        @Override
        public void invoke(Tuple2<Boolean, Row> value, Context context) throws Exception {
            // custom sink logic
            // f0 == true : insert a new record
            // f0 == false: retract (delete) an old record
            if (value.f0) {
                // could write to MySQL or Kafka, or send an HttpPost... implement per your use case
                System.out.println(value.f1);
            }
        }
    });
}
Example #19
Source File: KafkaTableSourceBase.java From flink with Apache License 2.0
/**
 * NOTE: This method is for internal use only for defining a TableSource.
 *       Do not use it in Table API programs.
 */
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
    DeserializationSchema<Row> deserializationSchema = getDeserializationSchema();
    // Version-specific Kafka consumer
    FlinkKafkaConsumerBase<Row> kafkaConsumer = getKafkaConsumer(topic, properties, deserializationSchema);
    return env.addSource(kafkaConsumer).name(explainSource());
}
Example #20
Source File: DataStreamConversionUtil.java From Alink with Apache License 2.0
/**
 * Convert the given DataStream to a Table with the specified colNames.
 *
 * @param session  the MLEnvironment used to convert the DataStream to a Table.
 * @param data     the DataStream to convert.
 * @param colNames the specified colNames.
 * @return the converted Table.
 */
public static Table toTable(MLEnvironment session, DataStream<Row> data, String[] colNames) {
    if (null == colNames || colNames.length == 0) {
        return session.getStreamTableEnvironment().fromDataStream(data);
    } else {
        StringBuilder sbd = new StringBuilder();
        sbd.append(colNames[0]);
        for (int i = 1; i < colNames.length; i++) {
            sbd.append(",").append(colNames[i]);
        }
        return session.getStreamTableEnvironment().fromDataStream(data, sbd.toString());
    }
}
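A hypothetical usage sketch of toTable; MLEnvironmentFactory.getDefault() and the explicit RowTypeInfo schema are our assumptions to keep the fragment self-contained:

// Hypothetical usage, assuming Alink's default MLEnvironment.
MLEnvironment session = MLEnvironmentFactory.getDefault();

DataStream<Row> rows = session.getStreamExecutionEnvironment()
        .fromElements(Row.of(1L, "alice"), Row.of(2L, "bob"))
        .returns(new RowTypeInfo(Types.LONG, Types.STRING));  // give the rows an explicit schema

// colNames are joined into "id,name" internally by the StringBuilder above
Table table = DataStreamConversionUtil.toTable(session, rows, new String[]{"id", "name"});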
Example #21
Source File: KafkaShuffleITCase.java From flink with Apache License 2.0
private Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> testKafkaShuffleProducer(
        String topic,
        StreamExecutionEnvironment env,
        int numberOfPartitions,
        int producerParallelism,
        int numElementsPerProducer,
        TimeCharacteristic timeCharacteristic) throws Exception {
    createTestTopic(topic, numberOfPartitions, 1);

    env.setParallelism(producerParallelism);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.setStreamTimeCharacteristic(timeCharacteristic);

    DataStream<Tuple3<Integer, Long, Integer>> source =
            env.addSource(new KafkaSourceFunction(numElementsPerProducer, false)).setParallelism(producerParallelism);
    DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime)
            ? source.assignTimestampsAndWatermarks(new PunctuatedExtractor()).setParallelism(producerParallelism)
            : source;

    Properties properties = kafkaServer.getStandardProperties();
    Properties kafkaProperties = PropertiesUtil.flatten(properties);

    kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
    kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));
    kafkaProperties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    kafkaProperties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    FlinkKafkaShuffle.writeKeyBy(input, topic, kafkaProperties, 0);

    env.execute("Write to " + topic);

    ImmutableMap.Builder<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results = ImmutableMap.builder();
    for (int p = 0; p < numberOfPartitions; p++) {
        results.put(p, kafkaServer.getAllRecordsFromTopic(kafkaProperties, topic, p, 5000));
    }

    deleteTestTopic(topic);

    return results.build();
}
Example #22
Source File: AllWindowTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testFoldWithProcessAllWindowFunctionEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DataStream<Tuple2<String, Integer>> window = source
            .windowAll(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
            .fold(new Tuple3<>("", "", 0), new DummyFolder(), new ProcessAllWindowFunction<Tuple3<String, String, Integer>, Tuple2<String, Integer>, TimeWindow>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void process(
                        Context ctx,
                        Iterable<Tuple3<String, String, Integer>> values,
                        Collector<Tuple2<String, Integer>> out) throws Exception {
                    for (Tuple3<String, String, Integer> in : values) {
                        out.collect(new Tuple2<>(in.f0, in.f2));
                    }
                }
            });

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof FoldingStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Example #23
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0
/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // fromElements -> Map -> Print
    DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

    DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
            sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH));
    DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

    DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
            mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH));
    partitionAfterMapDataStream.print().setParallelism(2);

    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

    List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertEquals(3, verticesSorted.size());

    // it can not be chained with BATCH shuffle mode
    JobVertex sourceVertex = verticesSorted.get(0);
    JobVertex mapVertex = verticesSorted.get(1);

    // BATCH shuffle mode is translated into BLOCKING result partition
    assertEquals(ResultPartitionType.BLOCKING,
            sourceVertex.getProducedDataSets().get(0).getResultType());
    assertEquals(ResultPartitionType.BLOCKING,
            mapVertex.getProducedDataSets().get(0).getResultType());
}
Example #24
Source File: WindowTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testProcessWithCustomTrigger() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DataStream<Tuple2<String, Integer>> window1 = source
            .keyBy(new TupleKeySelector())
            .window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
            .trigger(CountTrigger.of(1))
            .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void process(String key,
                        Context ctx,
                        Iterable<Tuple2<String, Integer>> values,
                        Collector<Tuple2<String, Integer>> out) throws Exception {
                    for (Tuple2<String, Integer> in : values) {
                        out.collect(in);
                    }
                }
            });

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof CountTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Example #25
Source File: SiddhiCEP.java From bahir-flink with Apache License 2.0
/**
 * Register a stream with a unique <code>streamId</code>, source <code>dataStream</code> and schema fields,
 * and select the registered stream as the initial stream to connect to the Siddhi Runtime.
 *
 * @see #registerStream(String, DataStream, String...)
 * @see #from(String)
 */
public <T> SiddhiStream.SingleSiddhiStream<T> from(String streamId, DataStream<T> dataStream, String... fieldNames) {
    Preconditions.checkNotNull(streamId, "streamId");
    Preconditions.checkNotNull(dataStream, "dataStream");
    Preconditions.checkNotNull(fieldNames, "fieldNames");
    this.registerStream(streamId, dataStream, fieldNames);
    return new SiddhiStream.SingleSiddhiStream<>(streamId, this);
}
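A usage sketch modeled on Example #13 above; obtaining the cep instance via SiddhiCEP.getSiddhiEnvironment(env) is an assumption about the bahir-flink API, not something shown in this section:

// Sketch only: register `input` under the id "inputStream" and query it with Siddhi CQL.
// SiddhiCEP.getSiddhiEnvironment(env) is assumed from the bahir-flink project.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
SiddhiCEP cep = SiddhiCEP.getSiddhiEnvironment(env);

DataStream<Tuple3<Integer, String, Double>> input = env.fromElements(
        Tuple3.of(1, "apple", 2.0),
        Tuple3.of(2, "pear", 3.0));

DataStream<Tuple2<String, Double>> output = cep
        .from("inputStream", input, "id", "name", "price")
        .cql("from inputStream select name, price insert into outputStream")
        .returns("outputStream");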
Example #26
Source File: InsideDataSource.java From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // use an array as the input source
    String[] elementInput = new String[]{"hello Flink", "Second Line"};
    DataStream<String> text = env.fromElements(elementInput);

    // use a List collection as the input source
    List<String> collectionInput = new ArrayList<>();
    collectionInput.add("hello Flink");
    DataStream<String> text2 = env.fromCollection(collectionInput);

    // use a socket as the input source
    // 4 parameters -> (hostname: IP address, port, delimiter, maxRetry: maximum number of retries)
    DataStream<String> text3 = env.socketTextStream("localhost", 9999, "\n", 4);

    // file sources
    // read a text file directly
    DataStream<String> text4 = env.readTextFile("/opt/history.log");

    // specify a CsvInputFormat and monitor the csv file (two available modes); the scan interval is 10 ms
    DataStream<String> text5 = env.readFile(new CsvInputFormat<String>(new Path("/opt/history.csv")) {
        @Override
        protected String fillRecord(String s, Object[] objects) {
            return null;
        }
    }, "/opt/history.csv", FileProcessingMode.PROCESS_CONTINUOUSLY, 10);

    text.print();
    env.execute("Inside DataSource Demo");
}
Example #27
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0
private JobGraph createJobGraphForManagedMemoryFractionTest(
        final List<ResourceSpec> resourceSpecs,
        @Nullable final List<Integer> managedMemoryWeights) throws Exception {

    final Method opMethod = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) {
        }

        @Override
        public void cancel() {
        }
    });
    opMethod.invoke(source, resourceSpecs.get(0));

    // CHAIN(source -> map1) in default slot sharing group
    final DataStream<Integer> map1 = source.map((MapFunction<Integer, Integer>) value -> value);
    opMethod.invoke(map1, resourceSpecs.get(1));

    // CHAIN(map2) in default slot sharing group
    final DataStream<Integer> map2 = map1.rebalance().map((MapFunction<Integer, Integer>) value -> value);
    opMethod.invoke(map2, resourceSpecs.get(2));

    // CHAIN(map3) in test slot sharing group
    final DataStream<Integer> map3 = map2.rebalance().map(value -> value).slotSharingGroup("test");
    opMethod.invoke(map3, resourceSpecs.get(3));

    if (managedMemoryWeights != null) {
        source.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(0));
        map1.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(1));
        map2.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(2));
        map3.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(3));
    }

    return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
Example #28
Source File: IterateITCase.java From flink with Apache License 2.0
@SuppressWarnings("rawtypes") @Test public void testSimpleIteration() throws Exception { int numRetries = 5; int timeoutScale = 1; for (int numRetry = 0; numRetry < numRetries; numRetry++) { try { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); iterated = new boolean[parallelism]; DataStream<Boolean> source = env.fromCollection(Collections.nCopies(parallelism * 2, false)) .map(noOpBoolMap).name("ParallelizeMap"); IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale); DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap); iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink()); iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink()); env.execute(); for (boolean iter : iterated) { assertTrue(iter); } break; // success } catch (Throwable t) { LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t); if (numRetry >= numRetries - 1) { throw t; } else { timeoutScale *= 2; } } } }
Example #29
Source File: StreamingOperatorsITCase.java From flink with Apache License 2.0
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    DataStream<Integer> input = env.fromElements(1, 2, 3);
    input.flatMap(new FlatMapFunction<Integer, Integer>() {
        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value << 1);
        }
    });
    env.execute();
}
Example #30
Source File: NumSeqSourceStreamOp.java From Alink with Apache License 2.0
public NumSeqSourceStreamOp(long from, long to, String colName, double timePerSample, Params params) {
    super(params);
    DataStreamSource<Long> seq = MLEnvironmentFactory.get(getMLEnvironmentId())
            .getStreamExecutionEnvironment()
            .generateSequence(from, to);
    DataStream<Long> data = seq.map(new transform(new Double[]{timePerSample}));
    this.setOutputTable(MLEnvironmentFactory.get(getMLEnvironmentId())
            .getStreamTableEnvironment()
            .fromDataStream(data, colName));
}