org.apache.flink.streaming.api.environment.StreamExecutionEnvironment Java Examples
The following examples show how to use
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
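All of the examples below follow the same lifecycle: obtain a StreamExecutionEnvironment, build a pipeline of sources, transformations, and sinks, and finally call execute(). The following minimal sketch illustrates that pattern (the class and job names are ours, not from any project below):

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class Skeleton {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements(1, 2, 3)        // define a source
           .map(x -> x * 2)              // apply a transformation
           .print();                     // attach a sink
        env.execute("skeleton job");     // the lazily-built pipeline only runs now
    }
}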
Example #1
Source File: TumblingWindow.java From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Build the input data
    List<Tuple2<String, Long>> data = new ArrayList<>();
    Tuple2<String, Long> a = new Tuple2<>("first event", 1L);
    Tuple2<String, Long> b = new Tuple2<>("second event", 2L);
    data.add(a);
    data.add(b);
    DataStreamSource<Tuple2<String, Long>> input = env.fromCollection(data);

    // Use a processing-time tumbling window with a length of 10 seconds
    input.keyBy(x -> x.f1)
            .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
            .reduce(new MyWindowFunction());

    env.execute();
}
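MyWindowFunction is referenced but not shown on this page. Since it is passed to reduce(), it must implement ReduceFunction<Tuple2<String, Long>>; the sketch below is a hypothetical implementation (the merge logic is our assumption, not the tutorial's actual code):

// Hypothetical stand-in for MyWindowFunction: a ReduceFunction that
// concatenates the f0 strings of two elements sharing the same key.
public class MyWindowFunction implements ReduceFunction<Tuple2<String, Long>> {
    @Override
    public Tuple2<String, Long> reduce(Tuple2<String, Long> v1, Tuple2<String, Long> v2) {
        return new Tuple2<>(v1.f0 + " " + v2.f0, v1.f1);
    }
}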
Example #2
Source File: TestFilterEdges.java From gelly-streaming with Apache License 2.0
@Test
public void testWithEmptyFilter() throws Exception {
    /*
     * Test filterEdges() with a filter that constantly returns true
     */
    final String resultPath = getTempDirPath("result");
    final String expectedResult = "1,2,12\n" +
            "1,3,13\n" +
            "2,3,23\n" +
            "3,4,34\n" +
            "3,5,35\n" +
            "4,5,45\n" +
            "5,1,51\n";

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    GraphStream<Long, NullValue, Long> graph = new SimpleEdgeStream<>(GraphStreamTestUtils.getLongLongEdgeDataStream(env), env);
    graph.filterEdges(new EmptyFilter())
            .getEdges().writeAsCsv(resultPath, FileSystem.WriteMode.OVERWRITE);

    env.execute();
    compareResultsByLinesInMemory(expectedResult, resultPath);
}
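EmptyFilter is defined elsewhere in the test class; per the comment above, it constantly returns true, so every edge passes through. It presumably looks roughly like this (a sketch, not the actual gelly-streaming code):

// Presumed shape of EmptyFilter: a FilterFunction that keeps every edge.
private static final class EmptyFilter implements FilterFunction<Edge<Long, Long>> {
    @Override
    public boolean filter(Edge<Long, Long> edge) {
        return true;
    }
}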
Example #3
Source File: IntervalJoinITCase.java From flink with Apache License 2.0
@Test(expected = UnsupportedTimeCharacteristicException.class)
public void testExecutionFailsInProcessingTime() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

    streamOne.keyBy(new Tuple2KeyExtractor())
            .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
            .between(Time.milliseconds(0), Time.milliseconds(0))
            .process(new ProcessJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
                @Override
                public void processElement(Tuple2<String, Integer> left,
                        Tuple2<String, Integer> right,
                        Context ctx,
                        Collector<String> out) throws Exception {
                    out.collect(left + ":" + right);
                }
            });
}
Example #4
Source File: AllWindowTranslationTest.java From flink with Apache License 2.0
@Test
public void testSessionWithFoldFails() throws Exception {
    // verify that fold does not work with merging windows
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    AllWindowedStream<String, TimeWindow> windowedStream = env.fromElements("Hello", "Ciao")
            .windowAll(EventTimeSessionWindows.withGap(Time.seconds(5)));

    try {
        windowedStream.fold("", new FoldFunction<String, String>() {
            private static final long serialVersionUID = -4567902917104921706L;

            @Override
            public String fold(String accumulator, String value) throws Exception {
                return accumulator;
            }
        });
    } catch (UnsupportedOperationException e) {
        // expected
        // use a catch to ensure that the exception is thrown by the fold
        return;
    }

    fail("The fold call should fail.");
}
Example #5
Source File: AbstractNonKeyedOperatorRestoreTestBase.java From flink with Apache License 2.0
@Override
public void createMigrationJob(StreamExecutionEnvironment env) {
    /*
     * Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
     */
    DataStream<Integer> source = createSource(env, ExecutionMode.MIGRATE);

    SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.MIGRATE, source);
    first.startNewChain();

    SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.MIGRATE, first);
    second.startNewChain();

    SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);

    SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.MIGRATE, stateless);
}
Example #6
Source File: SocketWindowWordCount.java From 163-bigdate-note with GNU General Public License v3.0
public static void main(String[] args) throws Exception {
    // Create the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Read the input data from a socket; here we connect to local port 9000
    // (if port 9000 is already in use, pick another one)
    DataStream<String> text = env.socketTextStream("localhost", 9000, "\n");

    // Parse the data, group by word, window, and aggregate
    DataStream<Tuple2<String, Integer>> windowCounts = text
            .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
                    for (String word : value.split("\\s")) {
                        out.collect(Tuple2.of(word, 1));
                    }
                }
            })
            .keyBy(0)
            .timeWindow(Time.seconds(5))
            .sum(1);

    // Print the result to the console; note that this prints with a single thread, not in parallel
    windowCounts.print().setParallelism(1);

    env.execute("Socket Window WordCount");
}
Example #7
Source File: StreamingJobGraphGeneratorNodeHashTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests that a changed operator name does not affect the hash.
 */
@Test
public void testChangedOperatorName() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.addSource(new NoOpSourceFunction(), "A").map(new NoOpMapFunction());
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    JobVertexID expected = jobGraph.getVerticesAsArray()[0].getID();

    env = StreamExecutionEnvironment.createLocalEnvironment();
    env.addSource(new NoOpSourceFunction(), "B").map(new NoOpMapFunction());
    jobGraph = env.getStreamGraph().getJobGraph();
    JobVertexID actual = jobGraph.getVerticesAsArray()[0].getID();

    assertEquals(expected, actual);
}
Example #8
Source File: WindowTranslationTest.java From flink with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DataStream<Tuple2<String, Integer>> window1 = source
            .keyBy(new TupleKeySelector())
            .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
            .reduce(new DummyReducer());

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
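TupleKeySelector and DummyReducer are helpers defined elsewhere in the test class (the same names appear in the other translation tests below). Judging from how they are used, they presumably look roughly like this (a sketch, not the actual Flink test code):

// Presumed helpers: key by the String field, and reduce by keeping one element.
private static class TupleKeySelector implements KeySelector<Tuple2<String, Integer>, String> {
    @Override
    public String getKey(Tuple2<String, Integer> value) {
        return value.f0;
    }
}

private static class DummyReducer implements ReduceFunction<Tuple2<String, Integer>> {
    @Override
    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) {
        return value1;
    }
}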
Example #9
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0
/**
 * Tests that slot sharing is enabled.
 */
@Test
public void testEnableSlotSharing() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);
    DataStream<Integer> mapDataStream = sourceDataStream.map(x -> x + 1);

    final List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(sourceDataStream.getTransformation());
    transformations.add(mapDataStream.getTransformation());

    // all stream nodes share the default group by default
    StreamGraph streamGraph = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig())
            .generate();

    Collection<StreamNode> streamNodes = streamGraph.getStreamNodes();
    for (StreamNode streamNode : streamNodes) {
        assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, streamNode.getSlotSharingGroup());
    }
}
Example #10
Source File: AggregateFunctionDemo.java From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Build the input data
    List<Tuple2<String, Long>> data = new ArrayList<>();
    Tuple2<String, Long> a = new Tuple2<>("first event", 1L);
    Tuple2<String, Long> b = new Tuple2<>("second event", 2L);
    data.add(a);
    data.add(b);
    DataStreamSource<Tuple2<String, Long>> input = env.fromCollection(data);

    input.keyBy(x -> x.f1)
            .timeWindow(Time.seconds(10), Time.seconds(1))
            // A custom AggregateFunction that concatenates the f0 string fields
            // of elements sharing the same f1 key:
            // ("hello", 1L) + ("world", 1L) = ("hello world", 1L)
            .aggregate(new MyAggregateFunction());

    env.execute();
}
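MyAggregateFunction is not shown on this page. Based on the behavior described in the comment above, a hypothetical implementation could look like this (our sketch under that assumption, not the tutorial's actual code):

// Hypothetical MyAggregateFunction matching the comment above: it concatenates
// the f0 strings of all elements in the window and keeps the shared f1 key.
public class MyAggregateFunction
        implements AggregateFunction<Tuple2<String, Long>, Tuple2<String, Long>, Tuple2<String, Long>> {

    @Override
    public Tuple2<String, Long> createAccumulator() {
        return new Tuple2<>("", 0L);
    }

    @Override
    public Tuple2<String, Long> add(Tuple2<String, Long> value, Tuple2<String, Long> acc) {
        String merged = acc.f0.isEmpty() ? value.f0 : acc.f0 + " " + value.f0;
        return new Tuple2<>(merged, value.f1);
    }

    @Override
    public Tuple2<String, Long> getResult(Tuple2<String, Long> acc) {
        return acc;
    }

    @Override
    public Tuple2<String, Long> merge(Tuple2<String, Long> a, Tuple2<String, Long> b) {
        return new Tuple2<>(a.f0 + " " + b.f0, a.f1);
    }
}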
Example #11
Source File: StreamingJob.java From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<String> text = env.socketTextStream("127.0.0.1", 18081, "\n");

    DataStream<WordWithCount> windowCount = text.flatMap(new FlatMapFunction<String, WordWithCount>() {
                public void flatMap(String value, Collector<WordWithCount> out) throws Exception {
                    String[] splits = value.split("\\s");
                    for (String word : splits) {
                        out.collect(new WordWithCount(word, 1L));
                    }
                }
            })
            .keyBy("word")
            .timeWindow(Time.seconds(5), Time.seconds(1))
            .sum("count");

    windowCount.print().setParallelism(1);

    env.execute("Flink Streaming Java API Skeleton");
}
Example #12
Source File: CassandraTupleWriteAheadSinkExample.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(1000);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
    env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));

    CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
            .setQuery("INSERT INTO example.values (id, counter) values (?, ?);")
            .enableWriteAheadLog()
            .setClusterBuilder(new ClusterBuilder() {
                private static final long serialVersionUID = 2793938419775311824L;

                @Override
                public Cluster buildCluster(Cluster.Builder builder) {
                    return builder.addContactPoint("127.0.0.1").build();
                }
            })
            .build();

    sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");

    env.execute();
}
Example #13
Source File: SavepointTestBase.java From flink with Apache License 2.0
public <T> String takeSavepoint(Collection<T> data, Function<SourceFunction<T>, StreamExecutionEnvironment> jobGraphFactory) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableClosureCleaner();

    WaitingSource<T> waitingSource = createSource(data);

    JobGraph jobGraph = jobGraphFactory.apply(waitingSource).getStreamGraph().getJobGraph();
    JobID jobId = jobGraph.getJobID();

    ClusterClient<?> client = miniClusterResource.getClusterClient();

    try {
        JobSubmissionResult result = ClientUtils.submitJob(client, jobGraph);

        return CompletableFuture
                .runAsync(waitingSource::awaitSource)
                .thenCompose(ignore -> triggerSavepoint(client, result.getJobID()))
                .get(5, TimeUnit.MINUTES);
    } catch (Exception e) {
        throw new RuntimeException("Failed to take savepoint", e);
    } finally {
        client.cancel(jobId);
    }
}
Example #14
Source File: PulsarTableSource.java From pulsar-flink with Apache License 2.0
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment execEnv) {
    FlinkPulsarRowSource source = new FlinkPulsarRowSource(serviceUrl, adminUrl, properties);
    switch (startupMode) {
        case EARLIEST:
            source.setStartFromEarliest();
            break;
        case LATEST:
            source.setStartFromLatest();
            break;
        case SPECIFIC_OFFSETS:
            source.setStartFromSpecificOffsets(specificStartupOffsets);
            break;
        case EXTERNAL_SUBSCRIPTION:
            source.setStartFromSubscription(externalSubscriptionName);
            break;
    }

    return execEnv.addSource(source).name(explainSource());
}
Example #15
Source File: BasicTopicStreamingSample.java From solace-integration-guides with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    final Hashtable<String, String> jmsEnv = new Hashtable<>();
    jmsEnv.put(InitialContext.INITIAL_CONTEXT_FACTORY, "com.solacesystems.jndi.SolJNDIInitialContextFactory");
    jmsEnv.put(InitialContext.PROVIDER_URL, "smf://192.168.56.101");
    jmsEnv.put(Context.SECURITY_PRINCIPAL, "test@poc_vpn");
    jmsEnv.put(Context.SECURITY_CREDENTIALS, "password");

    env.addSource(new JMSTopicSource<String>(jmsEnv, "flink_cf", "flink/topic", new JMSTextTranslator()))
            .print();

    env.execute();
}
Example #16
Source File: ChainLengthIncreaseTest.java From Flink-CEPplus with Apache License 2.0
@Override
public void createRestoredJob(StreamExecutionEnvironment env) {
    /*
     * Original job: Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3)
     * Modified job: Source -> StatefulMap1 -> CHAIN(StatefulMap2 -> Map -> StatefulMap3 -> StatefulMap4)
     */
    DataStream<Integer> source = createSource(env, ExecutionMode.RESTORE);

    SingleOutputStreamOperator<Integer> first = createFirstStatefulMap(ExecutionMode.RESTORE, source);
    first.startNewChain();

    SingleOutputStreamOperator<Integer> second = createSecondStatefulMap(ExecutionMode.RESTORE, first);
    second.startNewChain();

    SingleOutputStreamOperator<Integer> stateless = createStatelessMap(second);
    SingleOutputStreamOperator<Integer> stateless2 = createStatelessMap(stateless);

    SingleOutputStreamOperator<Integer> third = createThirdStatefulMap(ExecutionMode.RESTORE, stateless2);
}
Example #17
Source File: KafkaBaseSource.java From sylph with Apache License 2.0
/**
 * Initialization (runs during the driver phase).
 */
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context) {
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode(); // latest or earliest

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers()); // the cluster hosts must be resolvable from the machine running this program
    //"enable.auto.commit" -> (false: java.lang.Boolean), // do not commit offsets automatically
    // "session.timeout.ms" -> "30000", // session timeout defaults to 30 s; not committing offsets within 5 s raises an error
    // "heartbeat.interval.ms" -> "5000", // heartbeat period; committed once every 10 s
    properties.put("group.id", groupId); // note: different streams must use different group.id values, otherwise offset commits fail
    properties.put("auto.offset.reset", offsetMode); // latest or earliest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    // org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}
Example #18
Source File: WindowJoin.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse the parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    final long windowSize = params.getLong("windowSize", 2000);
    final long rate = params.getLong("rate", 3L);

    System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate);
    System.out.println("To customize example, use: WindowJoin [--windowSize <window-size-in-millis>] [--rate <elements-per-second>]");

    // obtain execution environment, run this example in "ingestion time"
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create the data sources for both grades and salaries
    DataStream<Tuple2<String, Integer>> grades = GradeSource.getSource(env, rate);
    DataStream<Tuple2<String, Integer>> salaries = SalarySource.getSource(env, rate);

    // run the actual window join program
    // for testability, this functionality is in a separate method.
    DataStream<Tuple3<String, Integer, Integer>> joinedStream = runWindowJoin(grades, salaries, windowSize);

    // print the results with a single thread, rather than in parallel
    joinedStream.print().setParallelism(1);

    // execute program
    env.execute("Windowed Join Example");
}
Example #19
Source File: BravoTestPipeline.java From bravo with Apache License 2.0
private StreamExecutionEnvironment createJobGraph(int parallelism, Function<DataStream<String>, DataStream<String>> pipelinerBuilder) throws Exception {
    final Path checkpointDir = getCheckpointDir();
    final Path savepointRootDir = getSavepointDir();

    checkpointDir.getFileSystem().mkdirs(checkpointDir);
    savepointRootDir.getFileSystem().mkdirs(savepointRootDir);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.getCheckpointConfig().enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setBufferTimeout(0);
    env.setParallelism(parallelism);
    env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE);
    env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir.toString(), true));

    DataStream<String> sourceData = env
            .addSource(new TestPipelineSource())
            .uid("TestSource")
            .name("TestSource")
            .setParallelism(1);

    pipelinerBuilder.apply(sourceData)
            .addSink(new CollectingSink()).name("Output").uid("Output")
            .setParallelism(1);

    return env;
}
Example #20
Source File: AllWindowTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceWithEvictorAndProcessFunction() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DummyReducer reducer = new DummyReducer();

    DataStream<Tuple2<String, Integer>> window1 = source
            .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
            .evictor(CountEvictor.of(100))
            .reduce(
                    reducer,
                    new ProcessAllWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, TimeWindow>() {
                        @Override
                        public void process(
                                Context context,
                                Iterable<Tuple2<String, Integer>> elements,
                                Collector<Tuple2<String, Integer>> out) throws Exception {
                            for (Tuple2<String, Integer> in : elements) {
                                out.collect(in);
                            }
                        }
                    });

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof EvictingWindowOperator);
    EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?> winOperator =
            (EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getEvictor() instanceof CountEvictor);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

    processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Example #21
Source File: MLEnvironmentTest.java From Alink with Apache License 2.0
@Test
public void testConstructWithStreamEnv() {
    StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(streamExecutionEnvironment);

    MLEnvironment mlEnvironment = new MLEnvironment(streamExecutionEnvironment, streamTableEnvironment);

    Assert.assertSame(mlEnvironment.getStreamExecutionEnvironment(), streamExecutionEnvironment);
    Assert.assertSame(mlEnvironment.getStreamTableEnvironment(), streamTableEnvironment);
}
Example #22
Source File: UnalignedCheckpointCompatibilityITCase.java From flink with Apache License 2.0
private StreamExecutionEnvironment externalCheckpointEnv(boolean isAligned, File dir, int checkpointingInterval) {
    Map<ConfigOption<?>, String> cfg = new HashMap<>();
    cfg.put(CHECKPOINTS_DIRECTORY, dir.toURI().toString());
    cfg.put(MAX_RETAINED_CHECKPOINTS, Integer.toString(Integer.MAX_VALUE)); // prevent deletion of checkpoint files while it's being checked and used

    StreamExecutionEnvironment env = env(isAligned, checkpointingInterval, cfg);
    env.getCheckpointConfig().enableExternalizedCheckpoints(RETAIN_ON_CANCELLATION);
    return env;
}
Example #23
Source File: ContinuousFileReaderOperatorBenchmark.java From flink-benchmarks with Apache License 2.0
@Benchmark
public void readFileSplit(FlinkEnvironmentContext context) throws Exception {
    StreamExecutionEnvironment env = context.env;
    env.setRestartStrategy(new RestartStrategies.NoRestartStrategyConfiguration());
    env
            .enableCheckpointing(100)
            .setParallelism(1)
            .addSource(new MockSourceFunction())
            .transform("fileReader", TypeInformation.of(String.class),
                    new ContinuousFileReaderOperatorFactory<>(new MockInputFormat()))
            .addSink(new LimitedSink());

    env.execute();
}
Example #24
Source File: WindowTranslationTest.java From flink with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceWithProcessWindowFunctionEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DummyReducer reducer = new DummyReducer();

    DataStream<Tuple3<String, String, Integer>> window = source
            .keyBy(new TupleKeySelector())
            .window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
            .reduce(reducer, new ProcessWindowFunction<Tuple2<String, Integer>, Tuple3<String, String, Integer>, String, TimeWindow>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void process(String key,
                        Context ctx,
                        Iterable<Tuple2<String, Integer>> values,
                        Collector<Tuple3<String, String, Integer>> out) throws Exception {
                    for (Tuple2<String, Integer> in : values) {
                        out.collect(new Tuple3<>(in.f0, in.f0, in.f1));
                    }
                }
            });

    OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>>) window.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple3<String, String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

    processElementAndEnsureOutput(operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
Example #25
Source File: DistributedCacheViaBlobTestProgram.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final Path inputFile = Paths.get(params.getRequired("inputFile"));
    final Path inputDir = Paths.get(params.getRequired("inputDir"));

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    env.registerCachedFile(inputFile.toString(), "test_data", false);
    env.registerCachedFile(inputDir.toString(), "test_dir", false);

    final Path containedFile;
    try (Stream<Path> files = Files.list(inputDir)) {
        containedFile = files.findAny().orElseThrow(() -> new RuntimeException("Input directory must not be empty."));
    }

    env.fromElements(1)
            .map(new TestMapFunction(
                    inputFile.toAbsolutePath().toString(),
                    Files.size(inputFile),
                    inputDir.toAbsolutePath().toString(),
                    containedFile.getFileName().toString()))
            .writeAsText(params.getRequired("output"), FileSystem.WriteMode.OVERWRITE);

    env.execute("Distributed Cache Via Blob Test Program");
}
Example #26
Source File: FlinkPulsarTableITest.java From pulsar-flink with Apache License 2.0
@Test
public void testStructTypesWithJavaArray() throws Exception {
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(see);

    String table = newTopic();
    sendTypedMessages(table, SchemaType.AVRO, faList, Optional.empty(), SchemaData.FA.class);

    tEnv
            .connect(getPulsarDescriptor(table))
            .inAppendMode()
            .registerTableSource(table);

    Table t = tEnv.scan(table).select("l");
    tEnv.toAppendStream(t, t.getSchema().toRowType())
            .map(new FailingIdentityMapper<Row>(faList.size()))
            .addSink(new SingletonStreamSink.StringSink<>()).setParallelism(1);

    try {
        see.execute("test struct in avro");
    } catch (Exception e) {
        // ignored: the FailingIdentityMapper is expected to fail the job deliberately
    }
    SingletonStreamSink.compareWithList(
            faList.subList(0, faList.size() - 1).stream().map(Objects::toString).collect(Collectors.toList()));
}
Example #27
Source File: BroadcastStream.java From flink with Apache License 2.0
protected BroadcastStream(
        final StreamExecutionEnvironment env,
        final DataStream<T> input,
        final MapStateDescriptor<?, ?>... broadcastStateDescriptors) {
    this.environment = requireNonNull(env);
    this.inputStream = requireNonNull(input);
    this.broadcastStateDescriptors = Arrays.asList(requireNonNull(broadcastStateDescriptors));
}
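User code does not normally call this constructor directly; a BroadcastStream is obtained from DataStream#broadcast(MapStateDescriptor...). The sketch below illustrates typical usage (the stream names and the rule-keeping logic are illustrative, not from the Flink source):

// A sketch: broadcast a stream of rules so that every parallel instance of
// the downstream operator sees all rule updates.
MapStateDescriptor<String, String> rulesDescriptor = new MapStateDescriptor<>(
        "rules", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

BroadcastStream<String> rulesBroadcast = ruleUpdates.broadcast(rulesDescriptor);

events.connect(rulesBroadcast)
        .process(new BroadcastProcessFunction<String, String, String>() {
            @Override
            public void processElement(String event, ReadOnlyContext ctx, Collector<String> out) throws Exception {
                // regular elements get read-only access to the broadcast state
                out.collect(event + " / rules: " + ctx.getBroadcastState(rulesDescriptor).immutableEntries());
            }

            @Override
            public void processBroadcastElement(String rule, Context ctx, Collector<String> out) throws Exception {
                // broadcast elements may update the broadcast state
                ctx.getBroadcastState(rulesDescriptor).put(rule, rule);
            }
        })
        .print();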
Example #28
Source File: AllWindowTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregateEventTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    DataStream<Tuple2<String, Integer>> window1 = source
            .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
            .aggregate(new DummyAggregationFunction());

    OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform =
            (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
    OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
    Assert.assertTrue(operator instanceof WindowOperator);
    WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator =
            (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
    Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
    Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
    Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor);

    processElementAndEnsureOutput(
            winOperator,
            winOperator.getKeySelector(),
            BasicTypeInfo.STRING_TYPE_INFO,
            new Tuple2<>("hello", 1));
}
Example #29
Source File: FromCollection.java From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set parallelism to 1
    env.setParallelism(1);

    // create a List holding two Tuple2 elements
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2("aaa", 1));
    list.add(new Tuple2("bbb", 1));

    // create a DataStream from the List
    DataStream<Tuple2<String, Integer>> fromCollectionDataStream = env.fromCollection(list);

    // create a DataStream from several Tuple2 elements
    DataStream<Tuple2<String, Integer>> fromElementDataStream = env.fromElements(
            new Tuple2("ccc", 1),
            new Tuple2("ddd", 1),
            new Tuple2("aaa", 1)
    );

    // merge the two DataStreams into one with union
    DataStream<Tuple2<String, Integer>> unionDataStream = fromCollectionDataStream.union(fromElementDataStream);

    // count the occurrences of each word
    unionDataStream
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : collection");
}
Example #30
Source File: SideOutputEvent.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env); // the full data stream read from Kafka

    SingleOutputStreamOperator<MetricEvent> sideOutputData = data.process(new ProcessFunction<MetricEvent, MetricEvent>() {
        @Override
        public void processElement(MetricEvent metricEvent, Context context, Collector<MetricEvent> collector) throws Exception {
            String type = metricEvent.getTags().get("type");
            switch (type) {
                case "machine":
                    context.output(machineTag, metricEvent);
                    break;
                case "docker":
                    context.output(dockerTag, metricEvent);
                    break;
                case "application":
                    context.output(applicationTag, metricEvent);
                    break;
                case "middleware":
                    context.output(middlewareTag, metricEvent);
                    break;
                default:
                    collector.collect(metricEvent);
            }
        }
    });

    DataStream<MetricEvent> machine = sideOutputData.getSideOutput(machineTag);
    DataStream<MetricEvent> docker = sideOutputData.getSideOutput(dockerTag);
    DataStream<MetricEvent> application = sideOutputData.getSideOutput(applicationTag);
    DataStream<MetricEvent> middleware = sideOutputData.getSideOutput(middlewareTag);

    // the side-output streams would typically be written to sinks before submitting the job
    env.execute();
}
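The four OutputTags (machineTag, dockerTag, applicationTag, middlewareTag) are declared elsewhere in the class. Side-output tags must be created as anonymous subclasses so Flink can capture the element type; the declarations presumably look like this (a sketch; only the names are taken from the snippet above):

// Presumed OutputTag declarations (anonymous subclasses preserve type information).
private static final OutputTag<MetricEvent> machineTag = new OutputTag<MetricEvent>("machine") {};
private static final OutputTag<MetricEvent> dockerTag = new OutputTag<MetricEvent>("docker") {};
private static final OutputTag<MetricEvent> applicationTag = new OutputTag<MetricEvent>("application") {};
private static final OutputTag<MetricEvent> middlewareTag = new OutputTag<MetricEvent>("middleware") {};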