Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#readTextFile()
The following examples show how to use
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#readTextFile().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ContinuousFileReaderOperatorITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testEndInput() throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); final File sourceFile = TEMPORARY_FOLDER.newFile(); final int elementCount = 10000; try (PrintWriter printWriter = new PrintWriter(sourceFile)) { for (int i = 0; i < elementCount; i++) { printWriter.println(i); } } DataStreamSource<String> source = env.readTextFile(sourceFile.getAbsolutePath()); // check the endInput is invoked at the right time TestBoundedOneInputStreamOperator checkingOperator = new TestBoundedOneInputStreamOperator(elementCount); DataStream<String> endInputChecking = source.transform("EndInputChecking", STRING_TYPE_INFO, checkingOperator); endInputChecking.addSink(new DiscardingSink<>()); env.execute("ContinuousFileReaderOperatorITCase.testEndInput"); }
Example 2
Source File: ReadTextFile.java From blog_demos with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); //设置并行度为1 env.setParallelism(1); //用txt文件作为数据源 DataStream<String> textDataStream = env.readTextFile("file:///Users/zhaoqin/temp/202003/14/README.txt", "UTF-8"); //统计单词数量并打印出来 textDataStream .flatMap(new Splitter()) .keyBy(0) .sum(1) .print(); env.execute("API DataSource demo : readTextFile"); }
Example 3
Source File: DrivingSessions.java From flink-training-exercises with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // read parameters ParameterTool params = ParameterTool.fromArgs(args); String input = params.getRequired("input"); // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); // connect to the data file DataStream<String> carData = env.readTextFile(input); // find segments DataStream<ConnectedCarEvent> events = carData .map((String line) -> ConnectedCarEvent.fromString(line)) .assignTimestampsAndWatermarks(new ConnectedCarAssigner()); events.keyBy("carId") .window(EventTimeSessionWindows.withGap(Time.seconds(15))) .apply(new CreateGapSegment()) .print(); env.execute("Driving Sessions"); }
Example 4
Source File: CarEventSort.java From flink-training-exercises with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // read parameters ParameterTool params = ParameterTool.fromArgs(args); String input = params.getRequired("input"); // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(1); // connect to the data file DataStream<String> carData = env.readTextFile(input); // map to events DataStream<ConnectedCarEvent> events = carData .map((String line) -> ConnectedCarEvent.fromString(line)) .assignTimestampsAndWatermarks(new ConnectedCarAssigner()); // sort events events.keyBy((ConnectedCarEvent event) -> event.carId) .process(new SortFunction()) .print(); env.execute("Sort Connected Car Events"); }
Example 5
Source File: WordCount.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { // Checking input parameters final ParameterTool params = ParameterTool.fromArgs(args); // set up the execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get input data DataStream<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WordCount example with default input data set."); System.out.println("Use --input to specify file input."); // get default test text data text = env.fromElements(WordCountData.WORDS); } DataStream<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .keyBy(0).sum(1); // emit result if (params.has("output")) { counts.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } // execute program env.execute("Streaming WordCount"); }
Example 6
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); DataStreamSource<String> data = env.readTextFile("file:///usr/local/blink-1.5.1/README.txt"); data.print(); //两种格式都行,另外还支持写入到 hdfs // data.writeAsText("file:///usr/local/blink-1.5.1/README1.txt"); data.writeAsText("/usr/local/blink-1.5.1/README1.txt"); env.execute(); }
Example 7
Source File: InsideDataSource.java From flink-simple-tutorial with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 添加数组作为数据输入源 String[] elementInput = new String[]{"hello Flink", "Second Line"}; DataStream<String> text = env.fromElements(elementInput); // 添加List集合作为数据输入源 List<String> collectionInput = new ArrayList<>(); collectionInput.add("hello Flink"); DataStream<String> text2 = env.fromCollection(collectionInput); // 添加Socket作为数据输入源 // 4个参数 -> (hostname:Ip地址, port:端口, delimiter:分隔符, maxRetry:最大重试次数) DataStream<String> text3 = env.socketTextStream("localhost", 9999, "\n", 4); // 添加文件源 // 直接读取文本文件 DataStream<String> text4 = env.readTextFile("/opt/history.log"); // 指定 CsvInputFormat, 监控csv文件(两种模式), 时间间隔是10ms DataStream<String> text5 = env.readFile(new CsvInputFormat<String>(new Path("/opt/history.csv")) { @Override protected String fillRecord(String s, Object[] objects) { return null; } },"/opt/history.csv", FileProcessingMode.PROCESS_CONTINUOUSLY,10); text.print(); env.execute("Inside DataSource Demo"); }
Example 8
Source File: WordCount.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { // Checking input parameters final ParameterTool params = ParameterTool.fromArgs(args); // set up the execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get input data DataStream<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WordCount example with default input data set."); System.out.println("Use --input to specify file input."); // get default test text data text = env.fromElements(WordCountData.WORDS); } DataStream<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .keyBy(0).sum(1); // emit result if (params.has("output")) { counts.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } // execute program env.execute("Streaming WordCount"); }
Example 9
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); DataStreamSource<String> data = env.readTextFile("file:///usr/local/blink-1.5.1/README.txt"); data.print(); //两种格式都行,另外还支持写入到 hdfs // data.writeAsText("file:///usr/local/blink-1.5.1/README1.txt"); data.writeAsText("/usr/local/blink-1.5.1/README1.txt"); env.execute(); }
Example 10
Source File: DrivingSegments.java From flink-training-exercises with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { // read parameters ParameterTool params = ParameterTool.fromArgs(args); String input = params.getRequired("input"); // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); // connect to the data file DataStream<String> carData = env.readTextFile(input); // map to events DataStream<ConnectedCarEvent> events = carData .map((String line) -> ConnectedCarEvent.fromString(line)) .assignTimestampsAndWatermarks(new ConnectedCarAssigner()); // find segments events.keyBy("carId") .window(GlobalWindows.create()) .trigger(new SegmentingOutOfOrderTrigger()) .evictor(new SegmentingEvictor()) .apply(new CreateStoppedSegment()) .print(); env.execute("Driving Segments"); }
Example 11
Source File: WindowWordCount.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); // set up the execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // get input data DataStream<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WindowWordCount example with default input data set."); System.out.println("Use --input to specify file input."); // get default test text data text = env.fromElements(WordCountData.WORDS); } // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); final int windowSize = params.getInt("window", 10); final int slideSize = params.getInt("slide", 5); DataStream<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new WordCount.Tokenizer()) // create windows of windowSize records slided every slideSize records .keyBy(0) .countWindow(windowSize, slideSize) // group by the tuple field "0" and sum up tuple field "1" .sum(1); // emit result if (params.has("output")) { counts.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } // execute program env.execute("WindowWordCount"); }
Example 12
Source File: WindowWordCount.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); // set up the execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // get input data DataStream<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WindowWordCount example with default input data set."); System.out.println("Use --input to specify file input."); // get default test text data text = env.fromElements(WordCountData.WORDS); } // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); final int windowSize = params.getInt("window", 10); final int slideSize = params.getInt("slide", 5); DataStream<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new WordCount.Tokenizer()) // create windows of windowSize records slided every slideSize records .keyBy(0) .countWindow(windowSize, slideSize) // group by the tuple field "0" and sum up tuple field "1" .sum(1); // emit result if (params.has("output")) { counts.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } // execute program env.execute("WindowWordCount"); }
Example 13
Source File: WordCount.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { // Checking input parameters final MultipleParameterTool params = MultipleParameterTool.fromArgs(args); // set up the execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get input data DataStream<String> text = null; if (params.has("input")) { // union all the inputs from text files for (String input : params.getMultiParameterRequired("input")) { if (text == null) { text = env.readTextFile(input); } else { text = text.union(env.readTextFile(input)); } } Preconditions.checkNotNull(text, "Input DataStream should not be null."); } else { System.out.println("Executing WordCount example with default input data set."); System.out.println("Use --input to specify file input."); // get default test text data text = env.fromElements(WordCountData.WORDS); } DataStream<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .keyBy(0).sum(1); // emit result if (params.has("output")) { counts.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } // execute program env.execute("Streaming WordCount"); }
Example 14
Source File: WindowWordCount.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); // set up the execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // get input data DataStream<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WindowWordCount example with default input data set."); System.out.println("Use --input to specify file input."); // get default test text data text = env.fromElements(WordCountData.WORDS); } // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); final int windowSize = params.getInt("window", 10); final int slideSize = params.getInt("slide", 5); DataStream<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new WordCount.Tokenizer()) // create windows of windowSize records slided every slideSize records .keyBy(0) .countWindow(windowSize, slideSize) // group by the tuple field "0" and sum up tuple field "1" .sum(1); // emit result if (params.has("output")) { counts.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } // execute program env.execute("WindowWordCount"); }