Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#readTextFile()
The following examples show how to use
org.apache.flink.api.java.ExecutionEnvironment#readTextFile().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WordCountNestedPOJOITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override
protected void testProgram() throws Exception {
    // Word count over a custom POJO, keyed by the nested field path "complex.someTest".
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> lines = env.readTextFile(textPath);

    DataSet<WC> aggregated = lines
            .flatMap(new Tokenizer())
            // Group on a nested POJO member via its dotted field expression.
            .groupBy("complex.someTest")
            .reduce(new ReduceFunction<WC>() {
                private static final long serialVersionUID = 1L;

                @Override
                public WC reduce(WC left, WC right) {
                    // The key field is identical within a group; just sum the counts.
                    return new WC(left.complex.someTest, left.count + right.count);
                }
            });

    aggregated.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
Example 2
Source File: BatchWCJavaApp.java From 163-bigdate-note with GNU General Public License v3.0 | 6 votes |
public static void main(String[] args) throws Exception { String input = "file:///D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input"; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSource<String> text = env.readTextFile(input); // text.print(); text.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() { @Override public void flatMap(String value, Collector<Tuple2<String, Integer>> collector) throws Exception { String[] tokens = value.toLowerCase().split(","); for (String token : tokens) { if (token.length() > 0) { collector.collect(new Tuple2<String, Integer>(token, 1)); } } } }).groupBy(0).sum(1).print(); }
Example 3
Source File: Readonly.java From flink-perf with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); System.err.println("Using input="+args[0]); // get input data DataSet<String> text = env.readTextFile(args[0]); DataSet<String> res = text.filter(new FilterFunction<String>() { @Override public boolean filter(String value) throws Exception { return false; } }); res.writeAsText("file:///tmp/out", WriteMode.OVERWRITE); // execute program env.execute("Read only job"); }
Example 4
Source File: WordCountSimplePOJOITCase.java From flink with Apache License 2.0 | 6 votes |
@Override
protected void testProgram() throws Exception {
    // Word count where the data type is a simple POJO grouped by its "word" field.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> lines = env.readTextFile(textPath);

    DataSet<WC> totals = lines
            .flatMap(new Tokenizer())
            .groupBy("word")
            .reduce(new ReduceFunction<WC>() {
                private static final long serialVersionUID = 1L;

                @Override
                public WC reduce(WC a, WC b) {
                    // Within a group both elements carry the same word; add the counts.
                    return new WC(a.word, a.count + b.count);
                }
            });

    totals.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
Example 5
Source File: RatingsDistribution.java From flink-examples with MIT License | 6 votes |
public static void main(String[] args) throws Exception { // parse parameters ParameterTool params = ParameterTool.fromArgs(args); // path to ratings.csv file String ratingsCsvPath = params.getRequired("input"); final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSource<String> file = env.readTextFile(ratingsCsvPath); file.flatMap(new ExtractRating()) .groupBy(0) // .reduceGroup(new SumRatingCount()) .sum(1) .print(); }
Example 6
Source File: WordCountSubclassPOJOITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {
    // Word count over a POJO subclass: grouping uses the base-class "word" field,
    // while the counting happens on the subclass's secretCount field.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> lines = env.readTextFile(textPath);

    DataSet<WCBase> counts = lines
            .flatMap(new Tokenizer())
            .groupBy("word")
            .reduce(new ReduceFunction<WCBase>() {
                private static final long serialVersionUID = 1L;

                @Override
                public WCBase reduce(WCBase first, WCBase second) {
                    // The runtime type is always the WC subclass; sum its hidden counter.
                    WC a = (WC) first;
                    WC b = (WC) second;
                    return new WC(first.word, a.secretCount + b.secretCount);
                }
            })
            .map(new MapFunction<WCBase, WCBase>() {
                @Override
                public WCBase map(WCBase value) throws Exception {
                    // Copy the subclass counter into the base-class field before output.
                    WC wc = (WC) value;
                    wc.count = wc.secretCount;
                    return wc;
                }
            });

    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
Example 7
Source File: MusicProfiles.java From flink with Apache License 2.0 | 5 votes |
/** Mismatch records: read from the configured file, or fall back to bundled sample data. */
private static DataSet<String> getMismatchesData(ExecutionEnvironment env) {
    return fileOutput
            ? env.readTextFile(mismatchesInputPath)
            : MusicProfilesData.getMismatches(env);
}
Example 8
Source File: WordCountSubclassPOJOITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {
    // Exercises POJO subclassing: groups on the inherited "word" field and sums
    // the subclass-only secretCount field, surfacing it on count for the output.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = env.readTextFile(textPath);

    DataSet<WCBase> result = input
            .flatMap(new Tokenizer())
            .groupBy("word")
            .reduce(new ReduceFunction<WCBase>() {
                private static final long serialVersionUID = 1L;

                @Override
                public WCBase reduce(WCBase lhs, WCBase rhs) {
                    // Elements are always WC instances at runtime; add the hidden counters.
                    return new WC(lhs.word, ((WC) lhs).secretCount + ((WC) rhs).secretCount);
                }
            })
            .map(new MapFunction<WCBase, WCBase>() {
                @Override
                public WCBase map(WCBase value) throws Exception {
                    // Expose the subclass counter through the base-class field.
                    WC wc = (WC) value;
                    wc.count = wc.secretCount;
                    return wc;
                }
            });

    result.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
Example 9
Source File: WordCountDemo.java From flinkDemo with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); // create execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setGlobalJobParameters(params); // get input data DataSet<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { // get default test text data System.out.println("Executing WordCount example with default input data set."); System.out.println("Use --input to specify file input."); text = WordCountData.getDefaultTextLineDataSet(env); } DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .sum(1); // emit result if (params.has("output")) { counts.writeAsCsv(params.get("output"), "\n", " "); // execute program env.execute("WordCount Example"); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } }
Example 10
Source File: HBaseWriteExample.java From flink with Apache License 2.0 | 5 votes |
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) { if (fileOutput) { // read the text file from given input path return env.readTextFile(textPath); } else { // get default test text data return getDefaultTextLineDataSet(env); } }
Example 11
Source File: HBaseWriteExample.java From flink with Apache License 2.0 | 5 votes |
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) { if (fileOutput) { // read the text file from given input path return env.readTextFile(textPath); } else { // get default test text data return getDefaultTextLineDataSet(env); } }
Example 12
Source File: WordCountHashAgg.java From flink-perf with Apache License 2.0 | 5 votes |
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) { if(fileOutput) { // read the text file from given input path return env.readTextFile(textPath); } else { // get default test text data throw new RuntimeException("No build in data"); } }
Example 13
Source File: MusicProfiles.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** Mismatch data: file-backed when a path was configured, otherwise built-in samples. */
private static DataSet<String> getMismatchesData(ExecutionEnvironment env) {
    if (fileOutput) {
        return env.readTextFile(mismatchesInputPath);
    }
    return MusicProfilesData.getMismatches(env);
}
Example 14
Source File: AccumulatorITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {
    // Word count whose operators register accumulators; the JobExecutionResult is
    // stored so the test can inspect the accumulator values afterwards.
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> lines = env.readTextFile(dataPath);

    lines.flatMap(new TokenizeLine())
            .groupBy(0)
            .reduceGroup(new CountWords())
            .writeAsCsv(resultPath, "\n", " ");

    this.result = env.execute();
}
Example 15
Source File: WordCountSubclassInterfacePOJOITCase.java From flink with Apache License 2.0 | 5 votes |
@Override
protected void testProgram() throws Exception {
    // Word count over a POJO subclass whose counter is held in the secretCount
    // member and accessed through getCount()/setCount().
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> lines = env.readTextFile(textPath);

    DataSet<WCBase> counts = lines
            .flatMap(new Tokenizer())
            .groupBy("word")
            .reduce(new ReduceFunction<WCBase>() {
                private static final long serialVersionUID = 1L;

                @Override
                public WCBase reduce(WCBase first, WCBase second) {
                    WC a = (WC) first;
                    WC b = (WC) second;
                    // Accumulate into the left element's counter and reuse that object.
                    a.secretCount.setCount(a.secretCount.getCount() + b.secretCount.getCount());
                    return a;
                }
            })
            .map(new MapFunction<WCBase, WCBase>() {
                @Override
                public WCBase map(WCBase value) throws Exception {
                    // Surface the hidden counter on the base-class field for output.
                    WC wc = (WC) value;
                    wc.count = wc.secretCount.getCount();
                    return wc;
                }
            });

    counts.writeAsText(resultPath);
    env.execute("WordCount with custom data types example");
}
Example 16
Source File: Prepare.java From flink-perf with Apache License 2.0 | 5 votes |
public static void main(final String[] args) throws Exception { // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> text = env.readTextFile(args[0]); DataSet<AvroLineitem> avro = text.map(new AvroLineItemMapper()); avro.write(new AvroOutputFormat<AvroLineitem>(AvroLineitem.class), args[1]); env.execute("Lineitem Text 2 Avro converter"); }
Example 17
Source File: WordCountPojo.java From flink-simple-tutorial with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // get input data DataSet<String> text; if (params.has("input")) { // read the text file from given input path text = env.readTextFile(params.get("input")); } else { // get default test text data System.out.println("Executing WordCount example with default input data set."); System.out.println("Use --input to specify file input."); text = WordCountData.getDefaultTextLineDataSet(env); } DataSet<Word> counts = // split up the lines into Word objects (with frequency = 1) text.flatMap(new Tokenizer()) // group by the field word and sum up the frequency .groupBy("word") .reduce(new ReduceFunction<Word>() { @Override public Word reduce(Word value1, Word value2) throws Exception { return new Word(value1.word, value1.frequency + value2.frequency); } }); if (params.has("output")) { counts.writeAsText(params.get("output"), FileSystem.WriteMode.OVERWRITE); // execute program env.execute("WordCount-Pojo Example"); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print(); } }
Example 18
Source File: WordCount.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private static DataSet<String> getTextDataSet(ExecutionEnvironment env) { if (fileOutput) { // read the text file from given input path return env.readTextFile(textPath); } else { // get default test text data return env.fromElements( "To be, or not to be,--that is the question:--", "Whether 'tis nobler in the mind to suffer", "The slings and arrows of outrageous fortune", "Or to take arms against a sea of troubles,", "And by opposing end them?--To die,--to sleep,--", "No more; and by a sleep to say we end", "The heartache, and the thousand natural shocks", "That flesh is heir to,--'tis a consummation", "Devoutly to be wish'd. To die,--to sleep;--", "To sleep! perchance to dream:--ay, there's the rub;", "For in that sleep of death what dreams may come,", "When we have shuffled off this mortal coil,", "Must give us pause: there's the respect", "That makes calamity of so long life;", "For who would bear the whips and scorns of time,", "The oppressor's wrong, the proud man's contumely,", "The pangs of despis'd love, the law's delay,", "The insolence of office, and the spurns", "That patient merit of the unworthy takes,", "When he himself might his quietus make", "With a bare bodkin? who would these fardels bear,", "To grunt and sweat under a weary life,", "But that the dread of something after death,--", "The undiscover'd country, from whose bourn", "No traveller returns,--puzzles the will,", "And makes us rather bear those ills we have", "Than fly to others that we know not of?", "Thus conscience does make cowards of us all;", "And thus the native hue of resolution", "Is sicklied o'er with the pale cast of thought;", "And enterprises of great pith and moment,", "With this regard, their currents turn awry,", "And lose the name of action.--Soft you now!", "The fair Ophelia!--Nymph, in thy orisons", "Be all my sins remember'd." ); } }
Example 19
Source File: GrepJob.java From flink-perf with Apache License 2.0 | 4 votes |
public static void main(final String[] args) throws Exception { // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); String in = args[0]; String out = args[1]; System.err.println("Using input=" + in); System.err.println("Using output=" + out); String patterns[] = new String[args.length - 2]; System.arraycopy(args, 2, patterns, 0, args.length - 2); System.err.println("Using patterns: " + Arrays.toString(patterns)); // get input data DataSet<String> text = env.readTextFile(args[0]); for (int p = 0; p < patterns.length; p++) { final String pattern = patterns[p]; DataSet<String> res = text.filter(new RichFilterFunction<String>() { private static final long serialVersionUID = 1L; Pattern p = Pattern.compile(pattern); LongCounter filterMatches = new LongCounter(); LongCounter filterRecords = new LongCounter(); @Override public void open(Configuration parameters) throws Exception { super.open(parameters); getRuntimeContext().addAccumulator("filterMatchCount-" + pattern, filterMatches); getRuntimeContext().addAccumulator("filterRecordCount-" + pattern, filterRecords); } @Override public boolean filter(String value) throws Exception { filterRecords.add(1L); if (value == null || value.length() == 0) { return false; } final Matcher m = p.matcher(value); if (m.find()) { filterMatches.add(1L); return true; } return false; } }).name("grep for " + pattern); res.writeAsText(out + "_" + pattern, FileSystem.WriteMode.OVERWRITE); } // execute program JobExecutionResult jobResult = env.execute("Flink Grep benchmark"); System.err.println(AccumulatorHelper.getResultsFormated(jobResult.getAllAccumulatorResults())); }
Example 20
Source File: WordCountWithoutCombine.java From flink-perf with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = env.readTextFile(textPath); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()).filter(new FilterFunction<Tuple2<String,Integer>>() { @Override public boolean filter(Tuple2<String, Integer> value) throws Exception { return !value.f1.equals(""); } }) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .reduceGroup(new GroupReduceFunction<Tuple2<String,Integer>, Tuple2<String, Integer>>() { @Override public void reduce( Iterable<Tuple2<String, Integer>> valuesIt, Collector<Tuple2<String, Integer>> out) throws Exception { Iterator<Tuple2<String, Integer>> values = valuesIt.iterator(); int count = 0; Tuple2<String, Integer> val = null; // this always works because the iterator always has something. while(values.hasNext()) { val = values.next(); count += val.f1; } val.f1 = count; out.collect(val); } }); counts.writeAsText(outputPath); // counts.writeAsCsv(outputPath, "\n", " "); // execute program env.execute("WordCountWithoutcombine"); }