Java Code Examples for org.apache.flink.api.java.DataSet#filter()
The following examples show how to use org.apache.flink.api.java.DataSet#filter().
The original project and source file are noted above each example.
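For orientation, here is a minimal, self-contained sketch of what DataSet#filter() does: it keeps exactly those elements for which the predicate returns true. The class name and the even-number predicate are illustrative, not taken from any example below.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class FilterSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5, 6);
        // keep only the elements for which the predicate returns true
        DataSet<Integer> evens = numbers.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return value % 2 == 0;
            }
        });
        evens.print(); // prints 2, 4, 6; print() also triggers execution
    }
}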
Example 1
Source File: FilterITCase.java, from the flink project (Apache License 2.0)
@Test
public void testFilterOnCustomType() throws Exception {
    /*
     * Test filter on custom type.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
    DataSet<CustomType> filterDs = ds.filter(new Filter6());
    List<CustomType> result = filterDs.collect();

    String expected = "3,3,Hello world, how are you?\n"
            + "3,4,I am fine.\n"
            + "3,5,Luke Skywalker\n";

    compareResultAsText(result, expected);
}
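Filter6 is defined elsewhere in FilterITCase.java and is not shown above. A predicate consistent with the expected output (only the CustomType elements whose string field contains an 'a' survive) would look like this sketch:

import org.apache.flink.api.common.functions.FilterFunction;

// Sketch only: the actual Filter6 lives in FilterITCase.java; any predicate
// matching the expected output works.
public static class Filter6 implements FilterFunction<CustomType> {
    @Override
    public boolean filter(CustomType value) throws Exception {
        return value.myString.contains("a");
    }
}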
Example 2
Source File: FilterITCase.java, from the flink project (Apache License 2.0)
@Test
public void testAllRejectingFilter() throws Exception {
    /*
     * Test all-rejecting filter.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> filterDs = ds.filter(new Filter1());
    List<Tuple3<Integer, Long, String>> result = filterDs.collect();

    String expected = "\n";

    compareResultAsTuples(result, expected);
}
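Filter1 is likewise defined elsewhere in the test class. Since the expected result is empty, it must reject every element; a consistent sketch:

// Sketch: an all-rejecting filter, consistent with the empty expected output.
public static class Filter1 implements FilterFunction<Tuple3<Integer, Long, String>> {
    @Override
    public boolean filter(Tuple3<Integer, Long, String> value) throws Exception {
        return false; // reject everything
    }
}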
Example 3
Source File: FilterITCase.java, from the Flink-CEPplus project (Apache License 2.0)
@Test
public void testFilterBasicType() throws Exception {
    /*
     * Test filter on basic type.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
    DataSet<String> filterDs = ds.filter(new Filter5());
    List<String> result = filterDs.collect();

    String expected = "Hi\n"
            + "Hello\n"
            + "Hello world\n"
            + "Hello world, how are you?\n";

    compareResultAsText(result, expected);
}
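Filter5 is not shown here either; the expected output keeps exactly the strings that start with "H", so a consistent sketch is:

// Sketch: keeps strings beginning with "H", matching the expected output.
public static class Filter5 implements FilterFunction<String> {
    @Override
    public boolean filter(String value) throws Exception {
        return value.startsWith("H");
    }
}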
Example 4
Source File: FilterITCase.java, from the flink project (Apache License 2.0)
@Test
public void testFilterOnIntegerTupleField() throws Exception {
    /*
     * Test filter on Integer tuple field.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> filterDs = ds.filter(new Filter4());
    List<Tuple3<Integer, Long, String>> result = filterDs.collect();

    String expected = "2,2,Hello\n"
            + "4,3,Hello world, how are you?\n"
            + "6,3,Luke Skywalker\n"
            + "8,4,Comment#2\n"
            + "10,4,Comment#4\n"
            + "12,5,Comment#6\n"
            + "14,5,Comment#8\n"
            + "16,6,Comment#10\n"
            + "18,6,Comment#12\n"
            + "20,6,Comment#14\n";

    compareResultAsTuples(result, expected);
}
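Filter4's definition is elsewhere in the test class; the expected output contains exactly the tuples with an even first field, so a consistent sketch is:

// Sketch: keeps tuples whose Integer field f0 is even.
public static class Filter4 implements FilterFunction<Tuple3<Integer, Long, String>> {
    @Override
    public boolean filter(Tuple3<Integer, Long, String> value) throws Exception {
        return value.f0 % 2 == 0;
    }
}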
Example 5
Source File: FilterWithIndirection.java, from the Flink-CEPplus project (Apache License 2.0)
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = env.fromElements("Please filter", "the words", "but not this");

    DataSet<String> output = input.filter(UtilFunctionWrapper.UtilFunction.getWordFilter());
    output.print();

    env.execute();
}
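UtilFunctionWrapper is not shown in this snippet. The point of the example is that the FilterFunction is obtained through a level of indirection (a nested class with a static factory method) rather than constructed inline. A hypothetical wrapper with that shape, with a stand-in predicate, might look like:

import org.apache.flink.api.common.functions.FilterFunction;

// Hypothetical sketch of the indirection; the real class lives in the project.
public class UtilFunctionWrapper {
    public static class UtilFunction {
        public static FilterFunction<String> getWordFilter() {
            // stand-in predicate; the actual one is defined in the project
            return value -> value.contains("filter");
        }
    }
}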
Example 6
Source File: FilterWithMethodReference.java, from the flink project (Apache License 2.0)
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = env.fromElements("Please filter", "the words", "but not this");

    FilterFunction<String> filter = WordFilter::filter;

    DataSet<String> output = input.filter(filter);
    output.print();

    env.execute();
}
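WordFilter (shared with the lambda example further down) is not shown. For the method reference WordFilter::filter to satisfy FilterFunction<String>, it needs a static filter(String) method returning boolean; the predicate below is a hypothetical stand-in:

// Hypothetical sketch: the real WordFilter lives in the project.
public class WordFilter {
    public static boolean filter(String value) {
        return value.contains("filter"); // stand-in predicate
    }
}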
Example 7
Source File: FilterITCase.java, from the flink project (Apache License 2.0)
@Test
public void testAllPassingFilter() throws Exception {
    /*
     * Test all-passing filter.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> filterDs = ds.filter(new Filter2());
    List<Tuple3<Integer, Long, String>> result = filterDs.collect();

    String expected = "1,1,Hi\n"
            + "2,2,Hello\n"
            + "3,2,Hello world\n"
            + "4,3,Hello world, how are you?\n"
            + "5,3,I am fine.\n"
            + "6,3,Luke Skywalker\n"
            + "7,4,Comment#1\n"
            + "8,4,Comment#2\n"
            + "9,4,Comment#3\n"
            + "10,4,Comment#4\n"
            + "11,5,Comment#5\n"
            + "12,5,Comment#6\n"
            + "13,5,Comment#7\n"
            + "14,5,Comment#8\n"
            + "15,5,Comment#9\n"
            + "16,6,Comment#10\n"
            + "17,6,Comment#11\n"
            + "18,6,Comment#12\n"
            + "19,6,Comment#13\n"
            + "20,6,Comment#14\n"
            + "21,6,Comment#15\n";

    compareResultAsTuples(result, expected);
}
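Filter2 must accept every element, since the expected output is the complete 3-tuple data set; a consistent sketch:

// Sketch: an all-passing filter, consistent with the full expected output.
public static class Filter2 implements FilterFunction<Tuple3<Integer, Long, String>> {
    @Override
    public boolean filter(Tuple3<Integer, Long, String> value) throws Exception {
        return true; // keep everything
    }
}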
Example 8
Source File: EmptyFieldsCountAccumulator.java, from the flink project (Apache License 2.0)
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get the data set
    final DataSet<StringTriple> file = getDataSet(env, params);

    // filter lines with empty fields
    final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

    // Here, we could do further processing with the filtered lines...
    JobExecutionResult result;

    // output the filtered lines
    if (params.has("output")) {
        filteredLines.writeAsCsv(params.get("output"));

        // execute program
        result = env.execute("Accumulator example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        filteredLines.print();
        result = env.getLastJobExecutionResult();
    }

    // get the accumulator result via its registration key
    final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
    System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
}
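EmptyFieldFilter, defined elsewhere in EmptyFieldsCountAccumulator.java, is the interesting part: a RichFilterFunction that drops records containing empty fields and, on the side, registers an accumulator under EMPTY_FIELD_ACCUMULATOR so that main() can read it from the JobExecutionResult. The original defines its own per-column counting accumulator; the sketch below substitutes Flink's built-in ListAccumulator (so it records the column positions of empty fields rather than per-column totals) to stay short:

import org.apache.flink.api.common.accumulators.ListAccumulator;
import org.apache.flink.api.common.functions.RichFilterFunction;
import org.apache.flink.configuration.Configuration;

// Sketch: the real EmptyFieldFilter is in EmptyFieldsCountAccumulator.java.
public static final class EmptyFieldFilter extends RichFilterFunction<StringTriple> {
    // per-instance accumulator; Flink merges the instances when the job completes
    private final ListAccumulator<Integer> emptyFieldCounter = new ListAccumulator<>();

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // register under the key that main() later uses to read the result
        getRuntimeContext().addAccumulator(EMPTY_FIELD_ACCUMULATOR, emptyFieldCounter);
    }

    @Override
    public boolean filter(StringTriple t) {
        boolean containsEmptyFields = false;
        // inspect every tuple field, looking for empty ones
        for (int pos = 0; pos < t.getArity(); pos++) {
            String field = t.getField(pos);
            if (field == null || field.trim().isEmpty()) {
                containsEmptyFields = true;
                emptyFieldCounter.add(pos); // record which column was empty
            }
        }
        return !containsEmptyFields;
    }
}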
Example 9
Source File: FilterWithLambda.java, from the flink project (Apache License 2.0)
@SuppressWarnings("Convert2MethodRef") public static void main(String[] args) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> input = env.fromElements("Please filter", "the words", "but not this"); DataSet<String> output = input.filter((v) -> WordFilter.filter(v)); output.print(); env.execute(); }
Example 10
Source File: WebLogAnalysis.java, from the Flink-CEPplus project (Apache License 2.0)
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents
            .filter(new FilterDocByKeyWords())
            .project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
            .filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits
            .filter(new FilterVisitsByDate())
            .project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
            filterDocs.join(filterRanks)
                    .where(0).equalTo(1)
                    .projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
            joinDocsRanks.coGroup(filterVisits)
                    .where(1).equalTo(0)
                    .with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");

        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
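The three filter classes (FilterDocByKeyWords, FilterByRank, FilterVisitsByDate) are defined elsewhere in WebLogAnalysis.java. As one illustration, FilterByRank plausibly keeps records above a minimum rank; the threshold value below is an assumption:

// Sketch: the real FilterByRank is in WebLogAnalysis.java; the threshold is assumed.
public static class FilterByRank implements FilterFunction<Tuple3<Integer, String, Integer>> {
    private static final int RANK_THRESHOLD = 40; // assumed value

    @Override
    public boolean filter(Tuple3<Integer, String, Integer> value) throws Exception {
        // f0 holds the rank; keep only sufficiently high-ranked records
        return value.f0 > RANK_THRESHOLD;
    }
}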
Example 11
Source File: TPCDSQuery55CSV.java, from the parquet-flinktacular project (Apache License 2.0)
public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<DateDim> dataDims = getDataDimDataSet(env);
    DataSet<Item> item = getItemDataSet(env);
    DataSet<StoreSales> storeSales = getStoreSalesDataSet(env);

    dataDims = dataDims.filter(new FilterFunction<DateDim>() {
        @Override
        public boolean filter(DateDim d) {
            return d.getD_moy() == 11L && d.getD_year() == 1999L;
        }
    });

    item = item.filter(new FilterFunction<Item>() {
        @Override
        public boolean filter(Item i) {
            return i.getI_manager_id() == 28L;
        }
    });

    dataDims.join(storeSales).where(0).equalTo(0).with(new DataDimAndStoreSales())
            .join(item).where(1).equalTo(0).with(new DataDimAndStoreSalesAndItems())
            .groupBy(1, 0).aggregate(Aggregations.SUM, 2)
            .print();

    // execute program
    env.execute("TPC-DS Query 55 Example with CSV input");

    System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
}
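Because FilterFunction is a single-method interface, the two anonymous classes above could equally be written as lambdas with the same semantics:

// Equivalent lambda form of the two filters above.
dataDims = dataDims.filter(d -> d.getD_moy() == 11L && d.getD_year() == 1999L);
item = item.filter(i -> i.getI_manager_id() == 28L);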