Java Code Examples for org.apache.flink.api.java.DataSet#filter()
The following examples show how to use org.apache.flink.api.java.DataSet#filter().
The original project and source file are noted above each example.
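For orientation, here is a minimal, self-contained sketch of what DataSet#filter() does: it keeps exactly those elements for which the predicate returns true. The class name and the even-number predicate are illustrative, not taken from any example below.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class FilterSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5, 6);
        // keep only the elements for which the predicate returns true
        DataSet<Integer> evens = numbers.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return value % 2 == 0;
            }
        });
        evens.print(); // prints 2, 4, 6; print() also triggers execution
    }
}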
Example 1
Source File: FilterITCase.java, from the flink project (Apache License 2.0)
@Test
public void testFilterOnCustomType() throws Exception {
    /*
     * Test filter on custom type.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
    DataSet<CustomType> filterDs = ds.filter(new Filter6());
    List<CustomType> result = filterDs.collect();

    String expected = "3,3,Hello world, how are you?\n"
            + "3,4,I am fine.\n"
            + "3,5,Luke Skywalker\n";

    compareResultAsText(result, expected);
}
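Filter6 is defined elsewhere in FilterITCase.java and is not shown above. A predicate consistent with the expected output (only the CustomType elements whose string field contains an 'a' survive) would look like this sketch:

import org.apache.flink.api.common.functions.FilterFunction;

// Sketch only: the actual Filter6 lives in FilterITCase.java; any predicate
// matching the expected output works.
public static class Filter6 implements FilterFunction<CustomType> {
    @Override
    public boolean filter(CustomType value) throws Exception {
        return value.myString.contains("a");
    }
}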
Example 2
Source File: FilterITCase.java, from the flink project (Apache License 2.0)
@Test
public void testAllRejectingFilter() throws Exception {
    /*
     * Test all-rejecting filter.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> filterDs = ds.filter(new Filter1());
    List<Tuple3<Integer, Long, String>> result = filterDs.collect();

    String expected = "\n";

    compareResultAsTuples(result, expected);
}
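Filter1 is likewise defined elsewhere in the test class. Since the expected result is empty, it must reject every element; a consistent sketch:

// Sketch: an all-rejecting filter, consistent with the empty expected output.
public static class Filter1 implements FilterFunction<Tuple3<Integer, Long, String>> {
    @Override
    public boolean filter(Tuple3<Integer, Long, String> value) throws Exception {
        return false; // reject everything
    }
}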
Example 3
Source File: FilterITCase.java, from the Flink-CEPplus project (Apache License 2.0)
@Test
public void testFilterBasicType() throws Exception {
    /*
     * Test filter on basic type.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
    DataSet<String> filterDs = ds.filter(new Filter5());
    List<String> result = filterDs.collect();

    String expected = "Hi\n"
            + "Hello\n"
            + "Hello world\n"
            + "Hello world, how are you?\n";

    compareResultAsText(result, expected);
}
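Filter5 is not shown here either; the expected output keeps exactly the strings that start with "H", so a consistent sketch is:

// Sketch: keeps strings beginning with "H", matching the expected output.
public static class Filter5 implements FilterFunction<String> {
    @Override
    public boolean filter(String value) throws Exception {
        return value.startsWith("H");
    }
}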
Example 4
Source File: FilterITCase.java, from the flink project (Apache License 2.0)
@Test
public void testFilterOnIntegerTupleField() throws Exception {
    /*
     * Test filter on Integer tuple field.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> filterDs = ds.filter(new Filter4());
    List<Tuple3<Integer, Long, String>> result = filterDs.collect();

    String expected = "2,2,Hello\n"
            + "4,3,Hello world, how are you?\n"
            + "6,3,Luke Skywalker\n"
            + "8,4,Comment#2\n"
            + "10,4,Comment#4\n"
            + "12,5,Comment#6\n"
            + "14,5,Comment#8\n"
            + "16,6,Comment#10\n"
            + "18,6,Comment#12\n"
            + "20,6,Comment#14\n";

    compareResultAsTuples(result, expected);
}
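Filter4's definition is elsewhere in the test class; the expected output contains exactly the tuples with an even first field, so a consistent sketch is:

// Sketch: keeps tuples whose Integer field f0 is even.
public static class Filter4 implements FilterFunction<Tuple3<Integer, Long, String>> {
    @Override
    public boolean filter(Tuple3<Integer, Long, String> value) throws Exception {
        return value.f0 % 2 == 0;
    }
}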
Example 5
Source File: FilterWithIndirection.java, from the Flink-CEPplus project (Apache License 2.0)
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = env.fromElements("Please filter", "the words", "but not this");

    DataSet<String> output = input.filter(UtilFunctionWrapper.UtilFunction.getWordFilter());
    output.print();

    env.execute();
}
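UtilFunctionWrapper is not shown in this snippet. The point of the example is that the FilterFunction is obtained through a level of indirection (a nested class with a static factory method) rather than constructed inline. A hypothetical wrapper with that shape, with a stand-in predicate, might look like:

import org.apache.flink.api.common.functions.FilterFunction;

// Hypothetical sketch of the indirection; the real class lives in the project.
public class UtilFunctionWrapper {
    public static class UtilFunction {
        public static FilterFunction<String> getWordFilter() {
            // stand-in predicate; the actual one is defined in the project
            return value -> value.contains("filter");
        }
    }
}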
Example 6
Source File: FilterWithMethodReference.java, from the flink project (Apache License 2.0)
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = env.fromElements("Please filter", "the words", "but not this");

    FilterFunction<String> filter = WordFilter::filter;

    DataSet<String> output = input.filter(filter);
    output.print();

    env.execute();
}
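WordFilter (shared with the lambda example further down) is not shown. For the method reference WordFilter::filter to satisfy FilterFunction<String>, it needs a static filter(String) method returning boolean; the predicate below is a hypothetical stand-in:

// Hypothetical sketch: the real WordFilter lives in the project.
public class WordFilter {
    public static boolean filter(String value) {
        return value.contains("filter"); // stand-in predicate
    }
}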
Example 7
Source File: FilterITCase.java, from the flink project (Apache License 2.0)
@Test
public void testAllPassingFilter() throws Exception {
    /*
     * Test all-passing filter.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> filterDs = ds.filter(new Filter2());
    List<Tuple3<Integer, Long, String>> result = filterDs.collect();

    String expected = "1,1,Hi\n"
            + "2,2,Hello\n"
            + "3,2,Hello world\n"
            + "4,3,Hello world, how are you?\n"
            + "5,3,I am fine.\n"
            + "6,3,Luke Skywalker\n"
            + "7,4,Comment#1\n"
            + "8,4,Comment#2\n"
            + "9,4,Comment#3\n"
            + "10,4,Comment#4\n"
            + "11,5,Comment#5\n"
            + "12,5,Comment#6\n"
            + "13,5,Comment#7\n"
            + "14,5,Comment#8\n"
            + "15,5,Comment#9\n"
            + "16,6,Comment#10\n"
            + "17,6,Comment#11\n"
            + "18,6,Comment#12\n"
            + "19,6,Comment#13\n"
            + "20,6,Comment#14\n"
            + "21,6,Comment#15\n";

    compareResultAsTuples(result, expected);
}
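Filter2 must accept every element, since the expected output is the complete 3-tuple data set; a consistent sketch:

// Sketch: an all-passing filter, consistent with the full expected output.
public static class Filter2 implements FilterFunction<Tuple3<Integer, Long, String>> {
    @Override
    public boolean filter(Tuple3<Integer, Long, String> value) throws Exception {
        return true; // keep everything
    }
}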
Example 8
Source File: EmptyFieldsCountAccumulator.java, from the flink project (Apache License 2.0)
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get the data set
    final DataSet<StringTriple> file = getDataSet(env, params);

    // filter lines with empty fields
    final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

    // Here, we could do further processing with the filtered lines...
    JobExecutionResult result;

    // output the filtered lines
    if (params.has("output")) {
        filteredLines.writeAsCsv(params.get("output"));

        // execute program
        result = env.execute("Accumulator example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        filteredLines.print();
        result = env.getLastJobExecutionResult();
    }

    // get the accumulator result via its registration key
    final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
    System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
}
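EmptyFieldFilter, defined elsewhere in EmptyFieldsCountAccumulator.java, is the interesting part: a RichFilterFunction that drops records containing empty fields and, on the side, registers an accumulator under EMPTY_FIELD_ACCUMULATOR so that main() can read it from the JobExecutionResult. The original defines its own per-column counting accumulator; the sketch below substitutes Flink's built-in ListAccumulator (so it records the column positions of empty fields rather than per-column totals) to stay short:

import org.apache.flink.api.common.accumulators.ListAccumulator;
import org.apache.flink.api.common.functions.RichFilterFunction;
import org.apache.flink.configuration.Configuration;

// Sketch: the real EmptyFieldFilter is in EmptyFieldsCountAccumulator.java.
public static final class EmptyFieldFilter extends RichFilterFunction<StringTriple> {
    // per-instance accumulator; Flink merges the instances when the job completes
    private final ListAccumulator<Integer> emptyFieldCounter = new ListAccumulator<>();

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // register under the key that main() later uses to read the result
        getRuntimeContext().addAccumulator(EMPTY_FIELD_ACCUMULATOR, emptyFieldCounter);
    }

    @Override
    public boolean filter(StringTriple t) {
        boolean containsEmptyFields = false;
        // inspect every tuple field, looking for empty ones
        for (int pos = 0; pos < t.getArity(); pos++) {
            String field = t.getField(pos);
            if (field == null || field.trim().isEmpty()) {
                containsEmptyFields = true;
                emptyFieldCounter.add(pos); // record which column was empty
            }
        }
        return !containsEmptyFields;
    }
}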
Example 9
Source File: FilterWithLambda.java, from the flink project (Apache License 2.0)
@SuppressWarnings("Convert2MethodRef") public static void main(String[] args) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> input = env.fromElements("Please filter", "the words", "but not this"); DataSet<String> output = input.filter((v) -> WordFilter.filter(v)); output.print(); env.execute(); }
Example 10
Source File: WebLogAnalysis.java, from the Flink-CEPplus project (Apache License 2.0)
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents
            .filter(new FilterDocByKeyWords())
            .project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
            .filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits
            .filter(new FilterVisitsByDate())
            .project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
            filterDocs.join(filterRanks)
                    .where(0).equalTo(1)
                    .projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
            joinDocsRanks.coGroup(filterVisits)
                    .where(1).equalTo(0)
                    .with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");

        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
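The three filter classes (FilterDocByKeyWords, FilterByRank, FilterVisitsByDate) are defined elsewhere in WebLogAnalysis.java. As one illustration, FilterByRank plausibly keeps records above a minimum rank; the threshold value below is an assumption:

// Sketch: the real FilterByRank is in WebLogAnalysis.java; the threshold is assumed.
public static class FilterByRank implements FilterFunction<Tuple3<Integer, String, Integer>> {
    private static final int RANK_THRESHOLD = 40; // assumed value

    @Override
    public boolean filter(Tuple3<Integer, String, Integer> value) throws Exception {
        // f0 holds the rank; keep only sufficiently high-ranked records
        return value.f0 > RANK_THRESHOLD;
    }
}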
Example 11
Source File: TPCDSQuery55CSV.java, from the parquet-flinktacular project (Apache License 2.0)
public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<DateDim> dataDims = getDataDimDataSet(env);
    DataSet<Item> item = getItemDataSet(env);
    DataSet<StoreSales> storeSales = getStoreSalesDataSet(env);

    dataDims = dataDims.filter(new FilterFunction<DateDim>() {
        @Override
        public boolean filter(DateDim d) {
            return d.getD_moy() == 11L && d.getD_year() == 1999L;
        }
    });

    item = item.filter(new FilterFunction<Item>() {
        @Override
        public boolean filter(Item i) {
            return i.getI_manager_id() == 28L;
        }
    });

    dataDims.join(storeSales).where(0).equalTo(0).with(new DataDimAndStoreSales())
            .join(item).where(1).equalTo(0).with(new DataDimAndStoreSalesAndItems())
            .groupBy(1, 0).aggregate(Aggregations.SUM, 2)
            .print();

    // execute program
    env.execute("TPC-DS Query 55 Example with CSV input");

    System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
}
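Because FilterFunction is a single-method interface, the two anonymous classes above could equally be written as lambdas with the same semantics:

// Equivalent lambda form of the two filters above.
dataDims = dataDims.filter(d -> d.getD_moy() == 11L && d.getD_year() == 1999L);
item = item.filter(i -> i.getI_manager_id() == 28L);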