Java Code Examples for org.apache.flink.test.operators.util.CollectionDataSets#get3TupleDataSet()

The following examples show how to use org.apache.flink.test.operators.util.CollectionDataSets#get3TupleDataSet(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: FilterITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Test filter with broadcast variables.
 *
 * <p>Filters the 3-tuple data set with {@code RichFilter2}, attaching the integer
 * data set to the filter operator as broadcast set {@code "ints"}, and compares the
 * collected tuples with the expected output.
 */
@Test
public void testFilterWithBroadcastVariables() throws Exception {
	final String expected = "11,5,Comment#5\n" +
			"12,5,Comment#6\n" +
			"13,5,Comment#7\n" +
			"14,5,Comment#8\n" +
			"15,5,Comment#9\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Integer> broadcastInts = CollectionDataSets.getIntegerDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	// Register the integers on the filter operator under the broadcast name "ints".
	List<Tuple3<Integer, Long, String>> filtered = input
			.filter(new RichFilter2())
			.withBroadcastSet(broadcastInts, "ints")
			.collect();

	compareResultAsTuples(filtered, expected);
}
 
Example 2
Source File: GroupReduceITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Check correctness of all-groupreduce for tuples.
 *
 * <p>Applies {@code AllAddingTuple3GroupReduce} to the 3-tuple data set without a
 * preceding {@code groupBy} and expects a single output tuple.
 */
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// reduceGroup is called directly on the data set, i.e. no grouping key is specified.
	List<Tuple3<Integer, Long, String>> reduced = CollectionDataSets.get3TupleDataSet(env)
			.reduceGroup(new AllAddingTuple3GroupReduce())
			.collect();

	compareResultAsTuples(reduced, "231,91,Hello World\n");
}
 
Example 3
Source File: GroupReduceITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Check correctness of groupReduce if the UDF returns input objects multiple times
 * and changes them in between.
 *
 * <p>Groups the 3-tuple data set on field 1, applies
 * {@code InputReturningTuple3GroupReduce}, and compares the collected tuples with
 * the expected output.
 */
@Test
public void testCorrectnessOfGroupReduceIfUDFReturnsInputObjectsMultipleTimesWhileChangingThem() throws Exception{
	final String expected = "11,1,Hi!\n" +
			"21,1,Hi again!\n" +
			"12,2,Hi!\n" +
			"22,2,Hi again!\n" +
			"13,2,Hi!\n" +
			"23,2,Hi again!\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	DataSet<Tuple3<Integer, Long, String>> grouped = input
			.groupBy(1)
			.reduceGroup(new InputReturningTuple3GroupReduce());

	compareResultAsTuples(grouped.collect(), expected);
}
 
Example 4
Source File: SortPartitionITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Test sort partition on field expression.
 *
 * <p>With parallelism 4, the 3-tuple data set is passed through {@code IdMapper},
 * each partition is sorted on field expression {@code "f1"} in descending order, and
 * {@code OrderCheckMapper} (configured with {@code Tuple3Checker}) is run per
 * partition. The distinct collected output is expected to be the single value
 * {@code (true)}.
 */
@Test
public void testSortPartitionByFieldExpression() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			// Explicit type argument instead of a raw IdMapper keeps the whole pipeline
			// statically typed, so no rawtypes/unchecked suppression is required.
			.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
			.sortPartition("f1", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example 5
Source File: ReduceITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Reduce with broadcast set.
 *
 * <p>Groups the 3-tuple data set on field 1, reduces each group with
 * {@code BCTuple3Reduce} while the integer data set is attached as broadcast set
 * {@code "ints"}, and compares the collected tuples with the expected output.
 */
@Test
public void testReduceWithBroadcastSet() throws Exception {
	final String expected = "1,1,Hi\n" +
			"5,2,55\n" +
			"15,3,55\n" +
			"34,4,55\n" +
			"65,5,55\n" +
			"111,6,55\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Integer> broadcastInts = CollectionDataSets.getIntegerDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	// Register the integers on the reduce operator under the broadcast name "ints".
	List<Tuple3<Integer, Long, String>> reduced = input
			.groupBy(1)
			.reduce(new BCTuple3Reduce())
			.withBroadcastSet(broadcastInts, "ints")
			.collect();

	compareResultAsTuples(reduced, expected);
}
 
Example 6
Source File: GroupReduceITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Test string-based definition on group sort, based on test:
 * check correctness of groupReduce with descending group sort.
 *
 * <p>Runs with parallelism 1, groups on field 1, sorts each group on field
 * expression {@code "f2"} in descending order, and applies
 * {@code Tuple3SortedGroupReduce}.
 */
@Test
public void testStringBasedDefinitionOnGroupSort() throws Exception {
	final String expected = "1,1,Hi\n" +
			"5,2,Hello world-Hello\n" +
			"15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n" +
			"34,4,Comment#4-Comment#3-Comment#2-Comment#1\n" +
			"65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n" +
			"111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	// The sort key is given as a field-expression string rather than a position.
	DataSet<Tuple3<Integer, Long, String>> sortedReduce = input
			.groupBy(1)
			.sortGroup("f2", Order.DESCENDING)
			.reduceGroup(new Tuple3SortedGroupReduce());

	compareResultAsTuples(sortedReduce.collect(), expected);
}
 
Example 7
Source File: JavaTableEnvironmentITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a table created from the 3-tuple data set as a temporary view, scans it
 * by name, selects {@code f0} and {@code f1}, keeps the rows where {@code f0} is
 * greater than 7, and compares the collected rows with the expected text.
 */
@Test
public void testTableRegister() throws Exception {
	final String tableName = "MyTable";
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env, config());

	// Convert the data set to a table and expose it under the view name.
	Table source = tableEnv.fromDataSet(CollectionDataSets.get3TupleDataSet(env));
	tableEnv.createTemporaryView(tableName, source);

	// Look the view up by name, then project and filter.
	Table projected = tableEnv.scan(tableName).select($("f0"), $("f1"));
	Table filtered = projected.filter($("f0").isGreater(7));

	List<Row> rows = tableEnv.toDataSet(filtered, Row.class).collect();

	String expected =
			"8,4\n" + "9,4\n" + "10,4\n" +
			"11,5\n" + "12,5\n" + "13,5\n" + "14,5\n" + "15,5\n" +
			"16,6\n" + "17,6\n" + "18,6\n" + "19,6\n" + "20,6\n" + "21,6\n";
	compareResultAsText(rows, expected);
}
 
Example 8
Source File: GroupCombineITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code IdentityFunction} first as a group-combine and then as a group-reduce,
 * each time after grouping on field 1 and sorting the group on field 1 in descending
 * order, and compares the collected tuples with {@code identityResult}.
 */
@Test
public void testIdentityWithGroupByAndSort() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	List<Tuple3<Integer, Long, String>> collected = CollectionDataSets.get3TupleDataSet(env)
			// reduce partially
			.groupBy(1)
			.sortGroup(1, Order.DESCENDING)
			.combineGroup(new IdentityFunction())
			// fully reduce
			.groupBy(1)
			.sortGroup(1, Order.DESCENDING)
			.reduceGroup(new IdentityFunction())
			.collect();

	compareResultAsTuples(collected, identityResult);
}
 
Example 9
Source File: PartitionITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Test range partition by key field.
 *
 * <p>Range-partitions the 3-tuple data set on field 1, applies
 * {@code UniqueTupleLongMapper} per partition, and compares the collected values
 * with the expected text.
 */
@Test
public void testRangePartitionByKeyField() throws Exception {
	final String expected = "1\n" +
		"2\n" +
		"3\n" +
		"4\n" +
		"5\n" +
		"6\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	DataSet<Long> perPartitionLongs = input
		.partitionByRange(1)
		.mapPartition(new UniqueTupleLongMapper());

	compareResultAsText(perPartitionLongs.collect(), expected);
}
 
Example 10
Source File: SortPartitionITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Test sort partition on an extracted key.
 *
 * <p>With parallelism 4, the 3-tuple data set is passed through {@code IdMapper},
 * each partition is sorted in descending order on a {@code Tuple2} key built from
 * fields {@code f0} and {@code f1}, and {@code OrderCheckMapper} (configured with
 * {@code Tuple3Checker}) is run per partition. The distinct collected output is
 * expected to be the single value {@code (true)}.
 */
@Test
public void testSortPartitionWithKeySelector2() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// Composite sort key made of the first two tuple fields.
	KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>> firstTwoFields =
		new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
			@Override
			public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> tuple) throws Exception {
				return new Tuple2<>(tuple.f0, tuple.f1);
			}
		};

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> orderChecks = input
		.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
		.sortPartition(firstTwoFields, Order.DESCENDING)
		.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
		.distinct()
		.collect();

	compareResultAsText(orderChecks, "(true)\n");
}
 
Example 11
Source File: JavaTableEnvironmentITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the 3-tuple data set under a table name with field names
 * {@code "a, b, c"}, scans the table, selects all three fields, and compares the
 * collected rows with the expected text.
 */
@Test
public void testRegisterWithFields() throws Exception {
	final String tableName = "MyTable";
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env, config());

	// Register the data set directly, naming its fields a, b and c.
	tableEnv.registerDataSet(tableName, CollectionDataSets.get3TupleDataSet(env), "a, b, c");

	Table selected = tableEnv.scan(tableName).select("a, b, c");

	List<Row> rows = tableEnv.toDataSet(selected, Row.class).collect();

	String expected =
			"1,1,Hi\n" + "2,2,Hello\n" + "3,2,Hello world\n" +
			"4,3,Hello world, how are you?\n" + "5,3,I am fine.\n" + "6,3,Luke Skywalker\n" +
			"7,4,Comment#1\n" + "8,4,Comment#2\n" + "9,4,Comment#3\n" + "10,4,Comment#4\n" +
			"11,5,Comment#5\n" + "12,5,Comment#6\n" + "13,5,Comment#7\n" +
			"14,5,Comment#8\n" + "15,5,Comment#9\n" +
			"16,6,Comment#10\n" + "17,6,Comment#11\n" + "18,6,Comment#12\n" +
			"19,6,Comment#13\n" + "20,6,Comment#14\n" + "21,6,Comment#15\n";
	compareResultAsText(rows, expected);
}
 
Example 12
Source File: PartitionITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Test hash partition by key field and different parallelism.
 *
 * <p>The environment parallelism is 3 while the hash-partition operator on field 1
 * is set to parallelism 4. {@code UniqueTupleLongMapper} is applied per partition
 * and the collected values are compared with the expected text.
 */
@Test
public void testHashPartitionByKeyFieldAndDifferentParallelism() throws Exception {
	final String expected = "1\n" +
			"2\n" +
			"3\n" +
			"4\n" +
			"5\n" +
			"6\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(3);

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	// The partition operator deliberately uses a parallelism different from the environment's.
	DataSet<Long> perPartitionLongs = input
			.partitionByHash(1).setParallelism(4)
			.mapPartition(new UniqueTupleLongMapper());

	compareResultAsText(perPartitionLongs.collect(), expected);
}
 
Example 13
Source File: JavaSqlITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the 3-tuple data set as {@code AggTable} with fields {@code x, y, z},
 * runs a SQL query with sum/min/max/count/avg aggregates over it, and compares the
 * collected rows with the expected text.
 */
@Test
public void testAggregation() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env, config());

	tableEnv.registerDataSet("AggTable", CollectionDataSets.get3TupleDataSet(env), "x, y, z");

	// Aggregates over the whole table; the query has no GROUP BY clause.
	Table aggregated = tableEnv.sqlQuery(
			"SELECT sum(x), min(x), max(x), count(y), avg(x) FROM AggTable");

	List<Row> rows = tableEnv.toDataSet(aggregated, Row.class).collect();

	compareResultAsText(rows, "231,1,21,21,11");
}
 
Example 14
Source File: GroupReduceITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Check correctness of groupReduce with broadcast set.
 *
 * <p>Groups the 3-tuple data set on field 1, applies {@code BCTuple3GroupReduce}
 * with the integer data set attached as broadcast set {@code "ints"}, and compares
 * the collected tuples with the expected output.
 */
@Test
public void testCorrectnessOfGroupReduceWithBroadcastSet() throws Exception {
	final String expected = "1,1,55\n" +
			"5,2,55\n" +
			"15,3,55\n" +
			"34,4,55\n" +
			"65,5,55\n" +
			"111,6,55\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Integer> broadcastInts = CollectionDataSets.getIntegerDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	// Register the integers on the group-reduce operator under the broadcast name "ints".
	List<Tuple3<Integer, Long, String>> reduced = input
			.groupBy(1)
			.reduceGroup(new BCTuple3GroupReduce())
			.withBroadcastSet(broadcastInts, "ints")
			.collect();

	compareResultAsTuples(reduced, expected);
}
 
Example 15
Source File: GroupingSetsITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the batch table environment and registers two tables: the 3-tuple data
 * set under {@code TABLE_NAME}, and a mapped variant under
 * {@code TABLE_WITH_NULLS_NAME} in which the string field is set to {@code null}
 * whenever its lower-cased value contains "world".
 */
@Before
public void setupTables() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	tableEnv = BatchTableEnvironment.create(env, new TableConfig());

	// Nulls out the string field (f2) of every tuple mentioning "world"; the tuple is
	// modified in place and returned.
	MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> worldToNull =
		new MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {

			@Override
			public Tuple3<Integer, Long, String> map(Tuple3<Integer, Long, String> tuple) throws Exception {
				boolean mentionsWorld = tuple.f2.toLowerCase().contains("world");
				if (mentionsWorld) {
					tuple.f2 = null;
				}
				return tuple;
			}
		};

	DataSet<Tuple3<Integer, Long, String>> source = CollectionDataSets.get3TupleDataSet(env);
	tableEnv.registerDataSet(TABLE_NAME, source);

	MapOperator<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> sourceWithNulls =
		source.map(worldToNull);
	tableEnv.registerDataSet(TABLE_WITH_NULLS_NAME, sourceWithNulls);
}
 
Example 16
Source File: JoinITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Join with broadcast set.
 *
 * <p>Joins the 3-tuple data set (field 1) with the small 5-tuple data set (field 4)
 * using {@code T3T5BCJoin}, with the integer data set attached as broadcast set
 * {@code "ints"}, and compares the collected tuples with the expected output.
 */
@Test
public void testJoinWithBroadcastSet() throws Exception {
	final String expected = "Hi,Hallo,55\n" +
			"Hi,Hallo Welt wie,55\n" +
			"Hello,Hallo Welt,55\n" +
			"Hello world,Hallo Welt,55\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Integer> broadcastInts = CollectionDataSets.getIntegerDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.getSmall5TupleDataSet(env);

	// Key positions: field 1 of the left input equals field 4 of the right input.
	DataSet<Tuple3<String, String, Integer>> joined = left
			.join(right)
			.where(1)
			.equalTo(4)
			.with(new T3T5BCJoin())
			.withBroadcastSet(broadcastInts, "ints");

	compareResultAsTuples(joined.collect(), expected);
}
 
Example 17
Source File: FlatMapITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Test flatmapper if UDF returns input object
 * multiple times and changes it in between.
 *
 * <p>Applies {@code FlatMapper6} to the 3-tuple data set and compares the collected
 * tuples with the expected output.
 */
@Test
public void testFlatMapperIfUDFReturnsInputObjectMultipleTimesWhileChangingIt() throws Exception {
	final String expected = "0,1,Hi\n" +
			"0,2,Hello\n" + "1,2,Hello\n" +
			"0,2,Hello world\n" + "1,2,Hello world\n" + "2,2,Hello world\n" +
			"0,3,I am fine.\n" +
			"0,3,Luke Skywalker\n" + "1,3,Luke Skywalker\n" +
			"0,4,Comment#1\n" + "1,4,Comment#1\n" + "2,4,Comment#1\n" +
			"0,4,Comment#3\n" +
			"0,4,Comment#4\n" + "1,4,Comment#4\n" +
			"0,5,Comment#5\n" + "1,5,Comment#5\n" + "2,5,Comment#5\n" +
			"0,5,Comment#7\n" +
			"0,5,Comment#8\n" + "1,5,Comment#8\n" +
			"0,5,Comment#9\n" + "1,5,Comment#9\n" + "2,5,Comment#9\n" +
			"0,6,Comment#11\n" +
			"0,6,Comment#12\n" + "1,6,Comment#12\n" +
			"0,6,Comment#13\n" + "1,6,Comment#13\n" + "2,6,Comment#13\n" +
			"0,6,Comment#15\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	List<Tuple3<Integer, Long, String>> flatMapped = CollectionDataSets.get3TupleDataSet(env)
			.flatMap(new FlatMapper6())
			.collect();

	compareResultAsTuples(flatMapped, expected);
}
 
Example 18
Source File: JoinITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * UDF Join on tuples with multiple key field positions.
 *
 * <p>Joins the 3-tuple data set on fields (0, 1) with the 5-tuple data set on
 * fields (0, 4) using {@code T3T5FlatJoin}, and compares the collected tuples with
 * the expected output.
 */
@Test
public void testeUDFJoinOnTuplesWithMultipleKeyFieldPositions() throws Exception {
	final String expected = "Hi,Hallo\n" +
			"Hello,Hallo Welt\n" +
			"Hello world,Hallo Welt wie gehts?\n" +
			"Hello world,ABC\n" +
			"I am fine.,HIJ\n" +
			"I am fine.,IJK\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	// Two-field key: left (0, 1) is matched against right (0, 4).
	DataSet<Tuple2<String, String>> joined = left
			.join(right)
			.where(0, 1)
			.equalTo(0, 4)
			.with(new T3T5FlatJoin());

	compareResultAsTuples(joined.collect(), expected);
}
 
Example 19
Source File: JoinITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * UDF Join on tuples with tuple-returning key selectors.
 *
 * <p>Joins the 3-tuple data set (keyed by {@code KeySelector3}) with the 5-tuple
 * data set (keyed by {@code KeySelector4}) using {@code T3T5FlatJoin}, and compares
 * the collected tuples with the expected output.
 */
@Test
public void testUDFJoinOnTuplesWithTupleReturningKeySelectors() throws Exception {
	final String expected = "Hi,Hallo\n" +
			"Hello,Hallo Welt\n" +
			"Hello world,Hallo Welt wie gehts?\n" +
			"Hello world,ABC\n" +
			"I am fine.,HIJ\n" +
			"I am fine.,IJK\n";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	// Keys are supplied by selector objects instead of field positions.
	DataSet<Tuple2<String, String>> joined = left
			.join(right)
			.where(new KeySelector3())
			.equalTo(new KeySelector4())
			.with(new T3T5FlatJoin());

	compareResultAsTuples(joined.collect(), expected);
}
 
Example 20
Source File: PartitionITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Test range partition by key field.
 *
 * <p>Maps the 3-tuple data set with {@code PrefixMapper}, range-partitions on
 * fields (1, 2), groups on the same fields, sums field 0, and compares the collected
 * tuples with the expected text.
 */
@Test
public void testRangePartitionByKeyField2() throws Exception {
	final String expected = "(1,1,Hi)\n" +
	"(5,2,Hello)\n" +
	"(4,3,Hello)\n" +
	"(5,3,I am )\n" +
	"(6,3,Luke )\n" +
	"(34,4,Comme)\n" +
	"(65,5,Comme)\n" +
	"(111,6,Comme)";

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	// Partitioning and grouping use the same composite key (fields 1 and 2).
	AggregateOperator<Tuple3<Integer, Long, String>> summed = input
		.map(new PrefixMapper())
		.partitionByRange(1, 2)
		.groupBy(1, 2)
		.sum(0);

	compareResultAsText(summed.collect(), expected);
}