org.apache.flink.test.operators.util.CollectionDataSets#get5TupleDataSet

Source File: GroupReduceITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testTupleKeySelectorSortCombineOnTuple() throws Exception {
	/*
	 * Check correctness of a sorted groupReduce whose group sort key is
	 * produced by a key selector (FiveToTwoTupleExtractor).
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);

	// Group with IntFieldExtractor(0), sort every group in descending order, then reduce it.
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduced = input
			.groupBy(new IntFieldExtractor<Tuple5<Integer, Long, Integer, String, Long>>(0))
			.sortGroup(new FiveToTwoTupleExtractor(), Order.DESCENDING)
			.reduceGroup(new Tuple5SortedGroupReduce());

	final List<Tuple5<Integer, Long, Integer, String, Long>> actual = reduced.collect();

	final String expected =
			"1,1,0,Hallo,1\n"
			+ "2,5,0,Hallo Welt-Hallo Welt wie,1\n"
			+ "3,15,0,BCD-ABC-Hallo Welt wie gehts?,2\n"
			+ "4,34,0,FGH-CDE-EFG-DEF,1\n"
			+ "5,65,0,IJK-HIJ-KLM-JKL-GHI,1\n";

	compareResultAsTuples(actual, expected);
}

Source File: SortPartitionITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testSortPartitionByTwoFieldExpressions() throws Exception {
	/*
	 * Sort partitions on two field expressions ("f4" ascending, "f2" descending)
	 * and verify the order through OrderCheckMapper with a Tuple5Checker.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(2);

	final String expected = "(true)\n";

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> source = CollectionDataSets.get5TupleDataSet(env);

	// Run the input through IdMapper at parallelism 2 to parallelize it.
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> parallelInput = source
			.map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>())
			.setParallelism(2);

	// The second sortPartition is invoked on the operator returned by the first one.
	final List<Tuple1<Boolean>> actual = parallelInput
			.sortPartition("f4", Order.ASCENDING)
			.sortPartition("f2", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple5Checker()))
			.distinct()
			.collect();

	compareResultAsText(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testDefaultJoinOnTuples() throws Exception {
	/*
	 * Default join on tuples: no join function is supplied, so the result is
	 * a Tuple2 holding the joined Tuple3 and Tuple5 elements.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	// Key: field 0 of the left input against field 2 of the right input.
	final DataSet<Tuple2<Tuple3<Integer, Long, String>, Tuple5<Integer, Long, Integer, String, Long>>> joined =
			left.join(right)
					.where(0)
					.equalTo(2);

	final List<Tuple2<Tuple3<Integer, Long, String>, Tuple5<Integer, Long, Integer, String, Long>>> actual =
			joined.collect();

	final String expected =
			"(1,1,Hi),(2,2,1,Hallo Welt,2)\n"
			+ "(2,2,Hello),(2,3,2,Hallo Welt wie,1)\n"
			+ "(3,2,Hello world),(3,4,3,Hallo Welt wie gehts?,2)\n";

	compareResultAsTuples(actual, expected);
}

Source File: SortPartitionITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testSortPartitionByTwoFieldExpressions() throws Exception {
	/*
	 * Partition sort on two field expressions: "f4" ascending followed by
	 * "f2" descending; the order is checked by OrderCheckMapper/Tuple5Checker.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(2);

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> tuples = CollectionDataSets.get5TupleDataSet(env);

	// IdMapper at parallelism 2 parallelizes the input before it is sorted.
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> parallelTuples = tuples
			.map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>())
			.setParallelism(2);

	// Both sortPartition calls stay in one chain: the second is applied to the first's result.
	final List<Tuple1<Boolean>> checks = parallelTuples
			.sortPartition("f4", Order.ASCENDING)
			.sortPartition("f2", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple5Checker()))
			.distinct()
			.collect();

	final String expected = "(true)\n";

	compareResultAsText(checks, expected);
}

Source File: JoinITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testJoinThatReturnsTheRightInputObject() throws Exception {
	/*
	 * Join whose function (RightReturningJoin) yields elements of the
	 * right-hand input type.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	// Both inputs are keyed on field 1.
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> joined = left
			.join(right)
			.where(1)
			.equalTo(1)
			.with(new RightReturningJoin());

	final List<Tuple5<Integer, Long, Integer, String, Long>> actual = joined.collect();

	final String expected =
			"1,1,0,Hallo,1\n"
			+ "2,2,1,Hallo Welt,2\n"
			+ "2,2,1,Hallo Welt,2\n";

	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testJoinWithHuge() throws Exception {
	/*
	 * Join issued through joinWithHuge, combined by T3T5FlatJoin.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Long, String>> small = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> other = CollectionDataSets.get5TupleDataSet(env);

	// Both inputs are keyed on field 1.
	final DataSet<Tuple2<String, String>> joined =
			small.joinWithHuge(other)
					.where(1)
					.equalTo(1)
					.with(new T3T5FlatJoin());

	final List<Tuple2<String, String>> actual = joined.collect();

	final String expected =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,Hallo Welt\n";

	compareResultAsTuples(actual, expected);
}

Source File: SortPartitionITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testSortPartitionByTwoKeyFields() throws Exception {
	/*
	 * Sort partitions on two key field positions (4 ascending, 2 descending)
	 * and verify the order through OrderCheckMapper with a Tuple5Checker.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(2);

	final String expected = "(true)\n";

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> source = CollectionDataSets.get5TupleDataSet(env);

	// Run the input through IdMapper at parallelism 2 to parallelize it.
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> parallelInput = source
			.map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>())
			.setParallelism(2);

	// The second sortPartition is invoked on the operator returned by the first one.
	final List<Tuple1<Boolean>> actual = parallelInput
			.sortPartition(4, Order.ASCENDING)
			.sortPartition(2, Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple5Checker()))
			.distinct()
			.collect();

	compareResultAsText(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testJoinThatReturnsTheRightInputObject() throws Exception {
	/*
	 * Join using RightReturningJoin; the result elements have the type of
	 * the right-hand (Tuple5) input.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final String expected =
			"1,1,0,Hallo,1\n"
			+ "2,2,1,Hallo Welt,2\n"
			+ "2,2,1,Hallo Welt,2\n";

	final DataSet<Tuple3<Integer, Long, String>> tuple3Input = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> tuple5Input = CollectionDataSets.get5TupleDataSet(env);

	// Field 1 is the join key on both sides.
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> rightSide = tuple3Input
			.join(tuple5Input)
			.where(1)
			.equalTo(1)
			.with(new RightReturningJoin());

	final List<Tuple5<Integer, Long, Integer, String, Long>> actual = rightSide.collect();

	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testJoinWithTiny() throws Exception {
	/*
	 * Join issued through joinWithTiny, combined by T3T5FlatJoin.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Long, String>> first = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> second = CollectionDataSets.get5TupleDataSet(env);

	// Both inputs are keyed on field 1.
	final DataSet<Tuple2<String, String>> joined = first
			.joinWithTiny(second)
			.where(1)
			.equalTo(1)
			.with(new T3T5FlatJoin());

	final List<Tuple2<String, String>> actual = joined.collect();

	final String expected =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,Hallo Welt\n";

	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testJoinWithRangePartitioning() throws Exception {
	/*
	 * Join on tuples with multiple key field positions, where both inputs are
	 * range-partitioned with the same customized distribution.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.get3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	// Parallelism is raised only after both sources have been created.
	env.setParallelism(4);
	final TestDistribution distribution = new TestDistribution();

	// Each side is partitioned on the same fields that are later used as join keys.
	final DataSet<Tuple3<Integer, Long, String>> leftPartitioned =
			DataSetUtils.partitionByRange(left, distribution, 0, 1);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> rightPartitioned =
			DataSetUtils.partitionByRange(right, distribution, 0, 4);

	final DataSet<Tuple2<String, String>> joined = leftPartitioned
			.join(rightPartitioned)
			.where(0, 1)
			.equalTo(0, 4)
			.with(new T3T5FlatJoin());

	final List<Tuple2<String, String>> actual = joined.collect();

	final String expected =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,Hallo Welt wie gehts?\n"
			+ "Hello world,ABC\n"
			+ "I am fine.,HIJ\n"
			+ "I am fine.,IJK\n";

	compareResultAsTuples(actual, expected);
}

Source File: ProjectITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

@Override
protected void testProgram() throws Exception {
	/*
	 * Projection by tuple field indexes: fields 3, 4 and 2 of the 5-tuple
	 * input, in that order.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);

	// The typed assignment is kept so the projected tuple type is taken from the declaration.
	final DataSet<Tuple3<String, Long, Integer>> projected = input.project(3, 4, 2);
	final List<Tuple3<String, Long, Integer>> actual = projected.collect();

	final String expectedResult =
			"Hallo,1,0\n"
			+ "Hallo Welt,2,1\n"
			+ "Hallo Welt wie,1,2\n"
			+ "Hallo Welt wie gehts?,2,3\n"
			+ "ABC,2,4\n"
			+ "BCD,3,5\n"
			+ "CDE,2,6\n"
			+ "DEF,1,7\n"
			+ "EFG,1,8\n"
			+ "FGH,2,9\n"
			+ "GHI,1,10\n"
			+ "HIJ,3,11\n"
			+ "IJK,3,12\n"
			+ "JKL,2,13\n"
			+ "KLM,2,14\n";

	compareResultAsTuples(actual, expectedResult);
}

Source File: JoinITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testProjectOnATuple1Input() throws Exception {
	/*
	 * Project join on a tuple input, variant 1: the projection starts with
	 * fields of the first input.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Long, String>> first = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> second = CollectionDataSets.get5TupleDataSet(env);

	// Join on field 1 of both inputs, then alternate projections from the first and second input.
	final DataSet<Tuple6<String, Long, String, Integer, Long, Long>> projected = first
			.join(second)
			.where(1)
			.equalTo(1)
			.projectFirst(2, 1)
			.projectSecond(3)
			.projectFirst(0)
			.projectSecond(4, 1);

	final List<Tuple6<String, Long, String, Integer, Long, Long>> actual = projected.collect();

	final String expected =
			"Hi,1,Hallo,1,1,1\n"
			+ "Hello,2,Hallo Welt,2,2,2\n"
			+ "Hello world,2,Hallo Welt,3,2,2\n";

	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

@Test
public void testProjectOnATuple1Input() throws Exception {
	/*
	 * Project join on a tuple input (1): projectFirst is applied before
	 * projectSecond when assembling the Tuple6 result.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final String expected =
			"Hi,1,Hallo,1,1,1\n"
			+ "Hello,2,Hallo Welt,2,2,2\n"
			+ "Hello world,2,Hallo Welt,3,2,2\n";

	final DataSet<Tuple3<Integer, Long, String>> tuple3Input = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> tuple5Input = CollectionDataSets.get5TupleDataSet(env);

	// Field 1 is the join key on both sides; the projection calls stay in their original order.
	final DataSet<Tuple6<String, Long, String, Integer, Long, Long>> joinedProjection = tuple3Input
			.join(tuple5Input)
			.where(1)
			.equalTo(1)
			.projectFirst(2, 1)
			.projectSecond(3)
			.projectFirst(0)
			.projectSecond(4, 1);

	final List<Tuple6<String, Long, String, Integer, Long, Long>> actual = joinedProjection.collect();

	compareResultAsTuples(actual, expected);
}

Source File: DistinctITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testCorrectnessOfDistinctOnCustomTypeWithTupleReturningTypeExtractor() throws Exception{
	/*
	 * Check correctness of distinct with a tuple-returning key extractor
	 * (KeySelector2), followed by a projection to fields 0 and 4.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);

	// The typed assignment is kept so the projected tuple type is taken from the declaration.
	final DataSet<Tuple2<Integer, Long>> distinctProjected = input
			.distinct(new KeySelector2())
			.project(0, 4);

	final List<Tuple2<Integer, Long>> actual = distinctProjected.collect();

	final String expected =
			"1,1\n"
			+ "2,1\n"
			+ "2,2\n"
			+ "3,2\n"
			+ "3,3\n"
			+ "4,1\n"
			+ "4,2\n"
			+ "5,1\n"
			+ "5,2\n"
			+ "5,3\n";

	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

@Test
public void testUDFJoinOnTuplesWithTupleReturningKeySelectors() throws Exception {
	/*
	 * UDF join on tuples where both join keys come from tuple-returning key
	 * selectors (KeySelector3 on the left, KeySelector4 on the right).
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.get3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	final DataSet<Tuple2<String, String>> joined = left
			.join(right)
			.where(new KeySelector3())
			.equalTo(new KeySelector4())
			.with(new T3T5FlatJoin());

	final List<Tuple2<String, String>> actual = joined.collect();

	final String expected =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,Hallo Welt wie gehts?\n"
			+ "Hello world,ABC\n"
			+ "I am fine.,HIJ\n"
			+ "I am fine.,IJK\n";

	compareResultAsTuples(actual, expected);
}

Source File: ReduceITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testReduceATupleReturningKeySelector() throws Exception {
	/*
	 * Reduce (Tuple5Reduce) over groups keyed by a tuple-returning
	 * KeySelector (KeySelector3).
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduced = input
			.groupBy(new KeySelector3())
			.reduce(new Tuple5Reduce());

	final List<Tuple5<Integer, Long, Integer, String, Long>> actual = reduced.collect();

	final String expected =
			"1,1,0,Hallo,1\n"
			+ "2,3,2,Hallo Welt wie,1\n"
			+ "2,2,1,Hallo Welt,2\n"
			+ "3,9,0,P-),2\n"
			+ "3,6,5,BCD,3\n"
			+ "4,17,0,P-),1\n"
			+ "4,17,0,P-),2\n"
			+ "5,11,10,GHI,1\n"
			+ "5,29,0,P-),2\n"
			+ "5,25,0,P-),3\n";

	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testUDFJoinOnTuplesWithTupleReturningKeySelectors() throws Exception {
	/*
	 * Join with a user-defined function (T3T5FlatJoin) on tuples; the keys of
	 * both inputs are supplied by tuple-returning key selectors.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final String expected =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,Hallo Welt wie gehts?\n"
			+ "Hello world,ABC\n"
			+ "I am fine.,HIJ\n"
			+ "I am fine.,IJK\n";

	final DataSet<Tuple3<Integer, Long, String>> tuple3Input = CollectionDataSets.get3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> tuple5Input = CollectionDataSets.get5TupleDataSet(env);

	// KeySelector3 keys the Tuple3 side, KeySelector4 keys the Tuple5 side.
	final DataSet<Tuple2<String, String>> joinResult = tuple3Input
			.join(tuple5Input)
			.where(new KeySelector3())
			.equalTo(new KeySelector4())
			.with(new T3T5FlatJoin());

	final List<Tuple2<String, String>> actual = joinResult.collect();

	compareResultAsTuples(actual, expected);
}

Source File: GroupReduceITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testCorrectnessOfGroupReduceOnTuplesWithTupleReturningKeySelector() throws Exception {
	/*
	 * Check correctness of groupReduce (Tuple5GroupReduce) on tuples grouped
	 * by a tuple-returning key selector (KeySelector4).
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduced = input
			.groupBy(new KeySelector4())
			.reduceGroup(new Tuple5GroupReduce());

	final List<Tuple5<Integer, Long, Integer, String, Long>> actual = reduced.collect();

	final String expected =
			"1,1,0,P-),1\n"
			+ "2,3,0,P-),1\n"
			+ "2,2,0,P-),2\n"
			+ "3,9,0,P-),2\n"
			+ "3,6,0,P-),3\n"
			+ "4,17,0,P-),1\n"
			+ "4,17,0,P-),2\n"
			+ "5,11,0,P-),1\n"
			+ "5,29,0,P-),2\n"
			+ "5,25,0,P-),3\n";

	compareResultAsTuples(actual, expected);
}

Source File: CoGroupITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testCoGroupWithRangePartitioning() throws Exception {
	/*
	 * CoGroup on tuples with multiple key field positions, where both inputs
	 * are range-partitioned with the same customized distribution.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> tuple5Input = CollectionDataSets.get5TupleDataSet(env);
	final DataSet<Tuple3<Integer, Long, String>> tuple3Input = CollectionDataSets.get3TupleDataSet(env);

	// Parallelism is raised only after both sources have been created.
	env.setParallelism(4);
	final TestDistribution distribution = new TestDistribution();

	// Each side is partitioned on the same fields that are later used as coGroup keys.
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> tuple5Partitioned =
			DataSetUtils.partitionByRange(tuple5Input, distribution, 0, 4);
	final DataSet<Tuple3<Integer, Long, String>> tuple3Partitioned =
			DataSetUtils.partitionByRange(tuple3Input, distribution, 0, 1);

	final DataSet<Tuple3<Integer, Long, String>> coGrouped = tuple5Partitioned
			.coGroup(tuple3Partitioned)
			.where(0, 4)
			.equalTo(0, 1)
			.with(new Tuple5Tuple3CoGroup());

	final List<Tuple3<Integer, Long, String>> actual = coGrouped.collect();

	final String expected =
			"1,1,Hallo\n"
			+ "2,2,Hallo Welt\n"
			+ "3,2,Hallo Welt wie gehts?\n"
			+ "3,2,ABC\n"
			+ "5,3,HIJ\n"
			+ "5,3,IJK\n";

	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testProjectJoinOnATuple2Input() throws Exception {
	/*
	 * Project join on a tuple input, variant 2: the projection starts with a
	 * field of the second input.
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Long, String>> first = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple5<Integer, Long, Integer, String, Long>> second = CollectionDataSets.get5TupleDataSet(env);

	// Join on field 1 of both inputs, then alternate projections from the second and first input.
	final DataSet<Tuple6<String, String, Long, Long, Long, Integer>> projected = first
			.join(second)
			.where(1)
			.equalTo(1)
			.projectSecond(3)
			.projectFirst(2, 1)
			.projectSecond(4, 1)
			.projectFirst(0);

	final List<Tuple6<String, String, Long, Long, Long, Integer>> actual = projected.collect();

	final String expected =
			"Hallo,Hi,1,1,1,1\n"
			+ "Hallo Welt,Hello,2,2,2,2\n"
			+ "Hallo Welt,Hello world,2,2,2,3\n";

	compareResultAsTuples(actual, expected);
}

Java Code Examples for org.apache.flink.test.operators.util.CollectionDataSets#get5TupleDataSet()