org.apache.flink.api.java.DataSet#collect

Source File: OuterJoinITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testJoinWithTupleReturningKeySelectors() throws Exception {
	// Full outer join of two small tuple data sets with a join UDF; both sides
	// are keyed through key selectors that return tuples.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.getSmall3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.getSmall5TupleDataSet(env);

	DataSet<Tuple2<String, String>> joined = left
			.fullOuterJoin(right)
			.where(new KeySelector3()) // 0, 1
			.equalTo(new KeySelector4()) // 0, 4
			.with(new T3T5FlatJoin());

	List<Tuple2<String, String>> actual = joined.collect();

	String expected = "Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,null\n"
			+ "null,Hallo Welt wie\n";
	compareResultAsTuples(actual, expected);
}

Source File: GraphOperationsITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testTriplets() throws Exception {
	// Exercises getTriplets() on a graph built from the shared Long/Long test data.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(
		TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env),
		env);

	DataSet<Triplet<Long, Long, Long>> triplets = graph.getTriplets();
	List<Triplet<Long, Long, Long>> actual = triplets.collect();

	expectedResult = "1,2,1,2,12\n"
		+ "1,3,1,3,13\n"
		+ "2,3,2,3,23\n"
		+ "3,4,3,4,34\n"
		+ "3,5,3,5,35\n"
		+ "4,5,4,5,45\n"
		+ "5,1,5,1,51\n";

	compareResultAsTuples(actual, expectedResult);
}

Source File: FilterITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testFilterOnIntegerTupleField() throws Exception {
	// Applies Filter4 (a filter on an Integer tuple field) to the 3-tuple data set.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> filtered = input.filter(new Filter4());

	List<Tuple3<Integer, Long, String>> actual = filtered.collect();

	String expected = "2,2,Hello\n"
			+ "4,3,Hello world, how are you?\n"
			+ "6,3,Luke Skywalker\n"
			+ "8,4,Comment#2\n"
			+ "10,4,Comment#4\n"
			+ "12,5,Comment#6\n"
			+ "14,5,Comment#8\n"
			+ "16,6,Comment#10\n"
			+ "18,6,Comment#12\n"
			+ "20,6,Comment#14\n";
	compareResultAsTuples(actual, expected);
}

Source File: ReduceOnNeighborMethodsITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testSumOfAllNeighborsNoValue() throws Exception {
	// For every vertex, sum the values of all of its neighbors (EdgeDirection.ALL).
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(
		TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env),
		env);

	DataSet<Tuple2<Long, Long>> neighborSums = graph.reduceOnNeighbors(new SumNeighbors(), EdgeDirection.ALL);
	List<Tuple2<Long, Long>> actual = neighborSums.collect();

	expectedResult = "1,10\n"
		+ "2,4\n"
		+ "3,12\n"
		+ "4,8\n"
		+ "5,8\n";

	compareResultAsTuples(actual, expectedResult);
}

Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testCorrectnessOfGroupreduceWithDescendingGroupSort() throws Exception {
	// groupReduce over groups keyed on field 1, with each group sorted
	// descending on field 2 before it reaches the reducer.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduced = input
			.groupBy(1)
			.sortGroup(2, Order.DESCENDING)
			.reduceGroup(new Tuple3SortedGroupReduce());

	List<Tuple3<Integer, Long, String>> actual = reduced.collect();

	String expected = "1,1,Hi\n"
			+ "5,2,Hello world-Hello\n"
			+ "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n"
			+ "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n"
			+ "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n"
			+ "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";
	compareResultAsTuples(actual, expected);
}

Source File: ReduceOnNeighborMethodsITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testSumOfInNeighbors() throws Exception {
	// For every vertex, sum the in-neighbor values times the edge weights
	// (EdgeDirection.IN, computed by SumInNeighbors).
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(
		TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env),
		env);

	DataSet<Tuple2<Long, Long>> inNeighborSums = graph.groupReduceOnNeighbors(new SumInNeighbors(), EdgeDirection.IN);
	List<Tuple2<Long, Long>> actual = inNeighborSums.collect();

	expectedResult = "1,255\n"
		+ "2,12\n"
		+ "3,59\n"
		+ "4,102\n"
		+ "5,285\n";

	compareResultAsTuples(actual, expectedResult);
}

Source File: ExecutionEnvironmentITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Verifies that a user-supplied configuration object can be handed to the LocalEnvironment.
 */
@Test
public void testLocalEnvironmentWithConfig() throws Exception {
	Configuration customConfig = new Configuration();
	customConfig.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, PARALLELISM);

	final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(customConfig);
	env.setParallelism(ExecutionConfig.PARALLELISM_AUTO_MAX);
	env.getConfig().disableSysoutLogging();

	DataSet<Integer> rebalancedInput = env
			.createInput(new ParallelismDependentInputFormat())
			.rebalance();

	// Every mapPartition invocation emits the index of the subtask running it.
	DataSet<Integer> subtaskIndices = rebalancedInput.mapPartition(
			new RichMapPartitionFunction<Integer, Integer>() {
				@Override
				public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
					out.collect(getRuntimeContext().getIndexOfThisSubtask());
				}
			});

	List<Integer> collected = subtaskIndices.collect();
	assertEquals(PARALLELISM, collected.size());
}

Source File: JoinWithVerticesITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testWithLessElements() throws Exception {
	// joinWithVertices where the joined DataSet has the same type as the vertex
	// DataSet but fewer elements (only the first three vertices).
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(
		TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env),
		env);

	Graph<Long, Long, Long> joinedGraph = graph.joinWithVertices(
		graph.getVertices().first(3).map(new VertexToTuple2Map<>()),
		new AddValuesMapper());

	DataSet<Vertex<Long, Long>> joinedVertices = joinedGraph.getVertices();
	List<Vertex<Long, Long>> actual = joinedVertices.collect();

	expectedResult = "1,2\n"
		+ "2,4\n"
		+ "3,6\n"
		+ "4,4\n"
		+ "5,5\n";

	compareResultAsTuples(actual, expectedResult);
}

Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testInputOfCombinerIsSortedForCombinableGroupReduceWithGroupSorting() throws Exception {
	// For a combinable groupReduce with group sorting, the combiner's input
	// must be sorted as well.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduced = input
			.groupBy(1)
			.sortGroup(0, Order.ASCENDING)
			.reduceGroup(new OrderCheckingCombinableReduce());

	List<Tuple3<Integer, Long, String>> actual = reduced.collect();

	String expected = "1,1,Hi\n"
			+ "2,2,Hello\n"
			+ "4,3,Hello world, how are you?\n"
			+ "7,4,Comment#1\n"
			+ "11,5,Comment#5\n"
			+ "16,6,Comment#10\n";
	compareResultAsTuples(actual, expected);
}

Source File: ReduceITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testReduceOnCustomTypeWithKeyExtractor() throws Exception {
	// Reduce over a custom type, grouped through a key extractor (KeySelector2).
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<CustomType> reduced = input
			.groupBy(new KeySelector2())
			.reduce(new CustomTypeReduce());

	List<CustomType> actual = reduced.collect();

	String expected = "1,0,Hi\n"
			+ "2,3,Hello!\n"
			+ "3,12,Hello!\n"
			+ "4,30,Hello!\n"
			+ "5,60,Hello!\n"
			+ "6,105,Hello!\n";
	compareResultAsText(actual, expected);
}

Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testCorrectnessOfGroupReduceOnTuplesWithKeyFieldSelectorAndGroupSorting() throws Exception {
	// groupReduce on tuples grouped by key field 1, with each group sorted
	// ascending on field 2.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduced = input
			.groupBy(1)
			.sortGroup(2, Order.ASCENDING)
			.reduceGroup(new Tuple3SortedGroupReduce());

	List<Tuple3<Integer, Long, String>> actual = reduced.collect();

	String expected = "1,1,Hi\n"
			+ "5,2,Hello-Hello world\n"
			+ "15,3,Hello world, how are you?-I am fine.-Luke Skywalker\n"
			+ "34,4,Comment#1-Comment#2-Comment#3-Comment#4\n"
			+ "65,5,Comment#5-Comment#6-Comment#7-Comment#8-Comment#9\n"
			+ "111,6,Comment#10-Comment#11-Comment#12-Comment#13-Comment#14-Comment#15\n";
	compareResultAsTuples(actual, expected);
}

Source File: ScatterGatherConfigurationITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testIterationDefaultDirection() throws Exception {
	// With no direction parameter the iteration must behave as before:
	// messages are collected from the in-neighbors and sent to the out-neighbors.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, HashSet<Long>, Long> graph = Graph
		.fromCollection(
			TestGraphUtils.getLongLongVertices(),
			TestGraphUtils.getLongLongEdges(),
			env)
		.mapVertices(new InitialiseHashSetMapper());

	Graph<Long, HashSet<Long>, Long> iterated =
		graph.runScatterGatherIteration(new IdMessengerTrg(), new VertexUpdateDirection(), 5);
	DataSet<Vertex<Long, HashSet<Long>>> updatedVertices = iterated.getVertices();

	List<Vertex<Long, HashSet<Long>>> actual = updatedVertices.collect();

	expectedResult = "1,[5]\n"
		+ "2,[1]\n"
		+ "3,[1, 2]\n"
		+ "4,[3]\n"
		+ "5,[3, 4]";

	compareResultAsTuples(actual, expectedResult);
}

Source File: DistinctITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testCorrectnessOfDistinctOnAtomic() throws Exception {
	// distinct() applied directly to a data set of Integers.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Integer> input = CollectionDataSets.getIntegerDataSet(env);
	DataSet<Integer> distinctValues = input.distinct();

	List<Integer> actual = distinctValues.collect();

	String expected = "1\n2\n3\n4\n5";
	compareResultAsText(actual, expected);
}

Source File: OuterJoinITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testNestedIntoTuple() throws Exception {
	// Full outer join of POJOs against tuples, keyed on nested POJO fields
	// on the left and plain tuple fields on the right.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<POJO> pojos = CollectionDataSets.getSmallPojoDataSet(env);
	DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> tuples =
			CollectionDataSets.getSmallTuplebasedDataSet(env);

	DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joined = pojos
			.fullOuterJoin(tuples)
			.where("nestedPojo.longNumber", "number", "nestedTupleWithCustom.f0")
			.equalTo("f6", "f0", "f2")
			.with(new ProjectBothFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>());

	// Parallelism is set only after the join has been defined, just before execution.
	env.setParallelism(1);
	List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> actual = joined.collect();

	String expected = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n"
			+ "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n"
			+ "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testNonPojoToVerifyFullTupleKeys() throws Exception {
	// Non-POJO case checking that a full (nested) tuple can be used as the join key.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Tuple2<Integer, Integer>, String>> left = CollectionDataSets.getSmallNestedTupleDataSet(env);
	DataSet<Tuple2<Tuple2<Integer, Integer>, String>> right = CollectionDataSets.getSmallNestedTupleDataSet(env);

	// The key on both sides is now Tuple2<Integer, Integer>.
	DataSet<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>> joined = left
			.join(right)
			.where(0)
			.equalTo("f0.f0", "f0.f1");

	env.setParallelism(1);
	List<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>> actual =
			joined.collect();

	String expected = "((1,1),one),((1,1),one)\n"
			+ "((2,2),two),((2,2),two)\n"
			+ "((3,3),three),((3,3),three)\n";
	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testNonPojoToVerifyNestedTupleElementSelectionWithFirstKeyFieldGreaterThanZero()
		throws Exception {
	// Non-POJO case checking "nested" tuple-element selection when the first
	// key field is greater than 0 ("f1.f0" on the left side of the second join).
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> base = CollectionDataSets.getSmall3TupleDataSet(env);

	// Self-join on field 0 to obtain tuples of tuples.
	DataSet<Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>> selfJoined =
			base.join(base).where(0).equalTo(0);

	DataSet<Tuple2<Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>, Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>>> joined = selfJoined
			.join(selfJoined)
			.where("f1.f0")
			.equalTo("f0.f0");

	env.setParallelism(1);
	List<Tuple2<Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>, Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>>> actual =
			joined.collect();

	String expected = "((1,1,Hi),(1,1,Hi)),((1,1,Hi),(1,1,Hi))\n"
			+ "((2,2,Hello),(2,2,Hello)),((2,2,Hello),(2,2,Hello))\n"
			+ "((3,2,Hello world),(3,2,Hello world)),((3,2,Hello world),(3,2,Hello world))\n";
	compareResultAsTuples(actual, expected);
}

Source File: UnionITCase.java From flink with Apache License 2.0

6 votes

@Test
public void testUnion2IdenticalDataSets() throws Exception {
	// Union of the 3-tuple data set with a second copy of itself.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> first = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> unioned = first.union(CollectionDataSets.get3TupleDataSet(env));

	List<Tuple3<Integer, Long, String>> actual = unioned.collect();

	// Expected text is the full data set text repeated twice.
	String expected = FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING;
	compareResultAsTuples(actual, expected);
}

Source File: MapITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testTypeConversionMapperCustomToTuple() throws Exception {
	// Type-converting mapper: CustomType elements in, Tuple3 elements out.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> converted = input.map(new Mapper3());

	List<Tuple3<Integer, Long, String>> actual = converted.collect();

	String expected = "1,0,Hi\n"
			+ "2,1,Hello\n"
			+ "2,2,Hello world\n"
			+ "3,3,Hello world, how are you?\n"
			+ "3,4,I am fine.\n"
			+ "3,5,Luke Skywalker\n"
			+ "4,6,Comment#1\n"
			+ "4,7,Comment#2\n"
			+ "4,8,Comment#3\n"
			+ "4,9,Comment#4\n"
			+ "5,10,Comment#5\n"
			+ "5,11,Comment#6\n"
			+ "5,12,Comment#7\n"
			+ "5,13,Comment#8\n"
			+ "5,14,Comment#9\n"
			+ "6,15,Comment#10\n"
			+ "6,16,Comment#11\n"
			+ "6,17,Comment#12\n"
			+ "6,18,Comment#13\n"
			+ "6,19,Comment#14\n"
			+ "6,20,Comment#15\n";
	compareResultAsTuples(actual, expected);
}

Source File: JoinITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testProjectOnATuple1Input() throws Exception {
	// Projection join on tuple inputs: both sides are joined on field 1 and the
	// output tuple is assembled from interleaved projectFirst/projectSecond calls.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.getSmall3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	DataSet<Tuple6<String, Long, String, Integer, Long, Long>> projected = left
			.join(right)
			.where(1)
			.equalTo(1)
			.projectFirst(2, 1)
			.projectSecond(3)
			.projectFirst(0)
			.projectSecond(4, 1);

	List<Tuple6<String, Long, String, Integer, Long, Long>> actual = projected.collect();

	String expected = "Hi,1,Hallo,1,1,1\n"
			+ "Hello,2,Hallo Welt,2,2,2\n"
			+ "Hello world,2,Hallo Welt,3,2,2\n";
	compareResultAsTuples(actual, expected);
}

Source File: OrcTableSourceITCase.java From flink with Apache License 2.0

5 votes

@Test
public void testScanWithProjectionAndFilter() throws Exception {
	// Registers an ORC table source, runs an aggregating SQL query with a
	// WHERE clause over it, and checks the single resulting row.
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tEnv = BatchTableEnvironment.create(env);

	OrcTableSource orcSource = OrcTableSource.builder()
		.path(getPath(TEST_FILE_FLAT))
		.forOrcSchema(TEST_SCHEMA_FLAT)
		.build();
	tEnv.registerTableSource("OrcTable", orcSource);

	String query = "SELECT "
		+ "MIN(_col4), MAX(_col4), "
		+ "MIN(_col3), MAX(_col3), "
		+ "MIN(_col0), MAX(_col0), "
		+ "MIN(_col2), MAX(_col2), "
		+ "COUNT(*) "
		+ "FROM OrcTable "
		+ "WHERE (_col0 BETWEEN 4975 and 5024 OR _col0 BETWEEN 9975 AND 10024) AND _col1 = 'F'";
	Table queryResult = tEnv.sqlQuery(query);

	DataSet<Row> rows = tEnv.toDataSet(queryResult, Row.class);
	List<Row> collected = rows.collect();

	assertEquals(1, collected.size());
	Row onlyRow = collected.get(0);
	assertEquals("1500,6000,2 yr Degree,Unknown,4976,10024,D,W,50", onlyRow.toString());
}

Java Code Examples for org.apache.flink.api.java.DataSet#collect()