Java Code Examples for org.apache.flink.api.java.DataSet#collect()
The following examples show how to use
org.apache.flink.api.java.DataSet#collect().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OuterJoinITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testJoinWithTupleReturningKeySelectors() throws Exception { /* * UDF Join on tuples with tuple-returning key selectors */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.getSmall5TupleDataSet(env); DataSet<Tuple2<String, String>> joinDs = ds1.fullOuterJoin(ds2) .where(new KeySelector3()) //0, 1 .equalTo(new KeySelector4()) // 0, 4 .with(new T3T5FlatJoin()); List<Tuple2<String, String>> result = joinDs.collect(); String expected = "Hi,Hallo\n" + "Hello,Hallo Welt\n" + "Hello world,null\n" + "null,Hallo Welt wie\n"; compareResultAsTuples(result, expected); }
Example 2
Source File: GraphOperationsITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Checks the output of {@code getTriplets()} for the Long/Long/Long test graph.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testTriplets() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final Graph<Long, Long, Long> testGraph = Graph.fromDataSet(
            TestGraphUtils.getLongLongVertexData(environment),
            TestGraphUtils.getLongLongEdgeData(environment),
            environment);

    // Materialize the triplets on the client before comparing.
    final List<Triplet<Long, Long, Long>> triplets = testGraph.getTriplets().collect();

    expectedResult = "1,2,1,2,12\n"
            + "1,3,1,3,13\n"
            + "2,3,2,3,23\n"
            + "3,4,3,4,34\n"
            + "3,5,3,5,35\n"
            + "4,5,4,5,45\n"
            + "5,1,5,1,51\n";

    compareResultAsTuples(triplets, expectedResult);
}
Example 3
Source File: FilterITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Filters the 3-tuple data set with {@code Filter4} (a filter on an Integer tuple field)
 * and compares the surviving records with the expected rows.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testFilterOnIntegerTupleField() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(environment);
    final List<Tuple3<Integer, Long, String>> filtered = input.filter(new Filter4()).collect();

    final String expectedRows = "2,2,Hello\n"
            + "4,3,Hello world, how are you?\n"
            + "6,3,Luke Skywalker\n"
            + "8,4,Comment#2\n"
            + "10,4,Comment#4\n"
            + "12,5,Comment#6\n"
            + "14,5,Comment#8\n"
            + "16,6,Comment#10\n"
            + "18,6,Comment#12\n"
            + "20,6,Comment#14\n";

    compareResultAsTuples(filtered, expectedRows);
}
Example 4
Source File: ReduceOnNeighborMethodsITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Computes, for each vertex, the sum of all neighbor values by calling
 * {@code reduceOnNeighbors} with {@code SumNeighbors} and {@code EdgeDirection.ALL}.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testSumOfAllNeighborsNoValue() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final Graph<Long, Long, Long> testGraph = Graph.fromDataSet(
            TestGraphUtils.getLongLongVertexData(environment),
            TestGraphUtils.getLongLongEdgeData(environment),
            environment);

    final DataSet<Tuple2<Long, Long>> neighborSums =
            testGraph.reduceOnNeighbors(new SumNeighbors(), EdgeDirection.ALL);
    final List<Tuple2<Long, Long>> collected = neighborSums.collect();

    expectedResult = "1,10\n"
            + "2,4\n"
            + "3,12\n"
            + "4,8\n"
            + "5,8\n";

    compareResultAsTuples(collected, expectedResult);
}
Example 5
Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Checks groupReduce when each group is sorted in descending order on field 2.
 *
 * <p>Runs with parallelism 1, groups on field 1, and reduces each sorted group
 * with {@code Tuple3SortedGroupReduce}.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testCorrectnessOfGroupreduceWithDescendingGroupSort() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(1);

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(environment);
    final List<Tuple3<Integer, Long, String>> reduced = input
            .groupBy(1)
            .sortGroup(2, Order.DESCENDING)
            .reduceGroup(new Tuple3SortedGroupReduce())
            .collect();

    final String expectedRows = "1,1,Hi\n"
            + "5,2,Hello world-Hello\n"
            + "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n"
            + "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n"
            + "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n"
            + "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";

    compareResultAsTuples(reduced, expectedRows);
}
Example 6
Source File: ReduceOnNeighborMethodsITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Runs {@code groupReduceOnNeighbors} with {@code SumInNeighbors} over incoming edges.
 *
 * <p>Per the original test note, the UDF sums the in-neighbor values times the edge
 * weights for each vertex.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testSumOfInNeighbors() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final Graph<Long, Long, Long> testGraph = Graph.fromDataSet(
            TestGraphUtils.getLongLongVertexData(environment),
            TestGraphUtils.getLongLongEdgeData(environment),
            environment);

    final DataSet<Tuple2<Long, Long>> inNeighborSums =
            testGraph.groupReduceOnNeighbors(new SumInNeighbors(), EdgeDirection.IN);
    final List<Tuple2<Long, Long>> collected = inNeighborSums.collect();

    expectedResult = "1,255\n"
            + "2,12\n"
            + "3,59\n"
            + "4,102\n"
            + "5,285\n";

    compareResultAsTuples(collected, expectedResult);
}
Example 7
Source File: ExecutionEnvironmentITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Verifies that a user-supplied {@code Configuration} is honored by the local environment.
 *
 * <p>The configuration sets the number of task slots to {@code PARALLELISM}; the job then
 * runs with {@code PARALLELISM_AUTO_MAX}, and every {@code mapPartition} call emits the index
 * of its subtask. The number of collected elements must equal {@code PARALLELISM}.
 *
 * @throws Exception if executing the job fails
 */
@Test
public void testLocalEnvironmentWithConfig() throws Exception {
    final Configuration customConfig = new Configuration();
    customConfig.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, PARALLELISM);

    final ExecutionEnvironment localEnv = ExecutionEnvironment.createLocalEnvironment(customConfig);
    localEnv.setParallelism(ExecutionConfig.PARALLELISM_AUTO_MAX);
    localEnv.getConfig().disableSysoutLogging();

    final DataSet<Integer> subtaskIndices = localEnv
            .createInput(new ParallelismDependentInputFormat())
            .rebalance()
            .mapPartition(new RichMapPartitionFunction<Integer, Integer>() {
                @Override
                public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
                    // The input values are ignored; only the subtask index is emitted.
                    out.collect(getRuntimeContext().getIndexOfThisSubtask());
                }
            });

    final List<Integer> collectedIndices = subtaskIndices.collect();

    assertEquals(PARALLELISM, collectedIndices.size());
}
Example 8
Source File: JoinWithVerticesITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Tests {@code joinWithVertices} when the joined data set holds fewer elements than the
 * vertex data set but has the same type.
 *
 * <p>The join input is the first three vertices mapped with {@code VertexToTuple2Map};
 * values are combined by {@code AddValuesMapper}.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testWithLessElements() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final Graph<Long, Long, Long> testGraph = Graph.fromDataSet(
            TestGraphUtils.getLongLongVertexData(environment),
            TestGraphUtils.getLongLongEdgeData(environment),
            environment);

    final Graph<Long, Long, Long> joinedGraph = testGraph.joinWithVertices(
            testGraph.getVertices().first(3).map(new VertexToTuple2Map<>()),
            new AddValuesMapper());

    final List<Vertex<Long, Long>> joinedVertices = joinedGraph.getVertices().collect();

    expectedResult = "1,2\n"
            + "2,4\n"
            + "3,6\n"
            + "4,4\n"
            + "5,5\n";

    compareResultAsTuples(joinedVertices, expectedResult);
}
Example 9
Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Checks that the combiner input is also sorted for a combinable groupReduce with group sorting.
 *
 * <p>Runs with parallelism 1, groups on field 1, sorts each group ascending on field 0, and
 * reduces with {@code OrderCheckingCombinableReduce}.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testInputOfCombinerIsSortedForCombinableGroupReduceWithGroupSorting() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(1);

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(environment);
    final List<Tuple3<Integer, Long, String>> reduced = input
            .groupBy(1)
            .sortGroup(0, Order.ASCENDING)
            .reduceGroup(new OrderCheckingCombinableReduce())
            .collect();

    final String expectedRows = "1,1,Hi\n"
            + "2,2,Hello\n"
            + "4,3,Hello world, how are you?\n"
            + "7,4,Comment#1\n"
            + "11,5,Comment#5\n"
            + "16,6,Comment#10\n";

    compareResultAsTuples(reduced, expectedRows);
}
Example 10
Source File: ReduceITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Reduces a custom-type data set grouped through a key extractor ({@code KeySelector2})
 * with {@code CustomTypeReduce}, then compares the result as text.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testReduceOnCustomTypeWithKeyExtractor() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(environment);
    final List<CustomType> reduced = input
            .groupBy(new KeySelector2())
            .reduce(new CustomTypeReduce())
            .collect();

    final String expectedRows = "1,0,Hi\n"
            + "2,3,Hello!\n"
            + "3,12,Hello!\n"
            + "4,30,Hello!\n"
            + "5,60,Hello!\n"
            + "6,105,Hello!\n";

    compareResultAsText(reduced, expectedRows);
}
Example 11
Source File: GroupReduceITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Checks groupReduce on tuples with a key field selector and ascending group sorting.
 *
 * <p>Runs with parallelism 1, groups on field 1, sorts each group ascending on field 2, and
 * reduces with {@code Tuple3SortedGroupReduce}.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testCorrectnessOfGroupReduceOnTuplesWithKeyFieldSelectorAndGroupSorting() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(1);

    final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(environment);
    final List<Tuple3<Integer, Long, String>> reduced = input
            .groupBy(1)
            .sortGroup(2, Order.ASCENDING)
            .reduceGroup(new Tuple3SortedGroupReduce())
            .collect();

    final String expectedRows = "1,1,Hi\n"
            + "5,2,Hello-Hello world\n"
            + "15,3,Hello world, how are you?-I am fine.-Luke Skywalker\n"
            + "34,4,Comment#1-Comment#2-Comment#3-Comment#4\n"
            + "65,5,Comment#5-Comment#6-Comment#7-Comment#8-Comment#9\n"
            + "111,6,Comment#10-Comment#11-Comment#12-Comment#13-Comment#14-Comment#15\n";

    compareResultAsTuples(reduced, expectedRows);
}
Example 12
Source File: ScatterGatherConfigurationITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Checks the scatter-gather iteration when no direction parameter is given.
 *
 * <p>Per the original test note, the default behaviour collects messages from the
 * in-neighbors and sends them to the out-neighbors. The iteration runs for at most 5
 * supersteps with {@code IdMessengerTrg} and {@code VertexUpdateDirection}.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testIterationDefaultDirection() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final Graph<Long, HashSet<Long>, Long> testGraph = Graph
            .fromCollection(
                    TestGraphUtils.getLongLongVertices(),
                    TestGraphUtils.getLongLongEdges(),
                    environment)
            .mapVertices(new InitialiseHashSetMapper());

    final DataSet<Vertex<Long, HashSet<Long>>> updatedVertices = testGraph
            .runScatterGatherIteration(new IdMessengerTrg(), new VertexUpdateDirection(), 5)
            .getVertices();
    final List<Vertex<Long, HashSet<Long>>> collected = updatedVertices.collect();

    expectedResult = "1,[5]\n"
            + "2,[1]\n"
            + "3,[1, 2]\n"
            + "4,[3]\n"
            + "5,[3, 4]";

    compareResultAsTuples(collected, expectedResult);
}
Example 13
Source File: DistinctITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Checks {@code distinct()} on a data set of Integers.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testCorrectnessOfDistinctOnAtomic() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Integer> input = CollectionDataSets.getIntegerDataSet(environment);
    final List<Integer> distinctValues = input.distinct().collect();

    final String expectedRows = "1\n2\n3\n4\n5";

    compareResultAsText(distinctValues, expectedRows);
}
Example 14
Source File: OuterJoinITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Full outer join of a POJO data set with a tuple-based data set, projecting both sides
 * into a {@code Tuple2} (a POJO nested into a tuple).
 *
 * <p>The POJO side is keyed by field expressions ({@code nestedPojo.longNumber},
 * {@code number}, {@code nestedTupleWithCustom.f0}); the tuple side by {@code f6},
 * {@code f0}, {@code f2}.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testNestedIntoTuple() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<POJO> pojos = CollectionDataSets.getSmallPojoDataSet(environment);
    final DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> tuples =
            CollectionDataSets.getSmallTuplebasedDataSet(environment);

    final DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joined =
            pojos.fullOuterJoin(tuples)
                    .where("nestedPojo.longNumber", "number", "nestedTupleWithCustom.f0")
                    .equalTo("f6", "f0", "f2")
                    .with(new ProjectBothFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>());

    // As in the original, the parallelism is set after the join has been defined.
    environment.setParallelism(1);

    final List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> collected =
            joined.collect();

    final String expectedRows = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n"
            + "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n"
            + "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";

    compareResultAsTuples(collected, expectedRows);
}
Example 15
Source File: JoinITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testNonPojoToVerifyFullTupleKeys() throws Exception { /* * Non-POJO test to verify that full-tuple keys are working. */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds1 = CollectionDataSets.getSmallNestedTupleDataSet(env); DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds2 = CollectionDataSets.getSmallNestedTupleDataSet(env); DataSet<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>> joinDs = ds1.join(ds2).where(0).equalTo("f0.f0", "f0.f1"); // key is now Tuple2<Integer, Integer> env.setParallelism(1); List<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>> result = joinDs.collect(); String expected = "((1,1),one),((1,1),one)\n" + "((2,2),two),((2,2),two)\n" + "((3,3),three),((3,3),three)\n"; compareResultAsTuples(result, expected); }
Example 16
Source File: JoinITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Non-POJO test for "nested" tuple-element selection where the first key field is
 * greater than 0.
 *
 * <p>The small 3-tuple data set is self-joined on field 0; the resulting pairs are then
 * self-joined again with {@code f1.f0} on the left side and {@code f0.f0} on the right.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testNonPojoToVerifyNestedTupleElementSelectionWithFirstKeyFieldGreaterThanZero() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.getSmall3TupleDataSet(environment);

    final DataSet<Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>> pairs =
            tuples.join(tuples).where(0).equalTo(0);

    final DataSet<Tuple2<
            Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>,
            Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>>> joinedPairs =
            pairs.join(pairs).where("f1.f0").equalTo("f0.f0");

    // As in the original, the parallelism is set after the joins have been defined.
    environment.setParallelism(1);

    final List<Tuple2<
            Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>,
            Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>>> collected =
            joinedPairs.collect();

    final String expectedRows = "((1,1,Hi),(1,1,Hi)),((1,1,Hi),(1,1,Hi))\n"
            + "((2,2,Hello),(2,2,Hello)),((2,2,Hello),(2,2,Hello))\n"
            + "((3,2,Hello world),(3,2,Hello world)),((3,2,Hello world),(3,2,Hello world))\n";

    compareResultAsTuples(collected, expectedRows);
}
Example 17
Source File: UnionITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Unions the 3-tuple data set with a second instance of the same data set and expects
 * {@code FULL_TUPLE_3_STRING} twice.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testUnion2IdenticalDataSets() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> first = CollectionDataSets.get3TupleDataSet(environment);
    final DataSet<Tuple3<Integer, Long, String>> second = CollectionDataSets.get3TupleDataSet(environment);

    final List<Tuple3<Integer, Long, String>> unioned = first.union(second).collect();

    final String expectedRows = FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING;

    compareResultAsTuples(unioned, expectedRows);
}
Example 18
Source File: MapITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests a type-conversion mapper (Custom -> Tuple): maps the custom-type data set to
 * 3-tuples with {@code Mapper3} and compares all resulting rows.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testTypeConversionMapperCustomToTuple() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(environment);
    final DataSet<Tuple3<Integer, Long, String>> converted = input.map(new Mapper3());
    final List<Tuple3<Integer, Long, String>> collected = converted.collect();

    final String expectedRows = "1,0,Hi\n"
            + "2,1,Hello\n"
            + "2,2,Hello world\n"
            + "3,3,Hello world, how are you?\n"
            + "3,4,I am fine.\n"
            + "3,5,Luke Skywalker\n"
            + "4,6,Comment#1\n"
            + "4,7,Comment#2\n"
            + "4,8,Comment#3\n"
            + "4,9,Comment#4\n"
            + "5,10,Comment#5\n"
            + "5,11,Comment#6\n"
            + "5,12,Comment#7\n"
            + "5,13,Comment#8\n"
            + "5,14,Comment#9\n"
            + "6,15,Comment#10\n"
            + "6,16,Comment#11\n"
            + "6,17,Comment#12\n"
            + "6,18,Comment#13\n"
            + "6,19,Comment#14\n"
            + "6,20,Comment#15\n";

    compareResultAsTuples(collected, expectedRows);
}
Example 19
Source File: JoinITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Project join on a tuple input: joins the small 3-tuple data set with the 5-tuple data
 * set on field 1 of both sides and assembles a 6-field result by interleaving
 * {@code projectFirst} and {@code projectSecond} calls.
 *
 * @throws Exception if executing the job or comparing the result fails
 */
@Test
public void testProjectOnATuple1Input() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.getSmall3TupleDataSet(environment);
    final DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(environment);

    final DataSet<Tuple6<String, Long, String, Integer, Long, Long>> projected =
            left.join(right)
                    .where(1)
                    .equalTo(1)
                    .projectFirst(2, 1)
                    .projectSecond(3)
                    .projectFirst(0)
                    .projectSecond(4, 1);

    final List<Tuple6<String, Long, String, Integer, Long, Long>> collected = projected.collect();

    final String expectedRows = "Hi,1,Hallo,1,1,1\n"
            + "Hello,2,Hallo Welt,2,2,2\n"
            + "Hello world,2,Hallo Welt,3,2,2\n";

    compareResultAsTuples(collected, expectedRows);
}
Example 20
Source File: OrcTableSourceITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Scans an ORC table source with a projecting, filtering aggregate query.
 *
 * <p>Registers the flat test file as {@code OrcTable}, runs a MIN/MAX/COUNT query with a
 * WHERE clause on {@code _col0} and {@code _col1}, and expects exactly one result row with
 * the given contents.
 *
 * @throws Exception if executing the query fails
 */
@Test
public void testScanWithProjectionAndFilter() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final BatchTableEnvironment tableEnvironment = BatchTableEnvironment.create(environment);

    final OrcTableSource orcSource = OrcTableSource.builder()
            .path(getPath(TEST_FILE_FLAT))
            .forOrcSchema(TEST_SCHEMA_FLAT)
            .build();
    tableEnvironment.registerTableSource("OrcTable", orcSource);

    final String sql = "SELECT "
            + "MIN(_col4), MAX(_col4), "
            + "MIN(_col3), MAX(_col3), "
            + "MIN(_col0), MAX(_col0), "
            + "MIN(_col2), MAX(_col2), "
            + "COUNT(*) "
            + "FROM OrcTable "
            + "WHERE (_col0 BETWEEN 4975 and 5024 OR _col0 BETWEEN 9975 AND 10024) AND _col1 = 'F'";

    final Table queryResult = tableEnvironment.sqlQuery(sql);
    final List<Row> rows = tableEnvironment.toDataSet(queryResult, Row.class).collect();

    // The aggregate query must produce a single row.
    assertEquals(1, rows.size());
    assertEquals(
            "1500,6000,2 yr Degree,Unknown,4976,10024,D,W,50",
            rows.get(0).toString());
}