org.apache.flink.api.java.aggregation.Aggregations Java Examples
The following examples show how to use org.apache.flink.api.java.aggregation.Aggregations. They are collected from open-source projects; the project, source file, and license are noted above each example.
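Before diving into the examples, here is a minimal, self-contained sketch of the typical usage pattern; the class name and input data are invented for illustration:

import java.util.List;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple2;

// hypothetical demo class, not part of any of the projects below
public class AggregationsSketch {

    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // made-up (key, value) records
        DataSet<Tuple2<Integer, Integer>> input = env.fromElements(
                Tuple2.of(1, 10), Tuple2.of(1, 20), Tuple2.of(2, 5));

        // per-key sum of field 1; Aggregations.MIN and Aggregations.MAX work the same way
        List<Tuple2<Integer, Integer>> sums = input
                .groupBy(0)
                .aggregate(Aggregations.SUM, 1)
                .collect();

        System.out.println(sums); // e.g. [(1,30), (2,5)] (order is not guaranteed)
    }
}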
Example #1
Source File: AggregateITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testNestedAggregate() throws Exception {
    /*
     * Nested Aggregate
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.MIN, 0)
            .aggregate(Aggregations.MIN, 0)
            .project(0);

    List<Tuple1<Integer>> result = aggregateDs.collect();

    String expected = "1\n";

    compareResultAsTuples(result, expected);
}
Example #2
Source File: AggregateOperator.java From flink with Apache License 2.0
public AggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);

    return this;
}
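For context, user code reaches this and(...) method by chaining it onto a first aggregate(...) call, so several aggregations are evaluated in a single pass. A hedged sketch (env and the input values are invented for illustration):

DataSet<Tuple2<Integer, Long>> ds = env.fromElements(
        Tuple2.of(1, 7L), Tuple2.of(3, 2L));

ds.aggregate(Aggregations.SUM, 0)   // creates the AggregateOperator
  .and(Aggregations.MAX, 1)         // the method above registers a second aggregation
  .print();                         // prints (4,7)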
Example #3
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testGroupedAggregate() throws Exception {
    /*
     * Grouped Aggregate
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<Long, Integer>> result = aggregateDs.collect();

    String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n" + "6,111\n";

    compareResultAsTuples(result, expected);
}
Example #4
Source File: AggregateOperatorTest.java From flink with Apache License 2.0
@Test
public void testAggregationTypes() {
    try {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

        // should work: multiple aggregates
        tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

        // should work: nested aggregates
        tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1);

        // should not work: SUM on a String field
        try {
            tupleDs.aggregate(Aggregations.SUM, 2);
            Assert.fail();
        } catch (UnsupportedAggregationTypeException iae) {
            // we're good here
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
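The inner try/catch above mirrors what happens in user code: requesting a numeric aggregation on a non-numeric field fails while the operator is being built, not when the job runs. A hedged sketch (env and the input are invented for illustration):

DataSet<Tuple2<String, Integer>> ds = env.fromElements(Tuple2.of("a", 1));

try {
    ds.aggregate(Aggregations.SUM, 0);   // field 0 is a String
} catch (UnsupportedAggregationTypeException e) {
    // the aggregations are only defined for supported (numeric) field types
}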
Example #5
Source File: AggregateOperator.java From Flink-CEPplus with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which the aggregation is applied
 * @param function the aggregation to apply (SUM, MIN, or MAX)
 * @param field the position of the tuple field to aggregate
 */
public AggregateOperator(Grouping<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getInputDataSet().getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // set the aggregation fields
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = input;
}
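User programs normally do not call this constructor directly; it is invoked behind the scenes when aggregate(...) is called on a grouped DataSet. A hedged sketch of the user-facing call path (ds is assumed to be a tuple DataSet like the ones in the test examples):

// groupBy(0) returns a grouping; aggregate(...) constructs the operator above
DataSet<Tuple2<Integer, Long>> perKeyMin = ds
        .groupBy(0)
        .aggregate(Aggregations.MIN, 1);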
Example #6
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testNestedAggregateOfMutableValueTypes() throws Exception {
    /*
     * Nested Aggregate of mutable value types
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple1<IntValue>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.MIN, 0)
            .aggregate(Aggregations.MIN, 0)
            .project(0);

    List<Tuple1<IntValue>> result = aggregateDs.collect();

    String expected = "1\n";

    compareResultAsTuples(result, expected);
}
Example #7
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testFullAggregateOfMutableValueTypes() throws Exception {
    /*
     * Full Aggregate of mutable value types
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds
            .aggregate(Aggregations.SUM, 0)
            .and(Aggregations.MAX, 1)
            .project(0, 1);

    List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

    String expected = "231,6\n";

    compareResultAsTuples(result, expected);
}
Example #8
Source File: TPCDSQuery55Parquet.java From parquet-flinktacular with Apache License 2.0
public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<DataDim> dataDims = getDataDimDataSet(env).map(new MapDataDim());
    DataSet<Item> item = getItemDataSet(env).map(new MapItem());
    DataSet<StoreSales> storeSales = getStoreSalesDataSet(env).map(new MapStoreSales());

    dataDims.join(storeSales).where(0).equalTo(0).with(new DataDimAndStoreSales())
            .join(item).where(1).equalTo(0).with(new DataDimAndStoreSalesAndItems())
            .groupBy(1, 0).aggregate(Aggregations.SUM, 2)
            .print();

    // execute program
    env.execute("TPC-DS Query 55 Example with Parquet input");

    System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
}
Example #9
Source File: ScalaAggregateOperator.java From flink with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which the aggregation is applied
 * @param function the aggregation to apply (SUM, MIN, or MAX)
 * @param field the position of the tuple field to aggregate
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
    super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());
    Preconditions.checkNotNull(function);

    if (!input.getInputDataSet().getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // set the aggregation fields
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = input;
}
Example #10
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
    /*
     * Grouped Aggregate of mutable value types
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

    String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n" + "6,111\n";

    compareResultAsTuples(result, expected);
}
Example #11
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testFullAggregate() throws Exception {
    /*
     * Full Aggregate
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Integer, Long>> aggregateDs = ds
            .aggregate(Aggregations.SUM, 0)
            .and(Aggregations.MAX, 1)
            .project(0, 1);

    List<Tuple2<Integer, Long>> result = aggregateDs.collect();

    String expected = "231,6\n";

    compareResultAsTuples(result, expected);
}
Example #12
Source File: ScalaAggregateOperator.java From flink with Apache License 2.0
/**
 * Non grouped aggregation.
 */
public ScalaAggregateOperator(org.apache.flink.api.java.DataSet<IN> input, Aggregations function, int field) {
    super(Preconditions.checkNotNull(input), input.getType());
    Preconditions.checkNotNull(function);

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // this is the first aggregation operator after a regular data set (non grouped aggregation)
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = null;
}
Example #13
Source File: AggregateITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
    /*
     * Grouped Aggregate of mutable value types
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

    String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n" + "6,111\n";

    compareResultAsTuples(result, expected);
}
Example #14
Source File: AggregateITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testGroupedAggregate() throws Exception {
    /*
     * Grouped Aggregate
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<Long, Integer>> result = aggregateDs.collect();

    String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n" + "6,111\n";

    compareResultAsTuples(result, expected);
}
Example #15
Source File: AggregateOperator.java From flink with Apache License 2.0
/**
 * Non grouped aggregation.
 */
public AggregateOperator(DataSet<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input), input.getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // this is the first aggregation operator after a regular data set (non grouped aggregation)
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = null;
}
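This is the non-grouped counterpart: calling aggregate(...) on a plain DataSet ends up here with grouping = null, so the aggregation runs over the entire input. A hedged sketch (env and the input are invented for illustration):

DataSet<Tuple2<Integer, Long>> ds = env.fromElements(
        Tuple2.of(3, 1L), Tuple2.of(4, 9L));

// full aggregate over the whole DataSet; non-aggregated fields
// carry values taken from some input row
ds.aggregate(Aggregations.MAX, 1).print();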
Example #16
Source File: ScalaAggregateOperator.java From flink with Apache License 2.0
public ScalaAggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);

    return this;
}
Example #17
Source File: AggregateOperatorTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregationTypes() {
    try {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

        // should work: multiple aggregates
        tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

        // should work: nested aggregates
        tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1);

        // should not work: SUM on a String field
        try {
            tupleDs.aggregate(Aggregations.SUM, 2);
            Assert.fail();
        } catch (UnsupportedAggregationTypeException iae) {
            // we're good here
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #18
Source File: ScalaAggregateOperator.java From Flink-CEPplus with Apache License 2.0
public ScalaAggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);

    return this;
}
Example #19
Source File: AggregateOperator.java From flink with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which the aggregation is applied
 * @param function the aggregation to apply (SUM, MIN, or MAX)
 * @param field the position of the tuple field to aggregate
 */
public AggregateOperator(Grouping<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getInputDataSet().getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // set the aggregation fields
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = input;
}
Example #20
Source File: WordCount.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<String> text = getTextDataSet(env);

    DataSet<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new Tokenizer())
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .aggregate(Aggregations.SUM, 1);

    // emit result
    if (fileOutput) {
        counts.writeAsCsv(outputPath, "\n", " ");
        // execute program
        env.execute("WordCount Example");
    } else {
        counts.print();
    }
}
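The Tokenizer used above is not shown on this page. In the canonical Flink WordCount it is a FlatMapFunction along the following lines; treat this as a hedged reconstruction rather than this project's exact code:

public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize and split the line into words
        String[] tokens = value.toLowerCase().split("\\W+");

        // emit a (word, 1) pair for every non-empty token
        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}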
Example #21
Source File: HighParallelismIterationsTestProgram.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<>());

    env.execute();
}
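Within the delta iteration above, the groupBy(0).aggregate(Aggregations.MIN, 1) step is what selects the smallest candidate component id per vertex. Isolated, with toy data invented for illustration:

// (vertexId, candidateComponentId) pairs, as produced by the join with the edges
DataSet<Tuple2<Long, Long>> candidates = env.fromElements(
        Tuple2.of(1L, 3L), Tuple2.of(1L, 2L), Tuple2.of(2L, 2L));

// keep only the smallest candidate per vertex: (1,2) and (2,2)
DataSet<Tuple2<Long, Long>> minPerVertex = candidates
        .groupBy(0)
        .aggregate(Aggregations.MIN, 1);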
Example #22
Source File: HighParallelismIterationsTestProgram.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<>());

    env.execute();
}
Example #23
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example #24
Source File: ConnectedComponentsWithObjectMapITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}