org.apache.flink.api.java.aggregation.Aggregations Java Examples
The following examples show how to use org.apache.flink.api.java.aggregation.Aggregations. They are collected from open-source projects; the project, source file, and license are noted above each example.
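Before diving into the examples, here is a minimal, self-contained sketch of the typical usage pattern; the class name and input data are invented for illustration:

import java.util.List;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple2;

// hypothetical demo class, not part of any of the projects below
public class AggregationsSketch {

    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // made-up (key, value) records
        DataSet<Tuple2<Integer, Integer>> input = env.fromElements(
                Tuple2.of(1, 10), Tuple2.of(1, 20), Tuple2.of(2, 5));

        // per-key sum of field 1; Aggregations.MIN and Aggregations.MAX work the same way
        List<Tuple2<Integer, Integer>> sums = input
                .groupBy(0)
                .aggregate(Aggregations.SUM, 1)
                .collect();

        System.out.println(sums); // e.g. [(1,30), (2,5)] (order is not guaranteed)
    }
}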
Example #1
Source File: AggregateITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testNestedAggregate() throws Exception {
    /*
     * Nested Aggregate
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.MIN, 0)
            .aggregate(Aggregations.MIN, 0)
            .project(0);

    List<Tuple1<Integer>> result = aggregateDs.collect();

    String expected = "1\n";

    compareResultAsTuples(result, expected);
}
Example #2
Source File: AggregateOperator.java From flink with Apache License 2.0
public AggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);

    return this;
}
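For context, user code reaches this and(...) method by chaining it onto a first aggregate(...) call, so several aggregations are evaluated in a single pass. A hedged sketch (env and the input values are invented for illustration):

DataSet<Tuple2<Integer, Long>> ds = env.fromElements(
        Tuple2.of(1, 7L), Tuple2.of(3, 2L));

ds.aggregate(Aggregations.SUM, 0)   // creates the AggregateOperator
  .and(Aggregations.MAX, 1)         // the method above registers a second aggregation
  .print();                         // prints (4,7)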
Example #3
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testGroupedAggregate() throws Exception {
    /*
     * Grouped Aggregate
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<Long, Integer>> result = aggregateDs.collect();

    String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n" + "6,111\n";

    compareResultAsTuples(result, expected);
}
Example #4
Source File: AggregateOperatorTest.java From flink with Apache License 2.0
@Test
public void testAggregationTypes() {
    try {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

        // should work: multiple aggregates
        tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

        // should work: nested aggregates
        tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1);

        // should not work: SUM on a String field
        try {
            tupleDs.aggregate(Aggregations.SUM, 2);
            Assert.fail();
        } catch (UnsupportedAggregationTypeException iae) {
            // we're good here
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
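The inner try/catch above mirrors what happens in user code: requesting a numeric aggregation on a non-numeric field fails while the operator is being built, not when the job runs. A hedged sketch (env and the input are invented for illustration):

DataSet<Tuple2<String, Integer>> ds = env.fromElements(Tuple2.of("a", 1));

try {
    ds.aggregate(Aggregations.SUM, 0);   // field 0 is a String
} catch (UnsupportedAggregationTypeException e) {
    // the aggregations are only defined for supported (numeric) field types
}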
Example #5
Source File: AggregateOperator.java From Flink-CEPplus with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which the aggregation is applied
 * @param function the aggregation to apply (SUM, MIN, or MAX)
 * @param field the position of the tuple field to aggregate
 */
public AggregateOperator(Grouping<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getInputDataSet().getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // set the aggregation fields
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = input;
}
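User programs normally do not call this constructor directly; it is invoked behind the scenes when aggregate(...) is called on a grouped DataSet. A hedged sketch of the user-facing call path (ds is assumed to be a tuple DataSet like the ones in the test examples):

// groupBy(0) returns a grouping; aggregate(...) constructs the operator above
DataSet<Tuple2<Integer, Long>> perKeyMin = ds
        .groupBy(0)
        .aggregate(Aggregations.MIN, 1);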
Example #6
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testNestedAggregateOfMutableValueTypes() throws Exception {
    /*
     * Nested Aggregate of mutable value types
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple1<IntValue>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.MIN, 0)
            .aggregate(Aggregations.MIN, 0)
            .project(0);

    List<Tuple1<IntValue>> result = aggregateDs.collect();

    String expected = "1\n";

    compareResultAsTuples(result, expected);
}
Example #7
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testFullAggregateOfMutableValueTypes() throws Exception {
    /*
     * Full Aggregate of mutable value types
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds
            .aggregate(Aggregations.SUM, 0)
            .and(Aggregations.MAX, 1)
            .project(0, 1);

    List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

    String expected = "231,6\n";

    compareResultAsTuples(result, expected);
}
Example #8
Source File: TPCDSQuery55Parquet.java From parquet-flinktacular with Apache License 2.0
public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<DataDim> dataDims = getDataDimDataSet(env).map(new MapDataDim());
    DataSet<Item> item = getItemDataSet(env).map(new MapItem());
    DataSet<StoreSales> storeSales = getStoreSalesDataSet(env).map(new MapStoreSales());

    dataDims.join(storeSales).where(0).equalTo(0).with(new DataDimAndStoreSales())
            .join(item).where(1).equalTo(0).with(new DataDimAndStoreSalesAndItems())
            .groupBy(1, 0).aggregate(Aggregations.SUM, 2)
            .print();

    // execute program
    env.execute("TPC-DS Query 55 Example with Parquet input");

    System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
}
Example #9
Source File: ScalaAggregateOperator.java From flink with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which the aggregation is applied
 * @param function the aggregation to apply (SUM, MIN, or MAX)
 * @param field the position of the tuple field to aggregate
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
    super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());
    Preconditions.checkNotNull(function);

    if (!input.getInputDataSet().getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // set the aggregation fields
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = input;
}
Example #10
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
    /*
     * Grouped Aggregate of mutable value types
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

    String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n" + "6,111\n";

    compareResultAsTuples(result, expected);
}
Example #11
Source File: AggregateITCase.java From flink with Apache License 2.0
@Test
public void testFullAggregate() throws Exception {
    /*
     * Full Aggregate
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Integer, Long>> aggregateDs = ds
            .aggregate(Aggregations.SUM, 0)
            .and(Aggregations.MAX, 1)
            .project(0, 1);

    List<Tuple2<Integer, Long>> result = aggregateDs.collect();

    String expected = "231,6\n";

    compareResultAsTuples(result, expected);
}
Example #12
Source File: ScalaAggregateOperator.java From flink with Apache License 2.0
/**
 * Non grouped aggregation.
 */
public ScalaAggregateOperator(org.apache.flink.api.java.DataSet<IN> input, Aggregations function, int field) {
    super(Preconditions.checkNotNull(input), input.getType());
    Preconditions.checkNotNull(function);

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // this is the first aggregation operator after a regular data set (non grouped aggregation)
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = null;
}
Example #13
Source File: AggregateITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
    /*
     * Grouped Aggregate of mutable value types
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

    String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n" + "6,111\n";

    compareResultAsTuples(result, expected);
}
Example #14
Source File: AggregateITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testGroupedAggregate() throws Exception {
    /*
     * Grouped Aggregate
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0);

    List<Tuple2<Long, Integer>> result = aggregateDs.collect();

    String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n" + "6,111\n";

    compareResultAsTuples(result, expected);
}
Example #15
Source File: AggregateOperator.java From flink with Apache License 2.0
/**
 * Non grouped aggregation.
 */
public AggregateOperator(DataSet<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input), input.getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // this is the first aggregation operator after a regular data set (non grouped aggregation)
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = null;
}
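This is the non-grouped counterpart: calling aggregate(...) on a plain DataSet ends up here with grouping = null, so the aggregation runs over the entire input. A hedged sketch (env and the input are invented for illustration):

DataSet<Tuple2<Integer, Long>> ds = env.fromElements(
        Tuple2.of(3, 1L), Tuple2.of(4, 9L));

// full aggregate over the whole DataSet; non-aggregated fields
// carry values taken from some input row
ds.aggregate(Aggregations.MAX, 1).print();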
Example #16
Source File: ScalaAggregateOperator.java From flink with Apache License 2.0
public ScalaAggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);

    return this;
}
Example #17
Source File: AggregateOperatorTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregationTypes() {
    try {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

        // should work: multiple aggregates
        tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

        // should work: nested aggregates
        tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1);

        // should not work: SUM on a String field
        try {
            tupleDs.aggregate(Aggregations.SUM, 2);
            Assert.fail();
        } catch (UnsupportedAggregationTypeException iae) {
            // we're good here
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #18
Source File: ScalaAggregateOperator.java From Flink-CEPplus with Apache License 2.0
public ScalaAggregateOperator<IN> and(Aggregations function, int field) {
    Preconditions.checkNotNull(function);

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);

    return this;
}
Example #19
Source File: AggregateOperator.java From flink with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which the aggregation is applied
 * @param function the aggregation to apply (SUM, MIN, or MAX)
 * @param field the position of the tuple field to aggregate
 */
public AggregateOperator(Grouping<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getInputDataSet().getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // set the aggregation fields
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = input;
}
Example #20
Source File: WordCount.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<String> text = getTextDataSet(env);

    DataSet<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new Tokenizer())
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .aggregate(Aggregations.SUM, 1);

    // emit result
    if (fileOutput) {
        counts.writeAsCsv(outputPath, "\n", " ");
        // execute program
        env.execute("WordCount Example");
    } else {
        counts.print();
    }
}
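The Tokenizer used above is not shown on this page. In the canonical Flink WordCount it is a FlatMapFunction along the following lines; treat this as a hedged reconstruction rather than this project's exact code:

public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize and split the line into words
        String[] tokens = value.toLowerCase().split("\\W+");

        // emit a (word, 1) pair for every non-empty token
        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}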
Example #21
Source File: HighParallelismIterationsTestProgram.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<>());

    env.execute();
}
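Within the delta iteration above, the groupBy(0).aggregate(Aggregations.MIN, 1) step is what selects the smallest candidate component id per vertex. Isolated, with toy data invented for illustration:

// (vertexId, candidateComponentId) pairs, as produced by the join with the edges
DataSet<Tuple2<Long, Long>> candidates = env.fromElements(
        Tuple2.of(1L, 3L), Tuple2.of(1L, 2L), Tuple2.of(2L, 2L));

// keep only the smallest candidate per vertex: (1,2) and (2,2)
DataSet<Tuple2<Long, Long>> minPerVertex = candidates
        .groupBy(0)
        .aggregate(Aggregations.MIN, 1);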
Example #22
Source File: HighParallelismIterationsTestProgram.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
            .rebalance();

    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
            .map(new ConnectedComponents.DuplicateValue<>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
            .where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.output(new DiscardingOutputFormat<>());

    env.execute();
}
Example #23
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example #24
Source File: ConnectedComponentsWithObjectMapITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}