org.apache.flink.graph.library.linkanalysis.PageRank.Result Java Examples

Source File: PageRankTest.java From Flink-CEPplus with Apache License 2.0

6 votes

@Test
public void testWithSimpleGraph() throws Exception {
	// run PageRank on the shared directed test graph
	PageRank<IntValue, NullValue, NullValue> algorithm =
		new PageRank<IntValue, NullValue, NullValue>(DAMPING_FACTOR, 20);
	DataSet<Result<IntValue>> scores = algorithm.run(directedSimpleGraph);

	// expected score per vertex; the list index doubles as the vertex ID
	List<Double> expectedResults = new ArrayList<>();
	expectedResults.add(0.0909212166211);
	expectedResults.add(0.279516064311);
	expectedResults.add(0.129562719068);
	expectedResults.add(0.223268406353);
	expectedResults.add(0.185810377026);
	expectedResults.add(0.0909212166211);

	// every collected result must match the expectation stored under its vertex ID
	List<Result<IntValue>> collected = scores.collect();
	for (Result<IntValue> vertexResult : collected) {
		int vertexId = vertexResult.getVertexId0().getValue();
		double expectedScore = expectedResults.get(vertexId);
		double actualScore = vertexResult.getPageRankScore().getValue();

		assertEquals(expectedScore, actualScore, ACCURACY);
	}
}

Source File: PageRankTest.java From flink with Apache License 2.0

6 votes

@Test
public void testWithSimpleGraph() throws Exception {
	// run PageRank on the shared directed test graph
	PageRank<IntValue, NullValue, NullValue> algorithm =
		new PageRank<IntValue, NullValue, NullValue>(DAMPING_FACTOR, 20);
	DataSet<Result<IntValue>> scores = algorithm.run(directedSimpleGraph);

	// expected score per vertex; the list index doubles as the vertex ID
	List<Double> expectedResults = new ArrayList<>();
	expectedResults.add(0.0909212166211);
	expectedResults.add(0.279516064311);
	expectedResults.add(0.129562719068);
	expectedResults.add(0.223268406353);
	expectedResults.add(0.185810377026);
	expectedResults.add(0.0909212166211);

	// every collected result must match the expectation stored under its vertex ID
	List<Result<IntValue>> collected = scores.collect();
	for (Result<IntValue> vertexResult : collected) {
		int vertexId = vertexResult.getVertexId0().getValue();
		double expectedScore = expectedResults.get(vertexId);
		double actualScore = vertexResult.getPageRankScore().getValue();

		assertEquals(expectedScore, actualScore, ACCURACY);
	}
}

Source File: PageRankTest.java From flink with Apache License 2.0

6 votes

@Test
public void testWithSimpleGraph() throws Exception {
	// run PageRank on the shared directed test graph
	PageRank<IntValue, NullValue, NullValue> algorithm =
		new PageRank<IntValue, NullValue, NullValue>(DAMPING_FACTOR, 20);
	DataSet<Result<IntValue>> scores = algorithm.run(directedSimpleGraph);

	// expected score per vertex; the list index doubles as the vertex ID
	List<Double> expectedResults = new ArrayList<>();
	expectedResults.add(0.0909212166211);
	expectedResults.add(0.279516064311);
	expectedResults.add(0.129562719068);
	expectedResults.add(0.223268406353);
	expectedResults.add(0.185810377026);
	expectedResults.add(0.0909212166211);

	// every collected result must match the expectation stored under its vertex ID
	List<Result<IntValue>> collected = scores.collect();
	for (Result<IntValue> vertexResult : collected) {
		int vertexId = vertexResult.getVertexId0().getValue();
		double expectedScore = expectedResults.get(vertexId);
		double actualScore = vertexResult.getPageRankScore().getValue();

		assertEquals(expectedScore, actualScore, ACCURACY);
	}
}

Source File: PageRankTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Runs PageRank on the given graph with zero-degree vertices included and
 * verifies that the expected number of results is produced and that every
 * result carries the same score.
 *
 * @param graph input graph
 * @param count expected number of results
 * @param score PageRank score expected for every result
 * @param <T> graph ID type
 * @throws Exception on error
 */
private static <T> void validate(Graph<T, NullValue, NullValue> graph, long count, double score) throws Exception {
	List<Result<T>> results = new PageRank<T, NullValue, NullValue>(DAMPING_FACTOR, ACCURACY)
		.setIncludeZeroDegreeVertices(true)
		.run(graph)
		.collect();

	long actualCount = results.size();
	assertEquals(count, actualCount);

	// all vertices are expected to share one uniform score
	for (Result<T> vertexResult : results) {
		double actualScore = vertexResult.getPageRankScore().getValue();
		assertEquals(score, actualScore, ACCURACY);
	}
}

Source File: PageRankTest.java From Flink-CEPplus with Apache License 2.0

5 votes

@Test
public void testWithRMatGraph() throws Exception {
	DataSet<Result<LongValue>> scores = new PageRank<LongValue, NullValue, NullValue>(DAMPING_FACTOR, ACCURACY)
		.run(directedRMatGraph(10, 16));

	// index the collected results by vertex ID for direct lookup below
	Map<Long, Result<LongValue>> results = new HashMap<>();
	for (Result<LongValue> vertexResult : new Collect<Result<LongValue>>().run(scores).execute()) {
		long vertexId = vertexResult.getVertexId0().getValue();
		results.put(vertexId, vertexResult);
	}

	assertEquals(902, results.size());

	// spot-check a pseudo-random sample of vertices covering both high and low scores
	Map<Long, Double> expectedResults = new HashMap<>();
	expectedResults.put(0L, 0.0271152394743);
	expectedResults.put(1L, 0.0132848430616);
	expectedResults.put(2L, 0.0121819700294);
	expectedResults.put(8L, 0.0115923214664);
	expectedResults.put(13L, 0.00183241122822);
	expectedResults.put(29L, 0.000848190646547);
	expectedResults.put(109L, 0.00030846825644);
	expectedResults.put(394L, 0.000828826945546);
	expectedResults.put(652L, 0.000683948671035);
	expectedResults.put(1020L, 0.000250442325034);

	for (Map.Entry<Long, Double> entry : expectedResults.entrySet()) {
		Result<LongValue> actual = results.get(entry.getKey());
		double actualScore = actual.getPageRankScore().getValue();
		double expectedScore = entry.getValue();

		assertEquals(expectedScore, actualScore, ACCURACY);
	}
}

Source File: PageRankTest.java From flink with Apache License 2.0

5 votes

/**
 * Runs PageRank on the given graph with zero-degree vertices included and
 * verifies that the expected number of results is produced and that every
 * result carries the same score.
 *
 * @param graph input graph
 * @param count expected number of results
 * @param score PageRank score expected for every result
 * @param <T> graph ID type
 * @throws Exception on error
 */
private static <T> void validate(Graph<T, NullValue, NullValue> graph, long count, double score) throws Exception {
	List<Result<T>> results = new PageRank<T, NullValue, NullValue>(DAMPING_FACTOR, ACCURACY)
		.setIncludeZeroDegreeVertices(true)
		.run(graph)
		.collect();

	long actualCount = results.size();
	assertEquals(count, actualCount);

	// all vertices are expected to share one uniform score
	for (Result<T> vertexResult : results) {
		double actualScore = vertexResult.getPageRankScore().getValue();
		assertEquals(score, actualScore, ACCURACY);
	}
}

Source File: PageRankTest.java From flink with Apache License 2.0

5 votes

@Test
public void testWithRMatGraph() throws Exception {
	DataSet<Result<LongValue>> scores = new PageRank<LongValue, NullValue, NullValue>(DAMPING_FACTOR, ACCURACY)
		.run(directedRMatGraph(10, 16));

	// index the collected results by vertex ID for direct lookup below
	Map<Long, Result<LongValue>> results = new HashMap<>();
	for (Result<LongValue> vertexResult : new Collect<Result<LongValue>>().run(scores).execute()) {
		long vertexId = vertexResult.getVertexId0().getValue();
		results.put(vertexId, vertexResult);
	}

	assertEquals(902, results.size());

	// spot-check a pseudo-random sample of vertices covering both high and low scores
	Map<Long, Double> expectedResults = new HashMap<>();
	expectedResults.put(0L, 0.0271152394743);
	expectedResults.put(1L, 0.0132848430616);
	expectedResults.put(2L, 0.0121819700294);
	expectedResults.put(8L, 0.0115923214664);
	expectedResults.put(13L, 0.00183241122822);
	expectedResults.put(29L, 0.000848190646547);
	expectedResults.put(109L, 0.00030846825644);
	expectedResults.put(394L, 0.000828826945546);
	expectedResults.put(652L, 0.000683948671035);
	expectedResults.put(1020L, 0.000250442325034);

	for (Map.Entry<Long, Double> entry : expectedResults.entrySet()) {
		Result<LongValue> actual = results.get(entry.getKey());
		double actualScore = actual.getPageRankScore().getValue();
		double expectedScore = entry.getValue();

		assertEquals(expectedScore, actualScore, ACCURACY);
	}
}

Source File: PageRankTest.java From flink with Apache License 2.0

5 votes

/**
 * Runs PageRank on the given graph with zero-degree vertices included and
 * verifies that the expected number of results is produced and that every
 * result carries the same score.
 *
 * @param graph input graph
 * @param count expected number of results
 * @param score PageRank score expected for every result
 * @param <T> graph ID type
 * @throws Exception on error
 */
private static <T> void validate(Graph<T, NullValue, NullValue> graph, long count, double score) throws Exception {
	List<Result<T>> results = new PageRank<T, NullValue, NullValue>(DAMPING_FACTOR, ACCURACY)
		.setIncludeZeroDegreeVertices(true)
		.run(graph)
		.collect();

	long actualCount = results.size();
	assertEquals(count, actualCount);

	// all vertices are expected to share one uniform score
	for (Result<T> vertexResult : results) {
		double actualScore = vertexResult.getPageRankScore().getValue();
		assertEquals(score, actualScore, ACCURACY);
	}
}

Source File: PageRankTest.java From flink with Apache License 2.0

5 votes

@Test
public void testWithRMatGraph() throws Exception {
	DataSet<Result<LongValue>> scores = new PageRank<LongValue, NullValue, NullValue>(DAMPING_FACTOR, ACCURACY)
		.run(directedRMatGraph(10, 16));

	// index the collected results by vertex ID for direct lookup below
	Map<Long, Result<LongValue>> results = new HashMap<>();
	for (Result<LongValue> vertexResult : new Collect<Result<LongValue>>().run(scores).execute()) {
		long vertexId = vertexResult.getVertexId0().getValue();
		results.put(vertexId, vertexResult);
	}

	assertEquals(902, results.size());

	// spot-check a pseudo-random sample of vertices covering both high and low scores
	Map<Long, Double> expectedResults = new HashMap<>();
	expectedResults.put(0L, 0.0271152394743);
	expectedResults.put(1L, 0.0132848430616);
	expectedResults.put(2L, 0.0121819700294);
	expectedResults.put(8L, 0.0115923214664);
	expectedResults.put(13L, 0.00183241122822);
	expectedResults.put(29L, 0.000848190646547);
	expectedResults.put(109L, 0.00030846825644);
	expectedResults.put(394L, 0.000828826945546);
	expectedResults.put(652L, 0.000683948671035);
	expectedResults.put(1020L, 0.000250442325034);

	for (Map.Entry<Long, Double> entry : expectedResults.entrySet()) {
		Result<LongValue> actual = results.get(entry.getKey());
		double actualScore = actual.getPageRankScore().getValue();
		double expectedScore = entry.getValue();

		assertEquals(expectedScore, actualScore, ACCURACY);
	}
}

Source File: PageRank.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Assembles the PageRank data flow for {@code input} and returns the per-vertex results.
 *
 * <p>The plan computes vertex degrees and the vertex count, derives initial scores, and then
 * opens an iteration capped at {@code maxIterations} rounds. Each round co-groups the current
 * scores with the degree-annotated edges, sums the emitted scores per vertex, unions in the
 * vertices with zero in-edges, and adjusts the scores using the broadcast score total and
 * vertex count. When {@code convergenceThreshold} is below {@link Double#MAX_VALUE}, a
 * convergence criterion is additionally registered on the iteration.
 *
 * @param input graph to analyze
 * @return data set produced by mapping each score tuple leaving the iteration to a {@code Result}
 * @throws Exception propagated from the operations used to build the plan
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// vertex degree
	DataSet<Vertex<K, Degrees>> vertexDegree = input
		.run(new VertexDegrees<K, VV, EV>()
			.setIncludeZeroDegreeVertices(includeZeroDegreeVertices)
			.setParallelism(parallelism));

	// vertex count
	DataSet<LongValue> vertexCount = GraphUtils.count(vertexDegree);

	// s, t, d(s)
	DataSet<Edge<K, LongValue>> edgeSourceDegree = input
		.run(new EdgeSourceDegrees<K, VV, EV>()
			.setParallelism(parallelism))
		.map(new ExtractSourceDegree<>())
			.setParallelism(parallelism)
			.name("Extract source degree");

	// vertices with zero in-edges
	DataSet<Tuple2<K, DoubleValue>> sourceVertices = vertexDegree
		.flatMap(new InitializeSourceVertices<>())
			.setParallelism(parallelism)
			.name("Initialize source vertex scores");

	// s, initial pagerank(s)
	DataSet<Tuple2<K, DoubleValue>> initialScores = vertexDegree
		.map(new InitializeVertexScores<>())
		.withBroadcastSet(vertexCount, VERTEX_COUNT)
			.setParallelism(parallelism)
			.name("Initialize scores");

	// open the iteration over the initial scores, bounded by maxIterations
	IterativeDataSet<Tuple2<K, DoubleValue>> iterative = initialScores
		.iterate(maxIterations)
		.setParallelism(parallelism);

	// s, projected pagerank(s)
	// scores and edges are matched on field 0 of each side, then summed per vertex ID
	DataSet<Tuple2<K, DoubleValue>> vertexScores = iterative
		.coGroup(edgeSourceDegree)
		.where(0)
		.equalTo(0)
		.with(new SendScore<>())
			.setParallelism(parallelism)
			.name("Send score")
		.groupBy(0)
		.reduce(new SumScore<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Sum");

	// ignored ID, total pagerank
	// NOTE(review): this operator shares the name "Sum" with the per-vertex reduce above
	DataSet<Tuple2<K, DoubleValue>> sumOfScores = vertexScores
		.reduce(new SumVertexScores<>())
			.setParallelism(parallelism)
			.name("Sum");

	// s, adjusted pagerank(s)
	// zero in-edge vertices are re-added here; the adjustment reads the broadcast total and vertex count
	DataSet<Tuple2<K, DoubleValue>> adjustedScores = vertexScores
		.union(sourceVertices)
			.name("Union with source vertices")
		.map(new AdjustScores<>(dampingFactor))
			.withBroadcastSet(sumOfScores, SUM_OF_SCORES)
			.withBroadcastSet(vertexCount, VERTEX_COUNT)
				.setParallelism(parallelism)
				.name("Adjust scores");

	// data set fed back into the iteration as the next round's scores
	DataSet<Tuple2<K, DoubleValue>> passThrough;

	// a threshold of Double.MAX_VALUE skips the convergence check; the round limit alone ends the loop
	if (convergenceThreshold < Double.MAX_VALUE) {
		// join the round's input scores with the adjusted scores on vertex ID;
		// ChangeInScores presumably feeds the CHANGE_IN_SCORES aggregator - TODO confirm
		passThrough = iterative
			.join(adjustedScores)
			.where(0)
			.equalTo(0)
			.with(new ChangeInScores<>())
				.setParallelism(parallelism)
				.name("Change in scores");

		iterative.registerAggregationConvergenceCriterion(CHANGE_IN_SCORES, new DoubleSumAggregator(), new ScoreConvergence(convergenceThreshold));
	} else {
		passThrough = adjustedScores;
	}

	// close the loop with passThrough, then translate the final tuples into Result objects
	return iterative
		.closeWith(passThrough)
		.map(new TranslateResult<>())
			.setParallelism(parallelism)
			.name("Map result");
}

Source File: PageRank.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Copies the vertex ID and PageRank score from the tuple into the
 * {@code output} object held outside this method and returns that object.
 *
 * @param value tuple of vertex ID ({@code f0}) and score ({@code f1})
 * @return the {@code output} object populated from {@code value}
 * @throws Exception declared by the overridden method
 */
@Override
public Result<T> map(Tuple2<T, DoubleValue> value) throws Exception {
	T vertexId = value.f0;
	DoubleValue score = value.f1;

	output.setVertexId0(vertexId);
	output.setPageRankScore(score);

	return output;
}

Source File: PageRank.java From flink with Apache License 2.0

4 votes

/**
 * Assembles the PageRank data flow for {@code input} and returns the per-vertex results.
 *
 * <p>The plan computes vertex degrees and the vertex count, derives initial scores, and then
 * opens an iteration capped at {@code maxIterations} rounds. Each round co-groups the current
 * scores with the degree-annotated edges, sums the emitted scores per vertex, unions in the
 * vertices with zero in-edges, and adjusts the scores using the broadcast score total and
 * vertex count. When {@code convergenceThreshold} is below {@link Double#MAX_VALUE}, a
 * convergence criterion is additionally registered on the iteration.
 *
 * @param input graph to analyze
 * @return data set produced by mapping each score tuple leaving the iteration to a {@code Result}
 * @throws Exception propagated from the operations used to build the plan
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// vertex degree
	DataSet<Vertex<K, Degrees>> vertexDegree = input
		.run(new VertexDegrees<K, VV, EV>()
			.setIncludeZeroDegreeVertices(includeZeroDegreeVertices)
			.setParallelism(parallelism));

	// vertex count
	DataSet<LongValue> vertexCount = GraphUtils.count(vertexDegree);

	// s, t, d(s)
	DataSet<Edge<K, LongValue>> edgeSourceDegree = input
		.run(new EdgeSourceDegrees<K, VV, EV>()
			.setParallelism(parallelism))
		.map(new ExtractSourceDegree<>())
			.setParallelism(parallelism)
			.name("Extract source degree");

	// vertices with zero in-edges
	DataSet<Tuple2<K, DoubleValue>> sourceVertices = vertexDegree
		.flatMap(new InitializeSourceVertices<>())
			.setParallelism(parallelism)
			.name("Initialize source vertex scores");

	// s, initial pagerank(s)
	DataSet<Tuple2<K, DoubleValue>> initialScores = vertexDegree
		.map(new InitializeVertexScores<>())
		.withBroadcastSet(vertexCount, VERTEX_COUNT)
			.setParallelism(parallelism)
			.name("Initialize scores");

	// open the iteration over the initial scores, bounded by maxIterations
	IterativeDataSet<Tuple2<K, DoubleValue>> iterative = initialScores
		.iterate(maxIterations)
		.setParallelism(parallelism);

	// s, projected pagerank(s)
	// scores and edges are matched on field 0 of each side, then summed per vertex ID
	DataSet<Tuple2<K, DoubleValue>> vertexScores = iterative
		.coGroup(edgeSourceDegree)
		.where(0)
		.equalTo(0)
		.with(new SendScore<>())
			.setParallelism(parallelism)
			.name("Send score")
		.groupBy(0)
		.reduce(new SumScore<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Sum");

	// ignored ID, total pagerank
	// NOTE(review): this operator shares the name "Sum" with the per-vertex reduce above
	DataSet<Tuple2<K, DoubleValue>> sumOfScores = vertexScores
		.reduce(new SumVertexScores<>())
			.setParallelism(parallelism)
			.name("Sum");

	// s, adjusted pagerank(s)
	// zero in-edge vertices are re-added here; the adjustment reads the broadcast total and vertex count
	DataSet<Tuple2<K, DoubleValue>> adjustedScores = vertexScores
		.union(sourceVertices)
			.name("Union with source vertices")
		.map(new AdjustScores<>(dampingFactor))
			.withBroadcastSet(sumOfScores, SUM_OF_SCORES)
			.withBroadcastSet(vertexCount, VERTEX_COUNT)
				.setParallelism(parallelism)
				.name("Adjust scores");

	// data set fed back into the iteration as the next round's scores
	DataSet<Tuple2<K, DoubleValue>> passThrough;

	// a threshold of Double.MAX_VALUE skips the convergence check; the round limit alone ends the loop
	if (convergenceThreshold < Double.MAX_VALUE) {
		// join the round's input scores with the adjusted scores on vertex ID;
		// ChangeInScores presumably feeds the CHANGE_IN_SCORES aggregator - TODO confirm
		passThrough = iterative
			.join(adjustedScores)
			.where(0)
			.equalTo(0)
			.with(new ChangeInScores<>())
				.setParallelism(parallelism)
				.name("Change in scores");

		iterative.registerAggregationConvergenceCriterion(CHANGE_IN_SCORES, new DoubleSumAggregator(), new ScoreConvergence(convergenceThreshold));
	} else {
		passThrough = adjustedScores;
	}

	// close the loop with passThrough, then translate the final tuples into Result objects
	return iterative
		.closeWith(passThrough)
		.map(new TranslateResult<>())
			.setParallelism(parallelism)
			.name("Map result");
}

Source File: PageRank.java From flink with Apache License 2.0

4 votes

/**
 * Copies the vertex ID and PageRank score from the tuple into the
 * {@code output} object held outside this method and returns that object.
 *
 * @param value tuple of vertex ID ({@code f0}) and score ({@code f1})
 * @return the {@code output} object populated from {@code value}
 * @throws Exception declared by the overridden method
 */
@Override
public Result<T> map(Tuple2<T, DoubleValue> value) throws Exception {
	T vertexId = value.f0;
	DoubleValue score = value.f1;

	output.setVertexId0(vertexId);
	output.setPageRankScore(score);

	return output;
}

Source File: PageRank.java From flink with Apache License 2.0

4 votes

/**
 * Assembles the PageRank data flow for {@code input} and returns the per-vertex results.
 *
 * <p>The plan computes vertex degrees and the vertex count, derives initial scores, and then
 * opens an iteration capped at {@code maxIterations} rounds. Each round co-groups the current
 * scores with the degree-annotated edges, sums the emitted scores per vertex, unions in the
 * vertices with zero in-edges, and adjusts the scores using the broadcast score total and
 * vertex count. When {@code convergenceThreshold} is below {@link Double#MAX_VALUE}, a
 * convergence criterion is additionally registered on the iteration.
 *
 * @param input graph to analyze
 * @return data set produced by mapping each score tuple leaving the iteration to a {@code Result}
 * @throws Exception propagated from the operations used to build the plan
 */
@Override
public DataSet<Result<K>> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// vertex degree
	DataSet<Vertex<K, Degrees>> vertexDegree = input
		.run(new VertexDegrees<K, VV, EV>()
			.setIncludeZeroDegreeVertices(includeZeroDegreeVertices)
			.setParallelism(parallelism));

	// vertex count
	DataSet<LongValue> vertexCount = GraphUtils.count(vertexDegree);

	// s, t, d(s)
	DataSet<Edge<K, LongValue>> edgeSourceDegree = input
		.run(new EdgeSourceDegrees<K, VV, EV>()
			.setParallelism(parallelism))
		.map(new ExtractSourceDegree<>())
			.setParallelism(parallelism)
			.name("Extract source degree");

	// vertices with zero in-edges
	DataSet<Tuple2<K, DoubleValue>> sourceVertices = vertexDegree
		.flatMap(new InitializeSourceVertices<>())
			.setParallelism(parallelism)
			.name("Initialize source vertex scores");

	// s, initial pagerank(s)
	DataSet<Tuple2<K, DoubleValue>> initialScores = vertexDegree
		.map(new InitializeVertexScores<>())
		.withBroadcastSet(vertexCount, VERTEX_COUNT)
			.setParallelism(parallelism)
			.name("Initialize scores");

	// open the iteration over the initial scores, bounded by maxIterations
	IterativeDataSet<Tuple2<K, DoubleValue>> iterative = initialScores
		.iterate(maxIterations)
		.setParallelism(parallelism);

	// s, projected pagerank(s)
	// scores and edges are matched on field 0 of each side, then summed per vertex ID
	DataSet<Tuple2<K, DoubleValue>> vertexScores = iterative
		.coGroup(edgeSourceDegree)
		.where(0)
		.equalTo(0)
		.with(new SendScore<>())
			.setParallelism(parallelism)
			.name("Send score")
		.groupBy(0)
		.reduce(new SumScore<>())
		.setCombineHint(CombineHint.HASH)
			.setParallelism(parallelism)
			.name("Sum");

	// ignored ID, total pagerank
	// NOTE(review): this operator shares the name "Sum" with the per-vertex reduce above
	DataSet<Tuple2<K, DoubleValue>> sumOfScores = vertexScores
		.reduce(new SumVertexScores<>())
			.setParallelism(parallelism)
			.name("Sum");

	// s, adjusted pagerank(s)
	// zero in-edge vertices are re-added here; the adjustment reads the broadcast total and vertex count
	DataSet<Tuple2<K, DoubleValue>> adjustedScores = vertexScores
		.union(sourceVertices)
			.name("Union with source vertices")
		.map(new AdjustScores<>(dampingFactor))
			.withBroadcastSet(sumOfScores, SUM_OF_SCORES)
			.withBroadcastSet(vertexCount, VERTEX_COUNT)
				.setParallelism(parallelism)
				.name("Adjust scores");

	// data set fed back into the iteration as the next round's scores
	DataSet<Tuple2<K, DoubleValue>> passThrough;

	// a threshold of Double.MAX_VALUE skips the convergence check; the round limit alone ends the loop
	if (convergenceThreshold < Double.MAX_VALUE) {
		// join the round's input scores with the adjusted scores on vertex ID;
		// ChangeInScores presumably feeds the CHANGE_IN_SCORES aggregator - TODO confirm
		passThrough = iterative
			.join(adjustedScores)
			.where(0)
			.equalTo(0)
			.with(new ChangeInScores<>())
				.setParallelism(parallelism)
				.name("Change in scores");

		iterative.registerAggregationConvergenceCriterion(CHANGE_IN_SCORES, new DoubleSumAggregator(), new ScoreConvergence(convergenceThreshold));
	} else {
		passThrough = adjustedScores;
	}

	// close the loop with passThrough, then translate the final tuples into Result objects
	return iterative
		.closeWith(passThrough)
		.map(new TranslateResult<>())
			.setParallelism(parallelism)
			.name("Map result");
}

Source File: PageRank.java From flink with Apache License 2.0

4 votes

/**
 * Copies the vertex ID and PageRank score from the tuple into the
 * {@code output} object held outside this method and returns that object.
 *
 * @param value tuple of vertex ID ({@code f0}) and score ({@code f1})
 * @return the {@code output} object populated from {@code value}
 * @throws Exception declared by the overridden method
 */
@Override
public Result<T> map(Tuple2<T, DoubleValue> value) throws Exception {
	T vertexId = value.f0;
	DoubleValue score = value.f1;

	output.setVertexId0(vertexId);
	output.setPageRankScore(score);

	return output;
}

org.apache.flink.graph.library.linkanalysis.PageRank.Result Java Examples