org.apache.spark.graphx.VertexRDD Scala Examples
The following examples show how to use org.apache.spark.graphx.VertexRDD.
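A VertexRDD[VD] is the vertex view of a GraphX graph: an RDD of (VertexId, VD) pairs backed by an index that makes vertex joins and aggregations efficient. Before the full examples, here is a minimal, self-contained sketch of where a VertexRDD comes from (the graph data and app name are illustrative, not taken from the examples below):

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph, VertexRDD}

// build a tiny two-vertex graph and inspect its vertex view
val sc = new SparkContext("local[*]", "VertexRDDIntro")
val vertices = sc.parallelize(Seq((1L, "alice"), (2L, "bob")))
val edges = sc.parallelize(Seq(Edge(1L, 2L, 0.5)))
val graph = Graph(vertices, edges)

// graph.vertices is a VertexRDD[String]; it behaves like an RDD[(VertexId, String)]
val verts: VertexRDD[String] = graph.vertices

// derived vertex properties such as degrees are also exposed as VertexRDDs
val degrees: VertexRDD[Int] = graph.degrees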
Example 1
Source File: PipeClusteringStrongestPath.scala From sddf with GNU General Public License v3.0
package de.unihamburg.vsis.sddf.clustering

import scala.Iterator

import org.apache.spark.graphx.Graph
import org.apache.spark.graphx.VertexRDD

import de.unihamburg.vsis.sddf.reading.Tuple

class PipeClusteringStrongestPath extends PipeClusteringTransitiveClosure {

  override def manipulateGraph(graph: Graph[Tuple, Double]): Graph[_, Double] = {
    val cGraph = graph.mapVertices((vid, tuple) => (vid, Double.MinPositiveValue))

    // attach the maximum adjacent edge attribute to each vertex
    val verticesMaxEdgeAttributes: VertexRDD[Double] = cGraph.mapReduceTriplets(
      edge => Iterator((edge.dstId, edge.attr), (edge.srcId, edge.attr)),
      (a: Double, b: Double) => math.max(a, b)
    )

    // join the resulting vertex attributes with the graph
    val maxGraph: Graph[(Tuple, Double), Double] =
      graph.outerJoinVertices(verticesMaxEdgeAttributes)((id, tuple, simOpt) =>
        simOpt match {
          case Some(sim) => (tuple, sim)
          case None      => (tuple, 0D)
        }
      )

    // keep an edge only if it is the strongest edge at its source or destination,
    // i.e. drop edges weaker than the maximum at both endpoints
    val resultGraph = maxGraph.subgraph(edge =>
      !(edge.attr < edge.srcAttr._2 && edge.attr < edge.dstAttr._2)
    )
    resultGraph
  }
}

object PipeClusteringStrongestPath {
  def apply() = new PipeClusteringStrongestPath()
}
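Note that mapReduceTriplets, used above, was deprecated in Spark 1.2 and removed in Spark 2.0. A minimal sketch of the same max-edge aggregation written against its replacement, aggregateMessages, assuming the same cGraph as in the example:

// send each edge's attribute to both endpoints and keep the per-vertex maximum
val verticesMaxEdgeAttributes: VertexRDD[Double] = cGraph.aggregateMessages[Double](
  ctx => {
    ctx.sendToDst(ctx.attr)
    ctx.sendToSrc(ctx.attr)
  },
  math.max
)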
Example 2
Source File: SocialPageRankJob.scala From spark-graphx with GNU General Public License v3.0
package com.github.graphx.pagerank

import com.github.graphx.pregel.social.SocialGraph
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.graphx.VertexRDD

object SocialPageRankJob {

  // Dynamic PageRank: iterate until the ranks converge below the given tolerance.
  // This helper was referenced but missing from the listing; the body below is
  // reconstructed from the standard GraphX API.
  def ranks(socialGraph: SocialGraph, tolerance: Double): VertexRDD[Double] =
    socialGraph.graph.pageRank(tolerance).vertices

  // Static PageRank: a fixed number of iterations; the tolerance argument is unused.
  def static(socialGraph: SocialGraph, tolerance: Double): VertexRDD[Double] =
    socialGraph.graph.staticPageRank(numIter = 20).vertices

  def handleResult(socialGraph: SocialGraph, ranks: VertexRDD[Double]) = {
    socialGraph.verts.join(ranks).map {
      case (_, (username, rank)) => (username, rank)
    }.sortBy({ case (_, rank) => rank }, ascending = false).take(10)
  }

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.ERROR)
    val sc = new SparkContext("local[*]", "PageRank")
    val socialGraph: SocialGraph = new SocialGraph(sc)
    val TOLERANCE: Double = 0.0001

    import scala.compat.Platform.{EOL => D}
    val topUsersDynamically = handleResult(socialGraph, ranks(socialGraph, TOLERANCE)).mkString(D)
    val topUsersIterative = handleResult(socialGraph, static(socialGraph, TOLERANCE)).mkString(D)

    println(s"Top 10 users in the network, dynamic PageRank converged to tolerance $TOLERANCE:$D$topUsersDynamically")
    println(s"Top 10 users in the network, static (fixed-iteration) PageRank:$D$topUsersIterative")
    sc.stop()
  }
}
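GraphX provides both variants used here: pageRank(tol) is the dynamic version, iterating until every vertex's rank changes by less than the tolerance between iterations, while staticPageRank(numIter) always runs a fixed number of iterations regardless of convergence, which is why the tolerance argument to static above goes unused.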
Example 3
Source File: EdmondsBCAggregator.scala From sparkling-graph with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds

import ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds.struct.EdmondsVertex
import org.apache.spark.graphx.{VertexRDD, _}

class EdmondsBCAggregator[ED] extends Serializable {

  def aggregate(graph: Graph[EdmondsVertex, ED], source: VertexId) = {
    val maxDepth = graph.vertices.aggregate(0)(
      { case (depth, (vId, vData)) => Math.max(vData.depth, depth) },
      Math.max
    )

    var g = graph
    var oldGraph: Option[Graph[EdmondsVertex, ED]] = None
    var messages = aggregateMessages(g, maxDepth).cache
    messages.count // materialize the cached messages

    // walk the BFS layers back from the deepest level towards the source
    for (i <- (1 until maxDepth).reverse) {
      oldGraph = Some(g)
      g = applyMessages(g, messages).cache
      val oldMessages = messages
      messages = aggregateMessages(g, i).cache
      messages.count // materialize before unpersisting the previous iteration
      oldMessages.unpersist(false)
      oldGraph.foreach(_.unpersistVertices(false))
      oldGraph.foreach(_.edges.unpersist(false))
    }
    messages.unpersist(false)
    g
  }

  private def aggregateMessages(graph: Graph[EdmondsVertex, ED], depth: Int) =
    graph.aggregateMessages[Double](
      edgeContext => {
        val sender = createAndSendMessage(edgeContext.toEdgeTriplet, depth) _
        sender(edgeContext.srcId, edgeContext.sendToDst)
        sender(edgeContext.dstId, edgeContext.sendToSrc)
      },
      _ + _
    )

  private def createAndSendMessage(triplet: EdgeTriplet[EdmondsVertex, ED], depth: Int)(source: VertexId, f: (Double) => Unit) = {
    val attr = triplet.vertexAttr(source)
    // only vertices on the current BFS layer send messages
    if (attr.depth == depth) sendMessage(produceMessage(triplet)(source), f)
  }

  private def produceMessage(triplet: EdgeTriplet[EdmondsVertex, ED])(source: VertexId) = {
    val attr = triplet.vertexAttr(source)
    val otherAttr = triplet.otherVertexAttr(source)
    val delta = (otherAttr.sigma.toDouble / attr.sigma.toDouble) * (1.0 + attr.delta)
    // deliver the dependency contribution only to actual predecessors
    if (attr.preds.contains(triplet.otherVertexId(source))) Some(delta) else None
  }

  private def sendMessage(message: Option[Double], f: (Double) => Unit) =
    message.foreach(f)

  private def applyMessages(graph: Graph[EdmondsVertex, ED], messages: VertexRDD[Double]) =
    graph.ops.joinVertices(messages)((vertexId, attr, delta) =>
      EdmondsVertex(attr.preds, attr.sigma, attr.depth, delta, delta)
    )
}
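The aggregate loop above is the backward sweep of a Brandes-style betweenness computation: a preceding forward phase is assumed to have labelled each EdmondsVertex with its BFS depth, shortest-path count σ, and predecessor list, and the dependencies δ are then accumulated layer by layer from the deepest level back toward the source via δ(v) = Σ_{w : v ∈ pred(w)} (σ(v) / σ(w)) · (1 + δ(w)), which is exactly the quantity produceMessage computes and applyMessages stores.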
Example 4
Source File: Modularity.scala From sparkling-graph with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.graph

import ml.sparkling.graph.api.operators.algorithms.community.CommunityDetection.ComponentID
import ml.sparkling.graph.api.operators.measures.{VertexDependentGraphMeasure, GraphIndependentMeasure}
import org.apache.spark.graphx.{EdgeTriplet, VertexRDD, Graph}
import org.apache.spark.rdd.RDD

import scala.reflect.ClassTag

object Modularity extends VertexDependentGraphMeasure[Double, ComponentID] {

  def compute[V <: ComponentID : ClassTag, E: ClassTag](graph: Graph[V, E]): Double = {
    val edgesNum = graph.numEdges.toDouble
    // per community: (edges inside the community, edges leaving the community)
    val edgesCounts: RDD[(V, (Int, Int))] = graph.triplets.flatMap(triplet => {
      if (triplet.srcAttr == triplet.dstAttr) {
        // an internal edge is counted once for each endpoint
        Iterator((triplet.srcAttr, (1, 0)), (triplet.srcAttr, (1, 0)))
      } else {
        Iterator((triplet.srcAttr, (0, 1)), (triplet.dstAttr, (0, 1)))
      }
    })
    edgesCounts.aggregateByKey((0, 0))(
      (agg: (Int, Int), data: (Int, Int)) => (agg, data) match {
        case ((a1, b1), (a2, b2)) => (a1 + a2, b1 + b2)
      },
      (agg1: (Int, Int), agg2: (Int, Int)) => (agg1, agg2) match {
        case ((a1, b1), (a2, b2)) => (a1 + a2, b1 + b2)
      }
    ).treeAggregate(0.0)(
      (agg: Double, data: (V, (Int, Int))) => data match {
        case (_, (edgesFull, edgesSome)) =>
          agg + (edgesFull / (2.0 * edgesNum)) - Math.pow((edgesSome + edgesFull) / (2.0 * edgesNum), 2)
      },
      (agg1, agg2) => agg1 + agg2
    )
  }
}
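For reference, the quantity computed above is the standard modularity. With m total edges, e_c internal edges of community c, and d_c the total degree of c, each internal edge contributes (1, 0) twice and each boundary edge contributes (0, 1) to each of its two communities, so edgesFull = 2·e_c and edgesFull + edgesSome = d_c, and the final aggregation evaluates

Q = Σ_c [ e_c / m − (d_c / (2m))² ]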
Example 5
Source File: BetweennessEdmonds$Test.scala From sparkling-graph with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds

import java.nio.file.Files

import ml.sparkling.graph.operators.MeasureTest
import org.apache.commons.io.FileUtils
import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Graph, VertexRDD}

class BetweennessEdmonds$Test(implicit sc: SparkContext) extends MeasureTest {
  val tempDir = Files.createTempDirectory("spark-checkpoint")

  override def beforeAll() = {
    sc.setCheckpointDir(tempDir.toAbsolutePath.toString)
  }

  override def afterAll() = {
    FileUtils.deleteDirectory(tempDir.toFile)
  }

  "Edmonds betweenness centrality for random graph" should "be correctly calculated" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/graph_ER_15")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)

    When("Computes betweenness")
    val result = EdmondsBC.computeBC(graph)

    Then("Should calculate betweenness correctly")
    val bcFile = getClass.getResource("/graphs/graph_ER_15_bc")
    val bcCorrectValues = sc.textFile(bcFile.getPath)
      .filter(_.nonEmpty)
      .map(l => { val t = l.split("\t", 2); (t(0).toInt, t(1).toDouble) })
      .sortBy({ case (vId, data) => vId })
      .map({ case (vId, data) => data }).collect()
    val bcValues = result.sortBy({ case (vId, data) => vId })
      .map({ case (vId, data) => data }).collect()
    bcCorrectValues.zip(bcValues).foreach({ case (a, b) => a should be(b +- 1e-5) })
    result.unpersist(false)
  }
}
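A note on the scaffolding: the test registers a Spark checkpoint directory in beforeAll before running EdmondsBC.computeBC. Iterative GraphX jobs like this one commonly checkpoint intermediate RDDs to truncate lineage chains that would otherwise grow with each iteration, and such computations fail at the first checkpoint call if no checkpoint directory has been configured.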