org.apache.spark.graphx.util.GraphGenerators Scala Examples
The following examples show how to use org.apache.spark.graphx.util.GraphGenerators.
Each example links to the original project and source file it was drawn from.
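As a quick orientation before the full examples, here is a minimal sketch exercising the four generators that appear throughout this page (logNormalGraph, gridGraph, starGraph, and rmatGraph). The vertex and edge counts are illustrative, and the local master setting assumes a standalone run:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.util.GraphGenerators

object GraphGeneratorsSketch extends App {
  // Assumes a local standalone run; cluster deployments set the master differently.
  val sc = new SparkContext(new SparkConf().setAppName("generators-sketch").setMaster("local[2]"))

  val logNormal = GraphGenerators.logNormalGraph(sc, numVertices = 50) // log-normal out-degree distribution
  val grid = GraphGenerators.gridGraph(sc, 4, 4)                       // 4x4 grid with Double edge weights
  val star = GraphGenerators.starGraph(sc, 11)                         // one hub (vertex 0) plus 10 leaves
  val rmat = GraphGenerators.rmatGraph(sc, 32, 64)                     // R-MAT power-law graph

  println(s"logNormal: ${logNormal.edges.count()} edges, rmat: ${rmat.edges.count()} edges")
  sc.stop()
}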
Example 1
Source File: GraphGeneration.scala From Mastering-Machine-Learning-with-Spark-2.x with MIT License
package com.github.maxpumperla.ml_spark.graphs

import org.apache.spark.graphx.lib.TriangleCount
import org.apache.spark.graphx.util.GraphGenerators
import org.apache.spark.graphx.{Graph, GraphLoader, PartitionStrategy, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object GraphGeneration extends App {

  val conf = new SparkConf()
    .setAppName("Graph generation")
    .setMaster("local[4]")
  val sc = new SparkContext(conf)

  val edgeListGraph = GraphLoader.edgeListFile(sc, "./edge_list.txt")

  val rawEdges: RDD[(VertexId, VertexId)] = sc.textFile("./edge_list.txt").map { line =>
    val field = line.split(" ")
    (field(0).toLong, field(1).toLong)
  }
  val edgeTupleGraph = Graph.fromEdgeTuples(rawEdges = rawEdges, defaultValue = "")

  val gridGraph = GraphGenerators.gridGraph(sc, 5, 5)
  val starGraph = GraphGenerators.starGraph(sc, 11)
  val logNormalGraph = GraphGenerators.logNormalGraph(
    sc, numVertices = 20, mu = 1, sigma = 3
  )
  logNormalGraph.outDegrees.map(_._2).collect().sorted

  val actorGraph = GraphLoader.edgeListFile(
    sc, "./ca-hollywood-2009.txt", true
  ).partitionBy(PartitionStrategy.RandomVertexCut)
  actorGraph.edges.count()

  val actorComponents = actorGraph.connectedComponents().cache
  actorComponents.vertices.map(_._2).distinct().count
  val clusterSizes = actorComponents.vertices.map(v => (v._2, 1)).reduceByKey(_ + _)
  clusterSizes.map(_._2).max
  clusterSizes.map(_._2).min

  val smallActorGraph = GraphLoader.edgeListFile(sc, "./ca-hollywood-2009.txt")
  val strongComponents = smallActorGraph.stronglyConnectedComponents(numIter = 5)
  strongComponents.vertices.map(_._2).distinct().count

  val canonicalGraph = actorGraph.mapEdges(e => 1).removeSelfEdges().convertToCanonicalEdges()
  val partitionedGraph = canonicalGraph.partitionBy(PartitionStrategy.RandomVertexCut)

  actorGraph.triangleCount()
  val triangles = TriangleCount.runPreCanonicalized(partitionedGraph)

  actorGraph.staticPageRank(10)
  val actorPrGraph: Graph[Double, Double] = actorGraph.pageRank(0.0001)
  actorPrGraph.vertices.reduce((v1, v2) => {
    if (v1._2 > v2._2) v1 else v2
  })

  actorPrGraph.inDegrees.filter(v => v._1 == 33024L).collect.foreach(println)
  actorPrGraph.inDegrees.map(_._2).collect().sorted.takeRight(10)
  actorPrGraph.inDegrees.map(_._2).filter(_ >= 62).count
}
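Note that several bare expressions in this example (the sorted out-degree array, the min/max cluster sizes, the distinct component counts) compute values that are discarded when run as a standalone App; they read naturally in a REPL, where each result is echoed. A minimal sketch of keeping and printing one of them, assuming the logNormalGraph value above is in scope:

// Hypothetical follow-up: materialize and display the out-degree distribution.
val sortedOutDegrees: Array[Int] = logNormalGraph.outDegrees.map(_._2).collect().sorted
println(sortedOutDegrees.mkString(", "))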
Example 2
Source File: SynthBenchmark.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.graphx

import java.io.{FileOutputStream, PrintWriter}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.{GraphXUtils, PartitionStrategy}
import org.apache.spark.graphx.util.GraphGenerators

object SynthBenchmark {

  def main(args: Array[String]) {
    val options = args.map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }

    var app = "pagerank"
    var niter = 10
    var numVertices = 100000
    var numEPart: Option[Int] = None
    var partitionStrategy: Option[PartitionStrategy] = None
    var mu: Double = 4.0
    var sigma: Double = 1.3
    var degFile: String = ""
    var seed: Int = -1

    options.foreach {
      case ("app", v) => app = v
      case ("niters", v) => niter = v.toInt
      case ("nverts", v) => numVertices = v.toInt
      case ("numEPart", v) => numEPart = Some(v.toInt)
      case ("partStrategy", v) => partitionStrategy = Some(PartitionStrategy.fromString(v))
      case ("mu", v) => mu = v.toDouble
      case ("sigma", v) => sigma = v.toDouble
      case ("degFile", v) => degFile = v
      case ("seed", v) => seed = v.toInt
      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
    }

    val conf = new SparkConf()
      .setAppName(s"GraphX Synth Benchmark (nverts = $numVertices, app = $app)")
    GraphXUtils.registerKryoClasses(conf)

    val sc = new SparkContext(conf)

    // Create the graph
    println(s"Creating graph...")
    val unpartitionedGraph = GraphGenerators.logNormalGraph(sc, numVertices,
      numEPart.getOrElse(sc.defaultParallelism), mu, sigma, seed)
    // Repartition the graph
    val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_)).cache()

    var startTime = System.currentTimeMillis()
    val numEdges = graph.edges.count()
    println(s"Done creating graph. Num Vertices = $numVertices, Num Edges = $numEdges")
    val loadTime = System.currentTimeMillis() - startTime

    // Collect the degree distribution (if desired)
    if (!degFile.isEmpty) {
      val fos = new FileOutputStream(degFile)
      val pos = new PrintWriter(fos)
      val hist = graph.vertices.leftJoin(graph.degrees)((id, _, optDeg) => optDeg.getOrElse(0))
        .map(p => p._2).countByValue()
      hist.foreach { case (deg, count) => pos.println(s"$deg \t $count") }
    }

    // Run PageRank
    startTime = System.currentTimeMillis()
    if (app == "pagerank") {
      println("Running PageRank")
      val totalPR = graph.staticPageRank(niter).vertices.map(_._2).sum()
      println(s"Total PageRank = $totalPR")
    } else if (app == "cc") {
      println("Running Connected Components")
      val numComponents = graph.connectedComponents.vertices.map(_._2).distinct().count()
      println(s"Number of components = $numComponents")
    }
    val runTime = System.currentTimeMillis() - startTime

    println(s"Num Vertices = $numVertices")
    println(s"Num Edges = $numEdges")
    println(s"Creation time = ${loadTime / 1000.0} seconds")
    println(s"Run time = ${runTime / 1000.0} seconds")

    sc.stop()
  }
}
// scalastyle:on println
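The option parser strips leading dashes and splits each argument on `=`, so the benchmark accepts `key=value` or `-key=value` pairs for app, niters, nverts, numEPart, partStrategy, mu, sigma, degFile, and seed. A hypothetical direct invocation for illustration (in practice the arguments arrive via spark-submit, and spark.master must be supplied externally since the conf sets none):

// Hypothetical: run the connected-components variant on a 50k-vertex graph.
SynthBenchmark.main(Array("app=cc", "nverts=50000", "numEPart=8", "seed=42"))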
Example 3
Source File: SSSPExample.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.graphx

// $example on$
import org.apache.spark.graphx.{Graph, VertexId}
import org.apache.spark.graphx.util.GraphGenerators
// $example off$
import org.apache.spark.sql.SparkSession

object SSSPExample {
  def main(args: Array[String]): Unit = {
    // Creates a SparkSession.
    val spark = SparkSession
      .builder
      .appName(s"${this.getClass.getSimpleName}")
      .getOrCreate()
    val sc = spark.sparkContext

    // $example on$
    // A graph with edge attributes containing distances
    val graph: Graph[Long, Double] =
      GraphGenerators.logNormalGraph(sc, numVertices = 100).mapEdges(e => e.attr.toDouble)
    val sourceId: VertexId = 42 // The ultimate source
    // Initialize the graph such that all vertices except the root have distance infinity.
    val initialGraph = graph.mapVertices((id, _) =>
      if (id == sourceId) 0.0 else Double.PositiveInfinity)
    val sssp = initialGraph.pregel(Double.PositiveInfinity)(
      (id, dist, newDist) => math.min(dist, newDist), // Vertex Program
      triplet => { // Send Message
        if (triplet.srcAttr + triplet.attr < triplet.dstAttr) {
          Iterator((triplet.dstId, triplet.srcAttr + triplet.attr))
        } else {
          Iterator.empty
        }
      },
      (a, b) => math.min(a, b) // Merge Message
    )
    println(sssp.vertices.collect.mkString("\n"))
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
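The three arguments to pregel are the vertex program (fold an incoming message into the vertex state), the send-message function (run per edge triplet, here relaxing edges Bellman-Ford style), and the message combiner. A small follow-up sketch, assuming the sssp graph from above is in scope (vertex 7 is an arbitrary illustrative choice):

// Hypothetical: read off the computed shortest distance to a single vertex.
val distTo7: Option[Double] =
  sssp.vertices.filter { case (id, _) => id == 7L }.map(_._2).collect().headOption
println(s"distance(42 -> 7) = ${distTo7.getOrElse(Double.PositiveInfinity)}")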
Example 4
Source File: GraphDataGen.scala From spark-bench with Apache License 2.0
package com.ibm.sparktc.sparkbench.datageneration

import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import com.ibm.sparktc.sparkbench.utils.{SaveModes, SparkBenchException}
import com.ibm.sparktc.sparkbench.utils.GeneralFunctions.{any2Long, getOrDefault, getOrThrow, time}
import com.ibm.sparktc.sparkbench.workload.{Workload, WorkloadDefaults}
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}
import org.apache.spark.graphx.util.GraphGenerators

object GraphDataGen extends WorkloadDefaults {

  val name = "graph-data-generator"
  val defaultMu = 4.0
  val defaultSigma = 1.3
  val defaultSeed = -1L
  val defaultNumOfPartitions = 0

  override def apply(m: Map[String, Any]): GraphDataGen = {
    val numVertices = getOrThrow(m, "vertices").asInstanceOf[Int]
    val mu = getOrDefault[Double](m, "mu", defaultMu)
    val sigma = getOrDefault[Double](m, "sigma", defaultSigma)
    val numPartitions = getOrDefault[Int](m, "partitions", defaultNumOfPartitions)
    val seed = getOrDefault[Long](m, "seed", defaultSeed, any2Long)
    val output = {
      val str = getOrThrow(m, "output").asInstanceOf[String]
      val s = verifySuitabilityOfOutputFileFormat(str)
      Some(s)
    }
    val saveMode = getOrDefault[String](m, "save-mode", SaveModes.error)

    new GraphDataGen(
      numVertices = numVertices,
      input = None,
      output = output,
      saveMode = saveMode,
      mu = mu,
      sigma = sigma,
      seed = seed,
      numPartitions = numPartitions
    )
  }

  private[datageneration] def verifySuitabilityOfOutputFileFormat(str: String): String = {
    val strArr: Array[String] = str.split('.')
    (strArr.length, strArr.last) match {
      case (1, _) => throw SparkBenchException("Output file for GraphDataGen must have \".txt\" as the file extension. " +
        "Please modify your config file.")
      case (2, "txt") => str
      case (_, _) => throw SparkBenchException("Due to limitations of the GraphX GraphLoader, " +
        "the graph data generators may only save files as \".txt\". " +
        "Please modify your config file.")
    }
  }
}

case class GraphDataGen(
    numVertices: Int,
    input: Option[String] = None,
    output: Option[String],
    saveMode: String,
    mu: Double = 4.0,
    sigma: Double = 1.3,
    seed: Long = 1,
    numPartitions: Int = 0
  ) extends Workload {

  override def doWorkload(df: Option[DataFrame] = None, spark: SparkSession): DataFrame = {
    val timestamp = System.currentTimeMillis()
    val (generateTime, graph) = time(GraphGenerators.logNormalGraph(spark.sparkContext, numVertices, numPartitions, mu, sigma))
    val (convertTime, out) = time(graph.edges.map(e => s"${e.srcId.toString} ${e.dstId}"))
    val (saveTime, _) = time(out.saveAsTextFile(output.get))

    val timeResultSchema = StructType(
      List(
        StructField("name", StringType, nullable = false),
        StructField("timestamp", LongType, nullable = false),
        StructField("generate", LongType, nullable = true),
        StructField("convert", LongType, nullable = true),
        StructField("save", LongType, nullable = true),
        StructField("total_runtime", LongType, nullable = false)
      )
    )

    val total = generateTime + convertTime + saveTime
    val timeList = spark.sparkContext.parallelize(Seq(Row(GraphDataGen.name, timestamp, generateTime, convertTime, saveTime, total)))
    spark.createDataFrame(timeList, timeResultSchema)
  }
}
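The companion object's apply builds the workload from a plain configuration map: "vertices" and "output" are required; "mu", "sigma", "seed", "partitions", and "save-mode" are optional; and the output path must carry a ".txt" extension to satisfy verifySuitabilityOfOutputFileFormat. A hypothetical construction (the path is illustrative):

// Hypothetical config map; "vertices" and "output" are the only required keys.
val workload: GraphDataGen = GraphDataGen(Map(
  "vertices" -> 1000,
  "output" -> "/tmp/lognormal_edges.txt",
  "mu" -> 4.0,
  "sigma" -> 1.3
))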
Example 5
Source File: ShortestPathsSuite.scala From iolap with Apache License 2.0
package org.apache.spark.graphx.lib

import org.apache.spark.{SparkContext, SparkFunSuite}
import org.apache.spark.SparkContext._
import org.apache.spark.graphx._
import org.apache.spark.graphx.lib._
import org.apache.spark.graphx.util.GraphGenerators
import org.apache.spark.rdd._

class ShortestPathsSuite extends SparkFunSuite with LocalSparkContext {

  test("Shortest Path Computations") {
    withSpark { sc =>
      val shortestPaths = Set(
        (1, Map(1 -> 0, 4 -> 2)), (2, Map(1 -> 1, 4 -> 2)), (3, Map(1 -> 2, 4 -> 1)),
        (4, Map(1 -> 2, 4 -> 0)), (5, Map(1 -> 1, 4 -> 1)), (6, Map(1 -> 3, 4 -> 1)))
      val edgeSeq = Seq((1, 2), (1, 5), (2, 3), (2, 5), (3, 4), (4, 5), (4, 6)).flatMap {
        case e => Seq(e, e.swap)
      }
      val edges = sc.parallelize(edgeSeq).map { case (v1, v2) => (v1.toLong, v2.toLong) }
      val graph = Graph.fromEdgeTuples(edges, 1)
      val landmarks = Seq(1, 4).map(_.toLong)
      val results = ShortestPaths.run(graph, landmarks).vertices.collect.map {
        case (v, spMap) => (v, spMap.mapValues(i => i))
      }
      assert(results.toSet === shortestPaths)
    }
  }

}
Example 6
Source File: StronglyConnectedComponentsSuite.scala From iolap with Apache License 2.0
package org.apache.spark.graphx.lib

import org.apache.spark.{SparkContext, SparkFunSuite}
import org.apache.spark.SparkContext._
import org.apache.spark.graphx._
import org.apache.spark.graphx.util.GraphGenerators
import org.apache.spark.rdd._

class StronglyConnectedComponentsSuite extends SparkFunSuite with LocalSparkContext {

  test("Island Strongly Connected Components") {
    withSpark { sc =>
      val vertices = sc.parallelize((1L to 5L).map(x => (x, -1)))
      val edges = sc.parallelize(Seq.empty[Edge[Int]])
      val graph = Graph(vertices, edges)
      val sccGraph = graph.stronglyConnectedComponents(5)
      for ((id, scc) <- sccGraph.vertices.collect()) {
        assert(id === scc)
      }
    }
  }

  test("Cycle Strongly Connected Components") {
    withSpark { sc =>
      val rawEdges = sc.parallelize((0L to 6L).map(x => (x, (x + 1) % 7)))
      val graph = Graph.fromEdgeTuples(rawEdges, -1)
      val sccGraph = graph.stronglyConnectedComponents(20)
      for ((id, scc) <- sccGraph.vertices.collect()) {
        assert(0L === scc)
      }
    }
  }

  test("2 Cycle Strongly Connected Components") {
    withSpark { sc =>
      val edges = Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++
        Array(3L -> 4L, 4L -> 5L, 5L -> 3L) ++
        Array(6L -> 0L, 5L -> 7L)
      val rawEdges = sc.parallelize(edges)
      val graph = Graph.fromEdgeTuples(rawEdges, -1)
      val sccGraph = graph.stronglyConnectedComponents(20)
      for ((id, scc) <- sccGraph.vertices.collect()) {
        if (id < 3) {
          assert(0L === scc)
        } else if (id < 6) {
          assert(3L === scc)
        } else {
          assert(id === scc)
        }
      }
    }
  }

}
Example 7
Source File: ShortestPathsSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.graphx.lib

import org.apache.spark.{SparkContext, SparkFunSuite}
import org.apache.spark.SparkContext._
import org.apache.spark.graphx._
import org.apache.spark.graphx.lib._
import org.apache.spark.graphx.util.GraphGenerators
import org.apache.spark.rdd._

class ShortestPathsSuite extends SparkFunSuite with LocalSparkContext {

  test("Shortest Path Computations") { // computing shortest paths
    withSpark { sc =>
      val shortestPaths = Set(
        (1, Map(1 -> 0, 4 -> 2)), (2, Map(1 -> 1, 4 -> 2)), (3, Map(1 -> 2, 4 -> 1)),
        (4, Map(1 -> 2, 4 -> 0)), (5, Map(1 -> 1, 4 -> 1)), (6, Map(1 -> 3, 4 -> 1)))
      val edgeSeq = Seq((1, 2), (1, 5), (2, 3), (2, 5), (3, 4), (4, 5), (4, 6)).flatMap {
        case e => Seq(e, e.swap)
      }
      val edges = sc.parallelize(edgeSeq).map { case (v1, v2) => (v1.toLong, v2.toLong) }
      // Build the graph from the two vertex IDs of each edge
      val graph = Graph.fromEdgeTuples(edges, 1)
      val landmarks = Seq(1, 4).map(_.toLong)
      val results = ShortestPaths.run(graph, landmarks).vertices.collect.map {
        case (v, spMap) => (v, spMap.mapValues(i => i))
      }
      assert(results.toSet === shortestPaths)
    }
  }

}
Example 8
Source File: StronglyConnectedComponentsSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.graphx.lib

import org.apache.spark.{SparkContext, SparkFunSuite}
import org.apache.spark.SparkContext._
import org.apache.spark.graphx._
import org.apache.spark.graphx.util.GraphGenerators
import org.apache.spark.rdd._

class StronglyConnectedComponentsSuite extends SparkFunSuite with LocalSparkContext {

  test("Island Strongly Connected Components") { // isolated-vertex strongly connected components
    withSpark { sc =>
      val vertices = sc.parallelize((1L to 5L).map(x => (x, -1)))
      val edges = sc.parallelize(Seq.empty[Edge[Int]])
      val graph = Graph(vertices, edges)
      val sccGraph = graph.stronglyConnectedComponents(5)
      for ((id, scc) <- sccGraph.vertices.collect()) {
        assert(id === scc)
      }
    }
  }

  test("Cycle Strongly Connected Components") { // single-cycle strongly connected components
    withSpark { sc =>
      val rawEdges = sc.parallelize((0L to 6L).map(x => (x, (x + 1) % 7)))
      // Build the graph from the two vertex IDs of each edge
      val graph = Graph.fromEdgeTuples(rawEdges, -1)
      val sccGraph = graph.stronglyConnectedComponents(20)
      for ((id, scc) <- sccGraph.vertices.collect()) {
        assert(0L === scc)
      }
    }
  }

  test("2 Cycle Strongly Connected Components") { // two-cycle strongly connected components
    withSpark { sc =>
      val edges = Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++
        Array(3L -> 4L, 4L -> 5L, 5L -> 3L) ++
        Array(6L -> 0L, 5L -> 7L)
      val rawEdges = sc.parallelize(edges)
      // Build the graph from the two vertex IDs of each edge
      val graph = Graph.fromEdgeTuples(rawEdges, -1)
      val sccGraph = graph.stronglyConnectedComponents(20)
      for ((id, scc) <- sccGraph.vertices.collect()) {
        if (id < 3) {
          assert(0L === scc)
        } else if (id < 6) {
          assert(3L === scc)
        } else {
          assert(id === scc)
        }
      }
    }
  }

}
Example 9
Source File: Modularity$Test.scala From sparkling-graph with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.graph

import ml.sparkling.graph.operators.MeasureTest
import ml.sparkling.graph.operators.OperatorsDSL._
import org.apache.spark.SparkContext
import org.apache.spark.graphx.Graph
import org.apache.spark.graphx.util.GraphGenerators

class Modularity$Test(implicit sc: SparkContext) extends MeasureTest {

  "Modularity for star graph in one community" should "be 0" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/6_nodes_star")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    val graphComponents = graph.PSCAN(epsilon = 0)
    When("Computes Modularity")
    val result = Modularity.compute(graphComponents)
    Then("Should calculate Modularity")
    result should be (0)
    graph.unpersist(true)
  }

  "Modularity for grid graph in one community" should "be 0" in {
    Given("graph")
    val graph = GraphGenerators.gridGraph(sc, 5, 5).mapEdges(_ => 1).mapVertices((_, _) => 1)
    val graphComponents = graph.PSCAN(epsilon = 0)
    When("Computes Modularity")
    val result = Modularity.compute(graphComponents)
    Then("Should calculate Modularity")
    result should be (0)
    graph.unpersist(true)
  }

  "Modularity for grid graph in one-node communities" should "be -0.041875" in {
    Given("graph")
    val graph = GraphGenerators.gridGraph(sc, 5, 5)
    val graphComponents = graph.PSCAN(epsilon = 1)
    When("Computes Modularity")
    val result = Modularity.compute(graphComponents)
    Then("Should calculate Modularity")
    result should be (-0.041875 +- 0.000000001)
    graph.unpersist(true)
  }

  "Modularity for star graph in one community" should "be 0 when calculated using DSL" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/6_nodes_star")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    val graphComponents = graph.PSCAN(epsilon = 0)
    When("Computes Modularity")
    val result = graphComponents.modularity()
    Then("Should calculate Modularity")
    result should be (0)
    graph.unpersist(true)
  }

  "Modularity for all single components" should "be -0.3" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/6_nodes_star")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    val graphComponents = graph.PSCAN(epsilon = 1)
    When("Computes Modularity")
    val result = graphComponents.modularity()
    Then("Should calculate Modularity")
    result should be (-0.3 +- 0.000000001)
    graph.unpersist(true)
  }

}
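These tests lean on the extremes of PSCAN(epsilon = ...): epsilon = 0 merges each connected component into a single community, while epsilon = 1 leaves every vertex in its own community. A minimal sketch of the same pattern on a generated grid graph, assuming the OperatorsDSL._ import and an in-scope SparkContext as above:

// Sketch: community assignments at the two epsilon extremes.
val grid = GraphGenerators.gridGraph(sc, 5, 5).mapEdges(_ => 1).mapVertices((_, _) => 1)
val oneCommunityPerComponent = grid.PSCAN(epsilon = 0)
val oneCommunityPerVertex = grid.PSCAN(epsilon = 1)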
Example 10
Source File: PSCAN$Test.scala From sparkling-graph with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.algorithms.community.pscan

import ml.sparkling.graph.api.operators.algorithms.community.CommunityDetection.ComponentID
import ml.sparkling.graph.operators.MeasureTest
import ml.sparkling.graph.operators.OperatorsDSL._
import org.apache.spark.SparkContext
import org.apache.spark.graphx.Graph
import org.apache.spark.graphx.util.GraphGenerators

class PSCAN$Test(implicit sc: SparkContext) extends MeasureTest {

  "Components for full graph" should "be computed" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/4_nodes_full")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("Computes components")
    val components: Graph[ComponentID, Int] = PSCAN.computeConnectedComponents(graph)
    Then("Should compute components correctly")
    components.vertices.map { case (vId, cId) => cId }.distinct().collect().size should equal (1)
    graph.unpersist(true)
  }

  "Components for full graph" should "be computed using DSL" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/4_nodes_full")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("Computes components")
    val components: Graph[ComponentID, Int] = graph.PSCAN()
    Then("Should compute components correctly")
    components.vertices.map { case (vId, cId) => cId }.distinct().collect().size should equal (1)
    graph.unpersist(true)
  }

  "Components for ring graph" should "be computed" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/5_nodes_directed")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("Computes components")
    val components: Graph[ComponentID, Int] = PSCAN.computeConnectedComponents(graph)
    Then("Should compute components correctly")
    components.vertices.map { case (vId, cId) => cId }.distinct().collect().size should equal (5)
    graph.unpersist(true)
  }

  "Components for 3 component graph" should "be computed" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/coarsening_to_3")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("Computes components")
    val components: Graph[ComponentID, Int] = PSCAN.computeConnectedComponents(graph)
    Then("Should compute components correctly")
    components.vertices.map { case (vId, cId) => cId }.distinct().collect().size should equal (3)
    graph.unpersist(true)
  }

  "Dynamic components detection for 3 component graph" should "be computed" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/coarsening_to_3")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("Computes components")
    val (_, numberOfComponents) = PSCAN.computeConnectedComponentsUsing(graph, 3)
    Then("Should compute components correctly")
    numberOfComponents should equal (3)
    graph.unpersist(true)
  }

  "Dynamic components detection for RMAT graph" should "be computed" in {
    for (x <- 0 to 10) {
      Given("graph")
      val graph: Graph[Int, Int] = GraphGenerators.rmatGraph(sc, 33, 132)
      When("Computes components")
      val (_, numberOfComponents) = PSCAN.computeConnectedComponentsUsing(graph, 24)
      Then("Should compute components correctly")
      numberOfComponents should equal (24L +- 5L)
      graph.unpersist(true)
    }
  }

  "Dynamic components detection for random graph" should "be computed" in {
    Given("graph")
    val graph: Graph[Int, Int] = GraphGenerators.rmatGraph(sc, 1000, 10000)
    When("Computes components")
    val (_, numberOfComponents) = PSCAN.computeConnectedComponentsUsing(graph, 24)
    Then("Should compute components correctly")
    numberOfComponents should equal (24L +- 5L)
    graph.unpersist(true)
  }

}
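computeConnectedComponentsUsing(graph, n) appears to tune epsilon toward a target of roughly n communities and returns the component graph together with the count actually found, which is why the assertions above allow a +- 5 tolerance. A minimal sketch of inspecting that count, assuming the same imports and SparkContext:

// Sketch: request ~24 communities on a generated R-MAT graph.
val rmat: Graph[Int, Int] = GraphGenerators.rmatGraph(sc, 1000, 10000)
val (_, found) = PSCAN.computeConnectedComponentsUsing(rmat, 24)
println(s"communities found: $found")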