org.apache.spark.graphx.GraphXUtils Scala Examples
The following examples show how to use org.apache.spark.graphx.GraphXUtils.
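Both examples call GraphXUtils.registerKryoClasses on a SparkConf before constructing the SparkContext, which registers GraphX's internal vertex and edge types with the Kryo serializer. A minimal sketch of that shared pattern (the master and app name are illustrative, not taken from either example):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.GraphXUtils

val conf = new SparkConf()
  .setMaster("local[*]")          // illustrative master
  .setAppName("graphx-kryo-demo") // illustrative app name
// Must run before the SparkContext is created so the
// registrations are picked up by the serializer.
GraphXUtils.registerKryoClasses(conf)
val sc = new SparkContext(conf)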
Example 1
Source File: SynthBenchmark.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.graphx

import java.io.{FileOutputStream, PrintWriter}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.{GraphXUtils, PartitionStrategy}
import org.apache.spark.graphx.util.GraphGenerators

object SynthBenchmark {

  def main(args: Array[String]) {
    val options = args.map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }

    var app = "pagerank"
    var niter = 10
    var numVertices = 100000
    var numEPart: Option[Int] = None
    var partitionStrategy: Option[PartitionStrategy] = None
    var mu: Double = 4.0
    var sigma: Double = 1.3
    var degFile: String = ""
    var seed: Int = -1

    options.foreach {
      case ("app", v) => app = v
      case ("niters", v) => niter = v.toInt
      case ("nverts", v) => numVertices = v.toInt
      case ("numEPart", v) => numEPart = Some(v.toInt)
      case ("partStrategy", v) => partitionStrategy = Some(PartitionStrategy.fromString(v))
      case ("mu", v) => mu = v.toDouble
      case ("sigma", v) => sigma = v.toDouble
      case ("degFile", v) => degFile = v
      case ("seed", v) => seed = v.toInt
      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
    }

    val conf = new SparkConf()
      .setAppName(s"GraphX Synth Benchmark (nverts = $numVertices, app = $app)")
    // Register GraphX's internal classes with Kryo before creating the context
    GraphXUtils.registerKryoClasses(conf)

    val sc = new SparkContext(conf)

    // Create the graph
    println(s"Creating graph...")
    val unpartitionedGraph = GraphGenerators.logNormalGraph(sc, numVertices,
      numEPart.getOrElse(sc.defaultParallelism), mu, sigma, seed)
    // Repartition the graph
    val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_)).cache()

    var startTime = System.currentTimeMillis()
    val numEdges = graph.edges.count()
    println(s"Done creating graph. Num Vertices = $numVertices, Num Edges = $numEdges")
    val loadTime = System.currentTimeMillis() - startTime

    // Collect the degree distribution (if desired)
    if (!degFile.isEmpty) {
      val fos = new FileOutputStream(degFile)
      val pos = new PrintWriter(fos)
      val hist = graph.vertices.leftJoin(graph.degrees)((id, _, optDeg) => optDeg.getOrElse(0))
        .map(p => p._2).countByValue()
      hist.foreach { case (deg, count) => pos.println(s"$deg \t $count") }
      // Flush and close the histogram file (also closes the underlying stream)
      pos.close()
    }

    // Run PageRank
    startTime = System.currentTimeMillis()
    if (app == "pagerank") {
      println("Running PageRank")
      val totalPR = graph.staticPageRank(niter).vertices.map(_._2).sum()
      println(s"Total PageRank = $totalPR")
    } else if (app == "cc") {
      println("Running Connected Components")
      val numComponents = graph.connectedComponents.vertices.map(_._2).distinct().count()
      println(s"Number of components = $numComponents")
    }
    val runTime = System.currentTimeMillis() - startTime

    println(s"Num Vertices = $numVertices")
    println(s"Num Edges = $numEdges")
    println(s"Creation time = ${loadTime/1000.0} seconds")
    println(s"Run time = ${runTime/1000.0} seconds")

    sc.stop()
  }
}
// scalastyle:on println
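Because the option parser strips leading dashes and splits each argument on `=`, a run from a Spark distribution might look like the following (the run-example launcher is how Spark's bundled examples are normally started; the exact flags shown are illustrative):

bin/run-example graphx.SynthBenchmark -app=pagerank -niters=5 -nverts=50000 -partStrategy=EdgePartition2D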
Example 2
Source File: LocalSparkContext.scala From sandpiper with Apache License 2.0
package sparkle.util

import org.apache.log4j.{Level, Logger}
import org.apache.spark.graphx.GraphXUtils
import org.apache.spark.{SparkConf, SparkContext}

object LocalSparkContext {

  // Runs f against a fresh local SparkContext and always stops it afterwards.
  def withSpark[T](f: SparkContext => T): T = {
    val conf = new SparkConf()
    GraphXUtils.registerKryoClasses(conf)
    val sc = new SparkContext("local", "test", conf)
    // Silence Spark's and Akka's logging during tests
    Logger.getLogger("org").setLevel(Level.OFF)
    Logger.getLogger("akka").setLevel(Level.OFF)
    try {
      f(sc)
    } finally {
      sc.stop()
    }
  }
}
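A sketch of how a test might use this helper (the job body is illustrative):

// Each call gets a fresh local context; sc.stop() runs even if the body throws.
val n = LocalSparkContext.withSpark { sc =>
  sc.parallelize(1 to 100).count()
}

The try/finally guarantees the context is torn down between tests, so suites cannot leak a running SparkContext into the next case.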