org.apache.spark.graphx.impl.GraphImpl Scala Examples
The following examples show how to use org.apache.spark.graphx.impl.GraphImpl.
Each example notes its source file, the project it was taken from, and that project's license.
Example 1
Source File: GraphLoader.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.graphx

import org.apache.spark.SparkContext
import org.apache.spark.graphx.impl.{EdgePartitionBuilder, GraphImpl}
import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel

object GraphLoader extends Logging {

  def edgeListFile(
      sc: SparkContext,
      path: String,
      canonicalOrientation: Boolean = false,
      numEdgePartitions: Int = -1,
      edgeStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY,
      vertexStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY)
    : Graph[Int, Int] = {
    val startTime = System.currentTimeMillis

    // Parse the edge data table directly into edge partitions
    val lines =
      if (numEdgePartitions > 0) {
        sc.textFile(path, numEdgePartitions).coalesce(numEdgePartitions)
      } else {
        sc.textFile(path)
      }
    val edges = lines.mapPartitionsWithIndex { (pid, iter) =>
      val builder = new EdgePartitionBuilder[Int, Int]
      iter.foreach { line =>
        if (!line.isEmpty && line(0) != '#') {
          val lineArray = line.split("\\s+")
          if (lineArray.length < 2) {
            throw new IllegalArgumentException("Invalid line: " + line)
          }
          val srcId = lineArray(0).toLong
          val dstId = lineArray(1).toLong
          if (canonicalOrientation && srcId > dstId) {
            builder.add(dstId, srcId, 1)
          } else {
            builder.add(srcId, dstId, 1)
          }
        }
      }
      Iterator((pid, builder.toEdgePartition))
    }.persist(edgeStorageLevel).setName("GraphLoader.edgeListFile - edges (%s)".format(path))
    edges.count()

    logInfo("It took %d ms to load the edges".format(System.currentTimeMillis - startTime))

    GraphImpl.fromEdgePartitions(edges, defaultVertexAttr = 1,
      edgeStorageLevel = edgeStorageLevel, vertexStorageLevel = vertexStorageLevel)
  } // end of edgeListFile
}
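A minimal driver sketch showing how edgeListFile is typically called; the application name, master URL, file path, and partition count are illustrative assumptions, not part of the source above:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.GraphLoader

val sc = new SparkContext(new SparkConf().setAppName("EdgeListDemo").setMaster("local[*]"))

// Edge list file: one "srcId dstId" pair per line, '#' starts a comment line.
val graph = GraphLoader.edgeListFile(sc, "data/followers.txt",
  canonicalOrientation = true, // store each edge with srcId < dstId
  numEdgePartitions = 4)

println(s"vertices: ${graph.numVertices}, edges: ${graph.numEdges}")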
Example 2
Source File: GraphLoader.scala From graphx-algorithm with GNU General Public License v2.0
package org.apache.spark.graphx

import org.apache.spark.storage.StorageLevel
import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.graphx.impl.{EdgePartitionBuilder, GraphImpl}

object GraphLoader extends Logging {

  def edgeListFile(
      sc: SparkContext,
      path: String,
      canonicalOrientation: Boolean = false,
      numEdgePartitions: Int = -1,
      edgeStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY,
      vertexStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY)
    : Graph[Int, Int] = {
    val startTime = System.currentTimeMillis

    // Parse the edge data table directly into edge partitions
    val lines =
      if (numEdgePartitions > 0) {
        sc.textFile(path, numEdgePartitions).coalesce(numEdgePartitions)
      } else {
        sc.textFile(path)
      }
    val edges = lines.mapPartitionsWithIndex { (pid, iter) =>
      val builder = new EdgePartitionBuilder[Int, Int]
      iter.foreach { line =>
        if (!line.isEmpty && line(0) != '#') {
          val lineArray = line.split("\\s+")
          if (lineArray.length < 2) {
            // Unlike the stock loader, this variant only warns on malformed
            // lines; it must then skip them, or lineArray(1) below would throw.
            logWarning("Invalid line: " + line)
          } else {
            val srcId = lineArray(0).toLong
            val dstId = lineArray(1).toLong
            if (canonicalOrientation && srcId > dstId) {
              builder.add(dstId, srcId, 1)
            } else {
              builder.add(srcId, dstId, 1)
            }
          }
        }
      }
      Iterator((pid, builder.toEdgePartition))
    }.persist(edgeStorageLevel).setName("GraphLoader.edgeListFile - edges (%s)".format(path))
    edges.count()

    logInfo("It took %d ms to load the edges".format(System.currentTimeMillis - startTime))

    GraphImpl.fromEdgePartitions(edges, defaultVertexAttr = 1,
      edgeStorageLevel = edgeStorageLevel, vertexStorageLevel = vertexStorageLevel)
  } // end of edgeListFile
}
Example 3
Source File: GraphLoaderPlus.scala From graphx-algorithm with GNU General Public License v2.0
package org.apache.spark.graphx

import org.apache.spark.storage.StorageLevel
import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.graphx.impl.{EdgePartitionBuilder, GraphImpl}

object GraphLoaderPlus extends Logging {

  def edgeListFile(
      sc: SparkContext,
      path: String,
      canonicalOrientation: Boolean = false,
      numEdgePartitions: Int = -1,
      edgeStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY,
      vertexStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY)
    : Graph[Int, Int] = {
    val startTime = System.currentTimeMillis

    // Parse the edge data table directly into edge partitions
    val lines =
      if (numEdgePartitions > 0) {
        sc.textFile(path, numEdgePartitions).coalesce(numEdgePartitions)
      } else {
        sc.textFile(path)
      }
    val edges = lines.mapPartitionsWithIndex { (pid, iter) =>
      val builder = new EdgePartitionBuilder[Int, Int]
      iter.foreach { line =>
        if (!line.isEmpty && line(0) != '#') {
          val lineArray = line.split("\\s+")
          if (lineArray.length < 2) {
            throw new IllegalArgumentException("Invalid line: " + line)
          }
          if (lineArray.length == 2) {
            val srcId = lineArray(0).toLong
            val dstId = lineArray(1).toLong
            if (canonicalOrientation && srcId > dstId) {
              builder.add(dstId, srcId, 1)
            } else {
              builder.add(srcId, dstId, 1)
            }
          } else {
            val srcId = lineArray(0).toLong
            val dstId = lineArray(1).toLong
            val weight = lineArray(2).toInt
            if (canonicalOrientation && srcId > dstId) {
              builder.add(dstId, srcId, weight)
            } else {
              builder.add(srcId, dstId, weight)
            }
          }
        }
      }
      Iterator((pid, builder.toEdgePartition))
    }.persist(edgeStorageLevel).setName("GraphLoaderPlus.edgeListFile - edges (%s)".format(path))
    edges.count()

    logInfo("It took %d ms to load the edges".format(System.currentTimeMillis - startTime))

    GraphImpl.fromEdgePartitions(edges, defaultVertexAttr = 1,
      edgeStorageLevel = edgeStorageLevel, vertexStorageLevel = vertexStorageLevel)
  } // end of edgeListFile
}
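A hedged usage sketch: GraphLoaderPlus accepts the same two-column format as GraphLoader plus an optional integer weight column. The file name below is hypothetical, and sc is assumed to be an existing SparkContext:

// data/weighted-edges.txt (hypothetical contents):
//   # comment lines are skipped
//   1 2 5     <- srcId dstId weight
//   2 3       <- weight column omitted, edge weight defaults to 1
val graph = GraphLoaderPlus.edgeListFile(sc, "data/weighted-edges.txt",
  canonicalOrientation = true)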
Example 4
Source File: GraphLoader.scala From sparkoscope with Apache License 2.0
The code is identical to Example 1; only the originating project differs.
Example 5
Source File: GraphLoader.scala From multi-tenancy-spark with Apache License 2.0
The code is identical to Example 1; only the originating project differs.
Example 6
Source File: GraphLoader.scala From iolap with Apache License 2.0
package org.apache.spark.graphx

import org.apache.spark.storage.StorageLevel
import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.graphx.impl.{EdgePartitionBuilder, GraphImpl}

object GraphLoader extends Logging {

  def edgeListFile(
      sc: SparkContext,
      path: String,
      canonicalOrientation: Boolean = false,
      numEdgePartitions: Int = -1,
      edgeStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY,
      vertexStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY)
    : Graph[Int, Int] = {
    val startTime = System.currentTimeMillis

    // Parse the edge data table directly into edge partitions
    val lines =
      if (numEdgePartitions > 0) {
        sc.textFile(path, numEdgePartitions).coalesce(numEdgePartitions)
      } else {
        sc.textFile(path)
      }
    val edges = lines.mapPartitionsWithIndex { (pid, iter) =>
      val builder = new EdgePartitionBuilder[Int, Int]
      iter.foreach { line =>
        if (!line.isEmpty && line(0) != '#') {
          val lineArray = line.split("\\s+")
          if (lineArray.length < 2) {
            throw new IllegalArgumentException("Invalid line: " + line)
          }
          val srcId = lineArray(0).toLong
          val dstId = lineArray(1).toLong
          if (canonicalOrientation && srcId > dstId) {
            builder.add(dstId, srcId, 1)
          } else {
            builder.add(srcId, dstId, 1)
          }
        }
      }
      Iterator((pid, builder.toEdgePartition))
    }.persist(edgeStorageLevel).setName("GraphLoader.edgeListFile - edges (%s)".format(path))
    edges.count()

    logInfo("It took %d ms to load the edges".format(System.currentTimeMillis - startTime))

    GraphImpl.fromEdgePartitions(edges, defaultVertexAttr = 1,
      edgeStorageLevel = edgeStorageLevel, vertexStorageLevel = vertexStorageLevel)
  } // end of edgeListFile
}
Example 7
Source File: GraphLoader.scala From spark1.52 with Apache License 2.0
The code is identical to Example 6, apart from one added Chinese comment that translates to the English comment already present ("Parse the edge data table directly into edge partitions").
Example 8
Source File: GraphLoader.scala From Spark-2.3.1 with Apache License 2.0
The code is identical to Example 1; only the originating project differs.
Example 9
Source File: GraphLoader.scala From BigDatalog with Apache License 2.0
The code is identical to Example 6; only the originating project differs.
Example 10
Source File: GraphxNWeight.scala From Swallow with Apache License 2.0
package com.intel.hibench.sparkbench.graph.nweight

import scala.collection.JavaConversions._

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.HashPartitioner
import org.apache.spark.storage.StorageLevel
import org.apache.spark.graphx._
import org.apache.spark.graphx.impl.GraphImpl

import it.unimi.dsi.fastutil.longs.Long2DoubleOpenHashMap

object GraphxNWeight extends Serializable {

  // Send each destination vertex's weighted-neighbor list back to the source,
  // scaling every weight by the connecting edge's attribute.
  def mapF(edge: EdgeContext[SizedPriorityQueue, Double, Long2DoubleOpenHashMap]) = {
    val theMap = new Long2DoubleOpenHashMap()
    val edgeAttribute = edge.attr
    val id = edge.srcId
    edge.dstAttr.foreach { case (target, wn) =>
      if (target != id) theMap.put(target, wn * edgeAttribute)
    }
    edge.sendToSrc(theMap)
  }

  // Merge two partial messages by summing the weights per target vertex.
  def reduceF(c1: Long2DoubleOpenHashMap, c2: Long2DoubleOpenHashMap) = {
    c2.long2DoubleEntrySet()
      .fastIterator()
      .foreach(pair => c1.put(pair.getLongKey(), c1.get(pair.getLongKey()) + pair.getDoubleValue()))
    c1
  }

  // Replace a vertex's neighbor queue with the merged message; the
  // SizedPriorityQueue keeps only the maxDegree heaviest entries.
  def updateF(id: VertexId, vdata: SizedPriorityQueue, msg: Option[Long2DoubleOpenHashMap]) = {
    vdata.clear()
    val weightMap = msg.orNull
    if (weightMap != null) {
      weightMap.long2DoubleEntrySet().fastIterator().foreach { pair =>
        val src = pair.getLongKey()
        val wn = pair.getDoubleValue()
        vdata.enqueue((src, wn))
      }
    }
    vdata
  }

  def nweight(sc: SparkContext, input: String, output: String, step: Int,
      maxDegree: Int, numPartitions: Int, storageLevel: StorageLevel) {
    //val start1 = System.currentTimeMillis
    val part = new HashPartitioner(numPartitions)

    // Each input line is "src dst1:w1,dst2:w2,..."; expand it into edges.
    val edges = sc.textFile(input, numPartitions).flatMap { line =>
      val fields = line.split("\\s+", 2)
      val src = fields(0).trim.toLong
      fields(1).split("[,\\s]+").filter(!_.isEmpty).map { pairStr =>
        val pair = pairStr.split(":")
        val (dest, weight) = (pair(0).trim.toLong, pair(1).toDouble)
        (src, Edge(src, dest, weight))
      }
    }.partitionBy(part).map(_._2)

    // Seed every vertex with its direct (one-hop) neighbor weights.
    val vertices = edges.map { e =>
      (e.srcId, (e.dstId, e.attr))
    }.groupByKey(part).map { case (id, seq) =>
      val vdata = new SizedPriorityQueue(maxDegree)
      seq.foreach(vdata.enqueue)
      (id, vdata)
    }

    var g = GraphImpl(vertices, edges, new SizedPriorityQueue(maxDegree),
      storageLevel, storageLevel).cache()

    // Each round extends the propagated weights by one hop, up to `step` hops.
    var msg: RDD[(VertexId, Long2DoubleOpenHashMap)] = null
    for (i <- 2 to step) {
      msg = g.aggregateMessages(mapF, reduceF)
      g = g.outerJoinVertices(msg)(updateF).persist(storageLevel)
    }

    // Emit "vid target1:weight1 target2:weight2 ..." per vertex.
    g.vertices.map { case (vid, vdata) =>
      val s = new StringBuilder
      s.append(vid)
      vdata.foreach { r =>
        s.append(' ')
        s.append(r._1)
        s.append(':')
        s.append(r._2)
      }
      s.toString
    }.saveAsTextFile(output)
  }
}
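A hedged invocation sketch; the file paths and parameter values are illustrative assumptions, not taken from the HiBench configuration, and sc is assumed to be an existing SparkContext:

import org.apache.spark.storage.StorageLevel

// Propagate weights over up to 3 hops, keeping the 10 heaviest
// neighbors per vertex; all paths below are hypothetical.
GraphxNWeight.nweight(sc,
  input = "data/nweight-edges.txt",   // lines of "src dst1:w1,dst2:w2,..."
  output = "out/nweight",
  step = 3,
  maxDegree = 10,
  numPartitions = 8,
  storageLevel = StorageLevel.MEMORY_ONLY)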
Example 11
Source File: PregelNWeight.scala From Swallow with Apache License 2.0
package com.intel.hibench.sparkbench.graph.nweight

import scala.collection.JavaConversions._

import org.apache.spark.SparkContext
import org.apache.spark.HashPartitioner
import org.apache.spark.storage.StorageLevel
import org.apache.spark.graphx._
import org.apache.spark.graphx.impl.GraphImpl

import it.unimi.dsi.fastutil.longs.Long2DoubleOpenHashMap

object PregelNWeight extends Serializable {

  // Forward the destination vertex's weighted-neighbor list to the source,
  // scaling each weight by the edge attribute.
  def sendMsg(edge: EdgeTriplet[SizedPriorityQueue, Double]) = {
    val m = new Long2DoubleOpenHashMap()
    val w1 = edge.attr
    val id = edge.srcId
    edge.dstAttr.foreach { case (target, wn) =>
      if (target != id) m.put(target, wn * w1)
    }
    Iterator((id, m))
  }

  // Merge two messages by summing the weights per target vertex.
  def mergMsg(c1: Long2DoubleOpenHashMap, c2: Long2DoubleOpenHashMap) = {
    c2.long2DoubleEntrySet()
      .fastIterator()
      .foreach(pair => c1.put(pair.getLongKey(), c1.get(pair.getLongKey()) + pair.getDoubleValue()))
    c1
  }

  // Vertex program: on the initial (empty) message, seed the vertex with
  // itself; afterwards, replace the queue with the merged incoming weights.
  def vProg(id: VertexId, vdata: SizedPriorityQueue, msg: Long2DoubleOpenHashMap) = {
    vdata.clear()
    if (msg.size > 0) {
      msg.long2DoubleEntrySet().fastIterator().foreach { pair =>
        val src = pair.getLongKey()
        val wn = pair.getDoubleValue()
        vdata.enqueue((src, wn))
      }
      vdata
    } else {
      vdata.enqueue((id, 1))
      vdata
    }
  }

  def nweight(sc: SparkContext, input: String, output: String, step: Int,
      maxDegree: Int, numPartitions: Int, storageLevel: StorageLevel) {
    //val start1 = System.currentTimeMillis
    val part = new HashPartitioner(numPartitions)

    // Each input line is "src dst1:w1,dst2:w2,..."; expand it into edges.
    val edges = sc.textFile(input, numPartitions).flatMap { line =>
      val fields = line.split("\\s+", 2)
      val src = fields(0).trim.toLong
      fields(1).split("[,\\s]+").filter(!_.isEmpty).map { pairStr =>
        val pair = pairStr.split(":")
        val (dest, weight) = (pair(0).trim.toLong, pair(1).toDouble)
        (src, Edge(src, dest, weight))
      }
    }.partitionBy(part).map(_._2)

    var g = GraphImpl(edges, new SizedPriorityQueue(maxDegree),
      storageLevel, storageLevel).cache()

    g = Pregel(g, new Long2DoubleOpenHashMap, step, EdgeDirection.In)(
      vProg, sendMsg, mergMsg)

    // Emit "vid target1:weight1 target2:weight2 ..." per vertex.
    g.vertices.map { case (vid, vdata) =>
      val s = new StringBuilder
      s.append(vid)
      vdata.foreach { r =>
        s.append(' ')
        s.append(r._1)
        s.append(':')
        s.append(r._2)
      }
      s.toString
    }.saveAsTextFile(output)
  }
}
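Examples 10 and 11 construct their graphs through GraphImpl's two public apply overloads rather than the Graph factory methods. A minimal sketch of both forms, with illustrative toy data (the app name, master URL, and attribute values are assumptions for the demo):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.Edge
import org.apache.spark.graphx.impl.GraphImpl
import org.apache.spark.storage.StorageLevel

val sc = new SparkContext(new SparkConf().setAppName("GraphImplDemo").setMaster("local[*]"))
val lvl = StorageLevel.MEMORY_ONLY

val edges = sc.parallelize(Seq(Edge(1L, 2L, 1.0), Edge(2L, 3L, 2.0)))
val vertices = sc.parallelize(Seq((1L, "a"), (2L, "b")))

// Edge-only form (as in Example 11): every vertex gets the default attribute.
val g1 = GraphImpl(edges, "?", lvl, lvl)

// Vertex-plus-edge form (as in Example 10): vertex 3 appears only in `edges`,
// so it receives the default attribute "?".
val g2 = GraphImpl(vertices, edges, "?", lvl, lvl)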