org.apache.spark.graphx.Edge Java Examples
The following examples show how to use org.apache.spark.graphx.Edge. In GraphX, an Edge is a directed edge consisting of a source vertex ID, a destination vertex ID, and an attribute of the graph's edge-property type. Each example below is taken from an open-source project; the source file and license are noted above the code.
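For orientation, here is a minimal sketch of constructing and reading an Edge directly from Java (the IDs and attribute value are illustrative):

Edge<String> edge = new Edge<>(1L, 2L, "follows"); // srcId, dstId, attribute
long src = edge.srcId();   // 1
long dst = edge.dstId();   // 2
String rel = edge.attr();  // "follows"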
Example #1
Source File: GraphXGraphGenerator.java From rya with Apache License 2.0
public Graph<RyaTypeWritable, RyaTypeWritable> createGraph(SparkContext sc, Configuration conf)
        throws IOException, AccumuloSecurityException {
    StorageLevel storageLvl1 = StorageLevel.MEMORY_ONLY();
    StorageLevel storageLvl2 = StorageLevel.MEMORY_ONLY();
    ClassTag<RyaTypeWritable> RTWTag = ClassTag$.MODULE$.apply(RyaTypeWritable.class);
    RyaTypeWritable rtw = null;
    RDD<Tuple2<Object, RyaTypeWritable>> vertexRDD = getVertexRDD(sc, conf);

    // Unwrap the (key, Edge) pairs produced by the edge input format into a plain edge RDD
    RDD<Tuple2<Object, Edge>> edgeRDD = getEdgeRDD(sc, conf);
    JavaRDD<Tuple2<Object, Edge>> jrddTuple = edgeRDD.toJavaRDD();
    JavaRDD<Edge<RyaTypeWritable>> jrdd = jrddTuple.map(tuple -> tuple._2);
    RDD<Edge<RyaTypeWritable>> goodERDD = JavaRDD.toRDD(jrdd);

    return Graph.apply(vertexRDD, goodERDD, rtw, storageLvl1, storageLvl2, RTWTag, RTWTag);
}
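Note the shape of the Graph.apply call: after the vertex and edge RDDs come a default vertex attribute (here the null rtw, assigned to any vertex that appears in an edge but not in vertexRDD), the storage levels for the edge and vertex partitions, and a ClassTag for each attribute type. Java callers must pass the ClassTags explicitly because they are implicit parameters on the Scala side.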
Example #2
Source File: GraphXEdgeInputFormat.java From rya with Apache License 2.0
/**
 * Load the next statement by converting the next Accumulo row to a
 * statement, and make the new (key, value) pair available for retrieval.
 *
 * @return true if another (key, value) pair was fetched and is ready to
 *         be retrieved, false if there was none.
 * @throws IOException
 *             if a row was loaded but could not be converted to a
 *             statement.
 */
@Override
public boolean nextKeyValue() throws IOException {
    if (!scannerIterator.hasNext()) {
        return false;
    }
    final Entry<Key, Value> entry = scannerIterator.next();
    ++numKeysRead;
    currentKey = entry.getKey();
    try {
        currentK = currentKey.getRow();
        final RyaTypeWritable rtw = new RyaTypeWritable();
        final RyaStatement stmt = this.ryaContext.deserializeTriple(
                this.tableLayout,
                new TripleRow(entry.getKey().getRow().getBytes(),
                        entry.getKey().getColumnFamily().getBytes(),
                        entry.getKey().getColumnQualifier().getBytes(),
                        entry.getKey().getTimestamp(),
                        entry.getKey().getColumnVisibility().getBytes(),
                        entry.getValue().get()));
        final long subHash = getVertexId(stmt.getSubject());
        final long objHash = getVertexId(stmt.getObject());
        rtw.setRyaType(stmt.getPredicate());
        final Edge<RyaTypeWritable> writable =
                new Edge<RyaTypeWritable>(subHash, objHash, rtw);
        currentV = writable;
    } catch (final TripleRowResolverException e) {
        throw new IOException(e);
    }
    return true;
}
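Each Rya statement thus becomes one GraphX edge: the statement's subject and object are hashed to long vertex IDs via getVertexId, and the predicate rides along as the edge attribute wrapped in a RyaTypeWritable.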
Example #3
Source File: SparkGraphXKickoff.java From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

    final SparkConf conf = new SparkConf()
            .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
            .setAppName(APPLICATION_NAME);

    JavaSparkContext javaSparkContext = new JavaSparkContext(conf);

    List<Tuple2<Object, String>> listOfVertex = new ArrayList<>();
    listOfVertex.add(new Tuple2<>(1L, "James"));
    listOfVertex.add(new Tuple2<>(2L, "Andy"));
    listOfVertex.add(new Tuple2<>(3L, "Ed"));
    listOfVertex.add(new Tuple2<>(4L, "Roger"));
    listOfVertex.add(new Tuple2<>(5L, "Tony"));

    List<Edge<String>> listOfEdge = new ArrayList<>();
    listOfEdge.add(new Edge<>(2, 1, "Friend"));
    listOfEdge.add(new Edge<>(3, 1, "Friend"));
    listOfEdge.add(new Edge<>(3, 2, "Colleague"));
    listOfEdge.add(new Edge<>(3, 5, "Partner"));
    listOfEdge.add(new Edge<>(4, 3, "Boss"));
    listOfEdge.add(new Edge<>(5, 2, "Partner"));

    JavaRDD<Tuple2<Object, String>> vertexRDD = javaSparkContext.parallelize(listOfVertex);
    JavaRDD<Edge<String>> edgeRDD = javaSparkContext.parallelize(listOfEdge);

    ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);

    Graph<String, String> graph = Graph.apply(
            vertexRDD.rdd(),
            edgeRDD.rdd(),
            "",
            StorageLevel.MEMORY_ONLY(),
            StorageLevel.MEMORY_ONLY(),
            stringTag,
            stringTag);

    // apply specific algorithms, such as PageRank

    graph.vertices()
            .saveAsTextFile(VERTICES_FOLDER_PATH);
    graph.edges()
            .saveAsTextFile(EDGES_FOLDER_PATH);

    javaSparkContext.close();
}
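Where the comment suggests applying algorithms such as PageRank, a call through the graph's GraphOps could be slotted in before the save steps. A minimal sketch, assuming GraphX's pageRank(tolerance, resetProb) overload; the tolerance value is illustrative:

// Run PageRank until the ranks converge within the given tolerance
Graph<Object, Object> ranks = graph.ops().pageRank(0.0001, 0.15);
ranks.vertices().toJavaRDD().collect().forEach(System.out::println);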
Example #4
Source File: AbsFunc7.java From Apache-Spark-2x-for-Java-Developers with MIT License
@Override
public Integer apply(Edge<String> edge) {
    return edge.attr().length();
}
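AbsFunc7 presumably extends Scala's AbstractFunction1<Edge<String>, Integer> so it can be handed to GraphX APIs that expect a Scala function. A sketch of one plausible use, assuming a Graph<String, String> named graph: mapping every edge attribute to its length with Graph.mapEdges, which needs a ClassTag for the new edge type:

ClassTag<Integer> intTag = scala.reflect.ClassTag$.MODULE$.apply(Integer.class);
Graph<String, Integer> lengths = graph.mapEdges(new AbsFunc7(), intTag);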
Example #5
Source File: PropertyGraphExampleFromEdges.java From Apache-Spark-2x-for-Java-Developers with MIT License
public static void main(String[] args) {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
    SparkConf conf = new SparkConf().setMaster("local").setAppName("graph");
    JavaSparkContext javaSparkContext = new JavaSparkContext(conf);

    ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);

    List<Edge<String>> edges = new ArrayList<>();
    edges.add(new Edge<String>(1, 2, "Friend"));
    edges.add(new Edge<String>(2, 3, "Advisor"));
    edges.add(new Edge<String>(1, 3, "Friend"));
    edges.add(new Edge<String>(4, 3, "colleague"));
    edges.add(new Edge<String>(4, 5, "Relative"));
    edges.add(new Edge<String>(2, 5, "BusinessPartners"));

    JavaRDD<Edge<String>> edgeRDD = javaSparkContext.parallelize(edges);

    Graph<String, String> graph = Graph.fromEdges(edgeRDD.rdd(), "",
            StorageLevel.MEMORY_ONLY(), StorageLevel.MEMORY_ONLY(), stringTag, stringTag);

    graph.vertices().toJavaRDD().collect().forEach(System.out::println);

    // graph.aggregateMessages(sendMsg, mergeMsg, tripletFields, evidence$11)
}
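Unlike Graph.apply, Graph.fromEdges takes no vertex RDD: it derives the vertex set from the edge endpoints and assigns every discovered vertex the supplied default attribute (the empty string above), which is why the printed vertices all carry "". To inspect connectivity, a degree count through GraphOps is a handy follow-up (a sketch):

graph.ops().degrees().toJavaRDD().collect().forEach(System.out::println);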
Example #6
Source File: GraphXGraphGenerator.java From rya with Apache License 2.0
public RDD<Tuple2<Object, Edge>> getEdgeRDD(SparkContext sc, Configuration conf)
        throws IOException, AccumuloSecurityException {
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);

    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    } else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }

    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }

    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);

    // If connecting to real accumulo, set additional parameters and require zookeepers
    if (!mock) conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); // for consistency

    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);

    Job job = Job.getInstance(conf, sc.appName());

    ClientConfiguration clientConfig = new ClientConfiguration()
            .with(ClientProperty.INSTANCE_NAME, instance)
            .with(ClientProperty.INSTANCE_ZK_HOST, zk);

    RyaInputFormat.setTableLayout(job, TABLE_LAYOUT.SPO);
    RyaInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    RyaInputFormat.setZooKeeperInstance(job, clientConfig);
    RyaInputFormat.setScanAuthorizations(job, authorizations);

    String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(TABLE_LAYOUT.SPO, tablePrefix);
    InputFormatBase.setInputTableName(job, tableName);

    return sc.newAPIHadoopRDD(job.getConfiguration(),
            GraphXEdgeInputFormat.class,
            Object.class,
            Edge.class);
}
Example #7
Source File: GraphXEdgeInputFormatTest.java From rya with Apache License 2.0
@SuppressWarnings("rawtypes") @Test public void testInputFormat() throws Exception { RyaStatement input = RyaStatement.builder() .setSubject(new RyaIRI("http://www.google.com")) .setPredicate(new RyaIRI("http://some_other_uri")) .setObject(new RyaIRI("http://www.yahoo.com")) .setColumnVisibility(new byte[0]) .setValue(new byte[0]) .build(); apiImpl.add(input); Job jobConf = Job.getInstance(); GraphXEdgeInputFormat.setMockInstance(jobConf, instance.getInstanceName()); GraphXEdgeInputFormat.setConnectorInfo(jobConf, username, password); GraphXEdgeInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO); GraphXEdgeInputFormat.setInputTableName(jobConf, table); GraphXEdgeInputFormat.setInputTableName(jobConf, table); GraphXEdgeInputFormat.setScanIsolation(jobConf, false); GraphXEdgeInputFormat.setLocalIterators(jobConf, false); GraphXEdgeInputFormat.setOfflineTableScan(jobConf, false); GraphXEdgeInputFormat inputFormat = new GraphXEdgeInputFormat(); JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID()); List<InputSplit> splits = inputFormat.getSplits(context); Assert.assertEquals(1, splits.size()); TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1)); RecordReader reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext); RecordReader ryaStatementRecordReader = (RecordReader) reader; ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext); List<Edge> results = new ArrayList<Edge>(); while(ryaStatementRecordReader.nextKeyValue()) { Edge writable = (Edge) ryaStatementRecordReader.getCurrentValue(); long srcId = writable.srcId(); long destId = writable.dstId(); RyaTypeWritable rtw = null; Object text = ryaStatementRecordReader.getCurrentKey(); Edge<RyaTypeWritable> edge = new Edge<RyaTypeWritable>(srcId, destId, rtw); results.add(edge); System.out.println(text); } System.out.println(results.size()); System.out.println(results); Assert.assertTrue(results.size() == 2); }
Example #8
Source File: GraphXEdgeInputFormat.java From rya with Apache License 2.0
/**
 * Instantiates a RecordReader for this InputFormat and a given task and
 * input split.
 *
 * @param split
 *            Defines the portion of the input this RecordReader is
 *            responsible for.
 * @param context
 *            The context of the task.
 * @return A RecordReader that can be used to fetch RyaStatementWritables.
 */
@Override
public RecordReader<Object, Edge> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) {
    return new RyaStatementRecordReader();
}