org.apache.spark.graphx.Edge Java Examples
The following examples show how to use org.apache.spark.graphx.Edge. In GraphX, an Edge is a directed edge consisting of a source vertex ID, a destination vertex ID, and an attribute of the graph's edge-property type. Each example below is taken from an open-source project; the source file and license are noted above the code.
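For orientation, here is a minimal sketch of constructing and reading an Edge directly from Java (the IDs and attribute value are illustrative):

Edge<String> edge = new Edge<>(1L, 2L, "follows"); // srcId, dstId, attribute
long src = edge.srcId();   // 1
long dst = edge.dstId();   // 2
String rel = edge.attr();  // "follows"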
Example #1
Source File: GraphXGraphGenerator.java From rya with Apache License 2.0
public Graph<RyaTypeWritable, RyaTypeWritable> createGraph(SparkContext sc, Configuration conf)
        throws IOException, AccumuloSecurityException {
    StorageLevel storageLvl1 = StorageLevel.MEMORY_ONLY();
    StorageLevel storageLvl2 = StorageLevel.MEMORY_ONLY();
    ClassTag<RyaTypeWritable> RTWTag = ClassTag$.MODULE$.apply(RyaTypeWritable.class);
    RyaTypeWritable rtw = null;
    RDD<Tuple2<Object, RyaTypeWritable>> vertexRDD = getVertexRDD(sc, conf);

    // Unwrap the (key, Edge) pairs produced by the edge input format into a plain edge RDD
    RDD<Tuple2<Object, Edge>> edgeRDD = getEdgeRDD(sc, conf);
    JavaRDD<Tuple2<Object, Edge>> jrddTuple = edgeRDD.toJavaRDD();
    JavaRDD<Edge<RyaTypeWritable>> jrdd = jrddTuple.map(tuple -> tuple._2);
    RDD<Edge<RyaTypeWritable>> goodERDD = JavaRDD.toRDD(jrdd);

    return Graph.apply(vertexRDD, goodERDD, rtw, storageLvl1, storageLvl2, RTWTag, RTWTag);
}
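Note the shape of the Graph.apply call: after the vertex and edge RDDs come a default vertex attribute (here the null rtw, assigned to any vertex that appears in an edge but not in vertexRDD), the storage levels for the edge and vertex partitions, and a ClassTag for each attribute type. Java callers must pass the ClassTags explicitly because they are implicit parameters on the Scala side.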
Example #2
Source File: GraphXEdgeInputFormat.java From rya with Apache License 2.0
/**
 * Load the next statement by converting the next Accumulo row to a
 * statement, and make the new (key, value) pair available for retrieval.
 *
 * @return true if another (key, value) pair was fetched and is ready to
 *         be retrieved, false if there was none.
 * @throws IOException
 *             if a row was loaded but could not be converted to a
 *             statement.
 */
@Override
public boolean nextKeyValue() throws IOException {
    if (!scannerIterator.hasNext()) {
        return false;
    }
    final Entry<Key, Value> entry = scannerIterator.next();
    ++numKeysRead;
    currentKey = entry.getKey();
    try {
        currentK = currentKey.getRow();
        final RyaTypeWritable rtw = new RyaTypeWritable();
        final RyaStatement stmt = this.ryaContext.deserializeTriple(
                this.tableLayout,
                new TripleRow(entry.getKey().getRow().getBytes(),
                        entry.getKey().getColumnFamily().getBytes(),
                        entry.getKey().getColumnQualifier().getBytes(),
                        entry.getKey().getTimestamp(),
                        entry.getKey().getColumnVisibility().getBytes(),
                        entry.getValue().get()));
        final long subHash = getVertexId(stmt.getSubject());
        final long objHash = getVertexId(stmt.getObject());
        rtw.setRyaType(stmt.getPredicate());
        final Edge<RyaTypeWritable> writable =
                new Edge<RyaTypeWritable>(subHash, objHash, rtw);
        currentV = writable;
    } catch (final TripleRowResolverException e) {
        throw new IOException(e);
    }
    return true;
}
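Each Rya statement thus becomes one GraphX edge: the statement's subject and object are hashed to long vertex IDs via getVertexId, and the predicate rides along as the edge attribute wrapped in a RyaTypeWritable.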
Example #3
Source File: SparkGraphXKickoff.java From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

    final SparkConf conf = new SparkConf()
            .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
            .setAppName(APPLICATION_NAME);

    JavaSparkContext javaSparkContext = new JavaSparkContext(conf);

    List<Tuple2<Object, String>> listOfVertex = new ArrayList<>();
    listOfVertex.add(new Tuple2<>(1L, "James"));
    listOfVertex.add(new Tuple2<>(2L, "Andy"));
    listOfVertex.add(new Tuple2<>(3L, "Ed"));
    listOfVertex.add(new Tuple2<>(4L, "Roger"));
    listOfVertex.add(new Tuple2<>(5L, "Tony"));

    List<Edge<String>> listOfEdge = new ArrayList<>();
    listOfEdge.add(new Edge<>(2, 1, "Friend"));
    listOfEdge.add(new Edge<>(3, 1, "Friend"));
    listOfEdge.add(new Edge<>(3, 2, "Colleague"));
    listOfEdge.add(new Edge<>(3, 5, "Partner"));
    listOfEdge.add(new Edge<>(4, 3, "Boss"));
    listOfEdge.add(new Edge<>(5, 2, "Partner"));

    JavaRDD<Tuple2<Object, String>> vertexRDD = javaSparkContext.parallelize(listOfVertex);
    JavaRDD<Edge<String>> edgeRDD = javaSparkContext.parallelize(listOfEdge);

    ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);

    Graph<String, String> graph = Graph.apply(
            vertexRDD.rdd(),
            edgeRDD.rdd(),
            "",
            StorageLevel.MEMORY_ONLY(),
            StorageLevel.MEMORY_ONLY(),
            stringTag,
            stringTag);

    // apply specific algorithms, such as PageRank

    graph.vertices()
            .saveAsTextFile(VERTICES_FOLDER_PATH);
    graph.edges()
            .saveAsTextFile(EDGES_FOLDER_PATH);

    javaSparkContext.close();
}
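Where the comment suggests applying algorithms such as PageRank, a call through the graph's GraphOps could be slotted in before the save steps. A minimal sketch, assuming GraphX's pageRank(tolerance, resetProb) overload; the tolerance value is illustrative:

// Run PageRank until the ranks converge within the given tolerance
Graph<Object, Object> ranks = graph.ops().pageRank(0.0001, 0.15);
ranks.vertices().toJavaRDD().collect().forEach(System.out::println);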
Example #4
Source File: AbsFunc7.java From Apache-Spark-2x-for-Java-Developers with MIT License
@Override
public Integer apply(Edge<String> edge) {
    return edge.attr().length();
}
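AbsFunc7 presumably extends Scala's AbstractFunction1<Edge<String>, Integer> so it can be handed to GraphX APIs that expect a Scala function. A sketch of one plausible use, assuming a Graph<String, String> named graph: mapping every edge attribute to its length with Graph.mapEdges, which needs a ClassTag for the new edge type:

ClassTag<Integer> intTag = scala.reflect.ClassTag$.MODULE$.apply(Integer.class);
Graph<String, Integer> lengths = graph.mapEdges(new AbsFunc7(), intTag);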
Example #5
Source File: PropertyGraphExampleFromEdges.java From Apache-Spark-2x-for-Java-Developers with MIT License
public static void main(String[] args) {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
    SparkConf conf = new SparkConf().setMaster("local").setAppName("graph");
    JavaSparkContext javaSparkContext = new JavaSparkContext(conf);

    ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);

    List<Edge<String>> edges = new ArrayList<>();
    edges.add(new Edge<String>(1, 2, "Friend"));
    edges.add(new Edge<String>(2, 3, "Advisor"));
    edges.add(new Edge<String>(1, 3, "Friend"));
    edges.add(new Edge<String>(4, 3, "colleague"));
    edges.add(new Edge<String>(4, 5, "Relative"));
    edges.add(new Edge<String>(2, 5, "BusinessPartners"));

    JavaRDD<Edge<String>> edgeRDD = javaSparkContext.parallelize(edges);

    Graph<String, String> graph = Graph.fromEdges(edgeRDD.rdd(), "",
            StorageLevel.MEMORY_ONLY(), StorageLevel.MEMORY_ONLY(), stringTag, stringTag);

    graph.vertices().toJavaRDD().collect().forEach(System.out::println);

    // graph.aggregateMessages(sendMsg, mergeMsg, tripletFields, evidence$11)
}
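Unlike Graph.apply, Graph.fromEdges takes no vertex RDD: it derives the vertex set from the edge endpoints and assigns every discovered vertex the supplied default attribute (the empty string above), which is why the printed vertices all carry "". To inspect connectivity, a degree count through GraphOps is a handy follow-up (a sketch):

graph.ops().degrees().toJavaRDD().collect().forEach(System.out::println);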
Example #6
Source File: GraphXGraphGenerator.java From rya with Apache License 2.0
public RDD<Tuple2<Object, Edge>> getEdgeRDD(SparkContext sc, Configuration conf)
        throws IOException, AccumuloSecurityException {
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);

    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    } else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }

    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }

    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);

    // If connecting to real accumulo, set additional parameters and require zookeepers
    if (!mock) conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); // for consistency

    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);

    Job job = Job.getInstance(conf, sc.appName());

    ClientConfiguration clientConfig = new ClientConfiguration()
            .with(ClientProperty.INSTANCE_NAME, instance)
            .with(ClientProperty.INSTANCE_ZK_HOST, zk);

    RyaInputFormat.setTableLayout(job, TABLE_LAYOUT.SPO);
    RyaInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    RyaInputFormat.setZooKeeperInstance(job, clientConfig);
    RyaInputFormat.setScanAuthorizations(job, authorizations);

    String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(TABLE_LAYOUT.SPO, tablePrefix);
    InputFormatBase.setInputTableName(job, tableName);

    return sc.newAPIHadoopRDD(job.getConfiguration(),
            GraphXEdgeInputFormat.class,
            Object.class,
            Edge.class);
}
Example #7
Source File: GraphXEdgeInputFormatTest.java From rya with Apache License 2.0
@SuppressWarnings("rawtypes") @Test public void testInputFormat() throws Exception { RyaStatement input = RyaStatement.builder() .setSubject(new RyaIRI("http://www.google.com")) .setPredicate(new RyaIRI("http://some_other_uri")) .setObject(new RyaIRI("http://www.yahoo.com")) .setColumnVisibility(new byte[0]) .setValue(new byte[0]) .build(); apiImpl.add(input); Job jobConf = Job.getInstance(); GraphXEdgeInputFormat.setMockInstance(jobConf, instance.getInstanceName()); GraphXEdgeInputFormat.setConnectorInfo(jobConf, username, password); GraphXEdgeInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO); GraphXEdgeInputFormat.setInputTableName(jobConf, table); GraphXEdgeInputFormat.setInputTableName(jobConf, table); GraphXEdgeInputFormat.setScanIsolation(jobConf, false); GraphXEdgeInputFormat.setLocalIterators(jobConf, false); GraphXEdgeInputFormat.setOfflineTableScan(jobConf, false); GraphXEdgeInputFormat inputFormat = new GraphXEdgeInputFormat(); JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID()); List<InputSplit> splits = inputFormat.getSplits(context); Assert.assertEquals(1, splits.size()); TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1)); RecordReader reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext); RecordReader ryaStatementRecordReader = (RecordReader) reader; ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext); List<Edge> results = new ArrayList<Edge>(); while(ryaStatementRecordReader.nextKeyValue()) { Edge writable = (Edge) ryaStatementRecordReader.getCurrentValue(); long srcId = writable.srcId(); long destId = writable.dstId(); RyaTypeWritable rtw = null; Object text = ryaStatementRecordReader.getCurrentKey(); Edge<RyaTypeWritable> edge = new Edge<RyaTypeWritable>(srcId, destId, rtw); results.add(edge); System.out.println(text); } System.out.println(results.size()); System.out.println(results); Assert.assertTrue(results.size() == 2); }
Example #8
Source File: GraphXEdgeInputFormat.java From rya with Apache License 2.0
/**
 * Instantiates a RecordReader for this InputFormat and a given task and
 * input split.
 *
 * @param split
 *            Defines the portion of the input this RecordReader is
 *            responsible for.
 * @param context
 *            The context of the task.
 * @return A RecordReader that can be used to fetch RyaStatementWritables.
 */
@Override
public RecordReader<Object, Edge> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) {
    return new RyaStatementRecordReader();
}