Java Code Examples for org.apache.tinkerpop.gremlin.TestHelper#makeTestDataDirectory()
The following examples show how to use
org.apache.tinkerpop.gremlin.TestHelper#makeTestDataDirectory().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TinkerGraphTest.java From tinkergraph-gremlin with Apache License 2.0 | 6 votes |
/**
 * Generates the modern graph into a {@link TinkerGraph} configured with the "graphml" format and a
 * file location, closes it, then opens a second graph from the same configuration and asserts
 * that it holds the modern graph.
 */
@Test
public void shouldPersistToGraphML() {
    final String location = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToGraphML.xml";

    // drop any file left behind at the target location by an earlier run
    final File previous = new File(location);
    if (previous.exists() && previous.isFile()) {
        previous.delete();
    }

    final Configuration settings = new BaseConfiguration();
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "graphml");
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, location);

    // first graph: populate, then close
    final TinkerGraph written = TinkerGraph.open(settings);
    TinkerFactory.generateModern(written);
    written.close();

    // second graph: opened from the very same configuration
    final TinkerGraph restored = TinkerGraph.open(settings);
    IoTest.assertModernGraph(restored, true, true);
    restored.close();
}
Example 2
Source File: TinkerGraphTest.java From tinkergraph-gremlin with Apache License 2.0 | 6 votes |
/**
 * Generates the modern graph into a {@link TinkerGraph} configured with the "graphson" format and
 * a file location, closes it, then opens a second graph from the same configuration and asserts
 * that it holds the modern graph.
 */
@Test
public void shouldPersistToGraphSON() {
    final String location = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToGraphSON.json";

    // drop any file left behind at the target location by an earlier run
    final File previous = new File(location);
    if (previous.exists() && previous.isFile()) {
        previous.delete();
    }

    final Configuration settings = new BaseConfiguration();
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "graphson");
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, location);

    // first graph: populate, then close
    final TinkerGraph written = TinkerGraph.open(settings);
    TinkerFactory.generateModern(written);
    written.close();

    // second graph: opened from the very same configuration
    final TinkerGraph restored = TinkerGraph.open(settings);
    IoTest.assertModernGraph(restored, true, false);
    restored.close();
}
Example 3
Source File: TinkerGraphTest.java From tinkergraph-gremlin with Apache License 2.0 | 6 votes |
/**
 * Generates the modern graph into a {@link TinkerGraph} configured with the "gryo" format and a
 * file location, closes it, then opens a second graph from the same configuration and asserts
 * that it holds the modern graph.
 */
@Test
public void shouldPersistToGryo() {
    final String location = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToGryo.kryo";

    // drop any file left behind at the target location by an earlier run
    final File previous = new File(location);
    if (previous.exists() && previous.isFile()) {
        previous.delete();
    }

    final Configuration settings = new BaseConfiguration();
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "gryo");
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, location);

    // first graph: populate, then close
    final TinkerGraph written = TinkerGraph.open(settings);
    TinkerFactory.generateModern(written);
    written.close();

    // second graph: opened from the very same configuration
    final TinkerGraph restored = TinkerGraph.open(settings);
    IoTest.assertModernGraph(restored, true, false);
    restored.close();
}
Example 4
Source File: TinkerGraphTest.java From tinkergraph-gremlin with Apache License 2.0 | 6 votes |
/**
 * Generates "the crew" graph into a {@link TinkerGraph} configured with the "gryo" format and a
 * file location and closes it. The default vertex-property cardinality is then set to
 * {@code list} on the same configuration before a second graph is opened from it and checked
 * with {@code IoTest.assertCrewGraph}.
 */
@Test
public void shouldPersistToGryoAndHandleMultiProperties() {
    final String location = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToGryoMulti.kryo";

    // drop any file left behind at the target location by an earlier run
    final File previous = new File(location);
    if (previous.exists() && previous.isFile()) {
        previous.delete();
    }

    final Configuration settings = new BaseConfiguration();
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "gryo");
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, location);

    final TinkerGraph written = TinkerGraph.open(settings);
    TinkerFactory.generateTheCrew(written);
    written.close();

    // the cardinality is only added for the graph that is opened next
    final String listCardinality = VertexProperty.Cardinality.list.toString();
    settings.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_DEFAULT_VERTEX_PROPERTY_CARDINALITY, listCardinality);

    final TinkerGraph restored = TinkerGraph.open(settings);
    IoTest.assertCrewGraph(restored, false);
    restored.close();
}
Example 5
Source File: FileSystemStorageCheck.java From tinkerpop with Apache License 2.0 | 6 votes |
@Test public void shouldSupportDirectoryFileDistinction() throws Exception { // Make sure Spark is shut down before deleting its files and directories, // which are locked under Windows and fail the tests. See FileSystemStorageCheck graph.configuration().setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false); final Storage storage = FileSystemStorage.open(ConfUtil.makeHadoopConfiguration(graph.configuration())); final String directory1 = TestHelper.makeTestDataDirectory(FileSystemStorageCheck.class, "directory1"); final String directory2 = TestHelper.makeTestDataDirectory(FileSystemStorageCheck.class, "directory2"); for (int i = 0; i < 10; i++) { new File(directory1, "file1-" + i + ".txt.bz").createNewFile(); } for (int i = 0; i < 5; i++) { new File(directory2, "file2-" + i + ".txt.bz").createNewFile(); } super.checkFileDirectoryDistinction(storage, directory1, directory2); deleteDirectory(directory1); deleteDirectory(directory2); }
Example 6
Source File: PersistedInputOutputRDDIntegrateTest.java From tinkerpop with Apache License 2.0 | 6 votes |
@Test public void shouldNotHaveDanglingPersistedComputeRDDs() throws Exception { Spark.create("local[4]"); final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString()); final Configuration configuration = super.getBaseConfiguration(); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo")); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); Graph graph = GraphFactory.open(configuration); /// assertEquals(6, graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)).V().out().count().next().longValue()); assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertEquals(0, Spark.getContext().getPersistentRDDs().size()); // assertEquals(2, graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)).V().out().out().count().next().longValue()); assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertEquals(0, Spark.getContext().getPersistentRDDs().size()); /////// Spark.close(); }
Example 7
Source File: PersistedInputOutputRDDIntegrateTest.java From tinkerpop with Apache License 2.0 | 6 votes |
@Test public void shouldNotPersistRDDAcrossJobs() throws Exception { Spark.create("local[4]"); final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString()); final Configuration configuration = super.getBaseConfiguration(); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo")); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false); // because the spark context is NOT persisted, neither is the RDD Graph graph = GraphFactory.open(configuration); graph.compute(SparkGraphComputer.class) .result(GraphComputer.ResultGraph.NEW) .persist(GraphComputer.Persist.EDGES) .program(TraversalVertexProgram.build() .traversal(graph.traversal().withComputer(SparkGraphComputer.class), "gremlin-groovy", "g.V()").create(graph)).submit().get(); //////// Spark.create("local[4]"); assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertEquals(0, Spark.getContext().getPersistentRDDs().size()); Spark.close(); }
Example 8
Source File: TinkerGraphTest.java From tinkergraph-gremlin with Apache License 2.0 | 5 votes |
@Test public void shouldPersistToAnyGraphFormat() { final String graphLocation = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToAnyGraphFormat.dat"; final File f = new File(graphLocation); if (f.exists() && f.isFile()) f.delete(); final Configuration conf = new BaseConfiguration(); conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, TestIoBuilder.class.getName()); conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, graphLocation); final TinkerGraph graph = TinkerGraph.open(conf); TinkerFactory.generateModern(graph); //Test write graph graph.close(); assertEquals(TestIoBuilder.calledOnMapper, 1); assertEquals(TestIoBuilder.calledGraph, 1); assertEquals(TestIoBuilder.calledCreate, 1); try (BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(f))){ os.write("dummy string".getBytes()); } catch (Exception e) { e.printStackTrace(); } //Test read graph final TinkerGraph readGraph = TinkerGraph.open(conf); assertEquals(TestIoBuilder.calledOnMapper, 1); assertEquals(TestIoBuilder.calledGraph, 1); assertEquals(TestIoBuilder.calledCreate, 1); }
Example 9
Source File: FileSystemStorageCheck.java From tinkerpop with Apache License 2.0 | 5 votes |
@Test @LoadGraphWith(LoadGraphWith.GraphData.MODERN) public void shouldSupportCopyMethods() throws Exception { // Make sure Spark is shut down before deleting its files and directories, // which are locked under Windows and fail the tests. See FileSystemStorageCheck graph.configuration().setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false); final Storage storage = FileSystemStorage.open(ConfUtil.makeHadoopConfiguration(graph.configuration())); final String outputLocation = graph.configuration().getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION); final String newOutputLocation = TestHelper.makeTestDataDirectory(FileSystemStorageCheck.class, "new-location-for-copy"); // TestHelper creates the directory and we need it not to exist deleteDirectory(newOutputLocation); super.checkCopyMethods(storage, outputLocation, newOutputLocation, InputOutputHelper.getInputFormat((Class) Class.forName(graph.configuration().getString(Constants.GREMLIN_HADOOP_GRAPH_WRITER))), SequenceFileInputFormat.class); }
Example 10
Source File: SparkContextStorageCheck.java From tinkerpop with Apache License 2.0 | 5 votes |
/**
 * Delegates to {@code checkCopyMethods} of the parent class with a {@code SparkContextStorage},
 * copying from the graph's configured output location to a "new-location-for-copy" test
 * directory and using {@code PersistedInputRDD} for both input format arguments.
 */
@Test
@LoadGraphWith(LoadGraphWith.GraphData.MODERN)
public void shouldSupportCopyMethods() throws Exception {
    final Storage sparkStorage = SparkContextStorage.open(graph.configuration());
    final String copySource = graph.configuration().getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
    final String copyTarget = TestHelper.makeTestDataDirectory(this.getClass(), "new-location-for-copy");
    super.checkCopyMethods(sparkStorage, copySource, copyTarget, PersistedInputRDD.class, PersistedInputRDD.class);
}
Example 11
Source File: PersistedInputOutputRDDIntegrateTest.java From tinkerpop with Apache License 2.0 | 5 votes |
@Test public void shouldPersistRDDBasedOnStorageLevel() throws Exception { Spark.create("local[4]"); int counter = 0; for (final String storageLevel : Arrays.asList("MEMORY_ONLY", "DISK_ONLY", "MEMORY_ONLY_SER", "MEMORY_AND_DISK_SER")) { assertEquals(counter, Spark.getRDDs().size()); assertEquals(counter, Spark.getContext().getPersistentRDDs().size()); counter++; final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString()); final Configuration configuration = super.getBaseConfiguration(); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo")); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, storageLevel); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); Graph graph = GraphFactory.open(configuration); graph.compute(SparkGraphComputer.class) .result(GraphComputer.ResultGraph.NEW) .persist(GraphComputer.Persist.EDGES) .program(TraversalVertexProgram.build() .traversal(graph.traversal().withComputer(SparkGraphComputer.class), "gremlin-groovy", "g.V().groupCount('m').by('name').out()").create(graph)).submit().get(); //////// assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertEquals(StorageLevel.fromString(storageLevel), Spark.getRDD(Constants.getGraphLocation(rddName)).getStorageLevel()); assertEquals(counter, Spark.getRDDs().size()); assertEquals(counter, Spark.getContext().getPersistentRDDs().size()); } Spark.close(); }
Example 12
Source File: GryoSerializerIntegrateTest.java From tinkerpop with Apache License 2.0 | 4 votes |
@Test public void shouldHaveAllRegisteredGryoSerializerClasses() throws Exception { // this is a stress test that ensures that when data is spilling to disk, persisted to an RDD, etc. the correct classes are registered with GryoSerializer. final TinkerGraph randomGraph = TinkerGraph.open(); int totalVertices = 200000; TestHelper.createRandomGraph(randomGraph, totalVertices, 100); final String inputLocation = TestHelper.makeTestDataFile(GryoSerializerIntegrateTest.class, UUID.randomUUID().toString(), "random-graph.kryo"); randomGraph.io(IoCore.gryo()).writeGraph(inputLocation); randomGraph.clear(); randomGraph.close(); final String outputLocation = TestHelper.makeTestDataDirectory(GryoSerializerIntegrateTest.class, UUID.randomUUID().toString()); Configuration configuration = getBaseConfiguration(); configuration.clearProperty(Constants.SPARK_SERIALIZER); // ensure proper default to GryoSerializer configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, inputLocation); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false); Graph graph = GraphFactory.open(configuration); final GraphTraversal.Admin<Vertex, Map<Vertex, Collection<Vertex>>> traversal = graph.traversal().withComputer(SparkGraphComputer.class).V().group("m").<Map<Vertex, Collection<Vertex>>>cap("m").asAdmin(); assertTrue(traversal.hasNext()); assertEquals(traversal.next(), traversal.getSideEffects().get("m")); assertFalse(traversal.hasNext()); assertTrue(traversal.getSideEffects().exists("m")); assertTrue(traversal.getSideEffects().get("m") instanceof Map); assertEquals(totalVertices, traversal.getSideEffects().<Map>get("m").size()); configuration = getBaseConfiguration(); 
configuration.clearProperty(Constants.SPARK_SERIALIZER); // ensure proper default to GryoSerializer configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, inputLocation); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, "DISK_ONLY"); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "persisted-rdd"); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); graph = GraphFactory.open(configuration); assertEquals(totalVertices, graph.compute(SparkGraphComputer.class).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph().traversal().V().count().next().longValue()); configuration = getBaseConfiguration(); configuration.clearProperty(Constants.SPARK_SERIALIZER); // ensure proper default to GryoSerializer configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, "persisted-rdd"); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); graph = GraphFactory.open(configuration); assertEquals(totalVertices, graph.traversal().withComputer(SparkGraphComputer.class).V().count().next().longValue()); configuration = getBaseConfiguration(); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, "persisted-rdd"); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, 
PersistedOutputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation); configuration.setProperty(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY"); // this should be ignored as you can't change the persistence level once created configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, "MEMORY_AND_DISK"); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); graph = GraphFactory.open(configuration); assertEquals(totalVertices, graph.traversal().withComputer(SparkGraphComputer.class).V().count().next().longValue()); }
Example 13
Source File: PersistedInputOutputRDDIntegrateTest.java From tinkerpop with Apache License 2.0 | 4 votes |
/**
 * Runs a sequence of jobs against a persisted Spark context and, after each job, asserts which
 * of the two output RDD names are registered and how many persistent RDDs the context holds.
 * Jobs alternate between a vertex program submitted with {@code Persist.EDGES} and a plain OLAP
 * traversal.
 */
@Test
public void shouldPersistRDDAcrossJobs() throws Exception {
    Spark.create("local[4]");
    final String firstRdd = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final String secondRdd = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());

    // job 1: Gryo file in, first RDD out
    final Configuration conf = super.getBaseConfiguration();
    conf.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    conf.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, firstRdd);
    conf.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    Graph graph = GraphFactory.open(conf);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class), "gremlin-groovy", "g.V().count()")
                    .create(graph))
            .submit()
            .get();
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(firstRdd)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());

    // job 2: first RDD in, second RDD out, plain traversal
    conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    conf.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, firstRdd);
    conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    conf.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, secondRdd);
    graph = GraphFactory.open(conf);
    final long outCountAfterJob2 = graph.traversal().withComputer(SparkGraphComputer.class)
            .V().out().count().next().longValue();
    assertEquals(6, outCountAfterJob2);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(firstRdd)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());

    // job 3: vertex program with the same configuration
    graph = GraphFactory.open(conf);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class), "gremlin-groovy", "g.V().count()")
                    .create(graph))
            .submit()
            .get();
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(firstRdd)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(secondRdd)));
    assertEquals(2, Spark.getContext().getPersistentRDDs().size());

    // job 4: the reader/writer settings are applied once more, then a plain traversal
    conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    conf.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, firstRdd);
    conf.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    conf.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, secondRdd);
    graph = GraphFactory.open(conf);
    final long outCountAfterJob4 = graph.traversal().withComputer(SparkGraphComputer.class)
            .V().out().count().next().longValue();
    assertEquals(6, outCountAfterJob4);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(firstRdd)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());

    // job 5: vertex program again
    graph = GraphFactory.open(conf);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class), "gremlin-groovy", "g.V().count()")
                    .create(graph))
            .submit()
            .get();
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(firstRdd)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(secondRdd)));
    assertEquals(2, Spark.getContext().getPersistentRDDs().size());

    // job 6: final plain traversal
    graph = GraphFactory.open(conf);
    final long outCountAfterJob6 = graph.traversal().withComputer(SparkGraphComputer.class)
            .V().out().count().next().longValue();
    assertEquals(6, outCountAfterJob6);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(firstRdd)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());

    Spark.close();
}
Example 14
Source File: PersistedInputOutputRDDIntegrateTest.java From tinkerpop with Apache License 2.0 | 4 votes |
@Test public void testComplexChain() throws Exception { Spark.create("local[4]"); final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, "testComplexChain", "graphRDD"); final String rddName2 = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, "testComplexChain", "graphRDD2"); final Configuration configuration = super.getBaseConfiguration(); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo")); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertEquals(0, Spark.getContext().getPersistentRDDs().size()); Graph graph = GraphFactory.open(configuration); graph = graph.compute(SparkGraphComputer.class).persist(GraphComputer.Persist.EDGES).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph(); GraphTraversalSource g = graph.traversal(); assertEquals(6l, g.V().count().next().longValue()); assertEquals(6l, g.E().count().next().longValue()); assertEquals(6l, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue()); //// assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertEquals(1, Spark.getContext().getPersistentRDDs().size()); //// configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName()); 
configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName2); //// graph = GraphFactory.open(configuration); graph = graph.compute(SparkGraphComputer.class).persist(GraphComputer.Persist.EDGES).mapReduce(PageRankMapReduce.build().create()).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph(); g = graph.traversal(); assertEquals(6l, g.V().count().next().longValue()); assertEquals(6l, g.E().count().next().longValue()); assertEquals(6l, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue()); //// assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2))); assertTrue(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY))); assertEquals(3, Spark.getContext().getPersistentRDDs().size()); //// graph = GraphFactory.open(configuration); graph = graph.compute(SparkGraphComputer.class).persist(GraphComputer.Persist.VERTEX_PROPERTIES).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph(); g = graph.traversal(); assertEquals(6l, g.V().count().next().longValue()); assertEquals(0l, g.E().count().next().longValue()); assertEquals(6l, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue()); //// assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2))); assertFalse(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY))); assertEquals(2, Spark.getContext().getPersistentRDDs().size()); //// graph = GraphFactory.open(configuration); graph = graph.compute(SparkGraphComputer.class).persist(GraphComputer.Persist.NOTHING).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph(); assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName2))); g = graph.traversal(); assertEquals(0l, g.V().count().next().longValue()); 
assertEquals(0l, g.E().count().next().longValue()); assertEquals(0l, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue()); //// assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName))); assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName2))); assertFalse(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY))); assertEquals(1, Spark.getContext().getPersistentRDDs().size()); Spark.close(); }
Example 15
Source File: LocalPropertyTest.java From tinkerpop with Apache License 2.0 | 4 votes |
@Test public void shouldSetThreadLocalProperties() throws Exception { final String testName = "ThreadLocalProperties"; final String rddName = TestHelper.makeTestDataDirectory(LocalPropertyTest.class, UUID.randomUUID().toString()); final Configuration configuration = new BaseConfiguration(); configuration.setProperty("spark.master", "local[4]"); configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName()); configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName()); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo")); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName); configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false); configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); configuration.setProperty("spark.jobGroup.id", "22"); Graph graph = GraphFactory.open(configuration); graph.compute(SparkGraphComputer.class) .result(GraphComputer.ResultGraph.NEW) .persist(GraphComputer.Persist.EDGES) .program(TraversalVertexProgram.build() .traversal(graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)), "gremlin-groovy", "g.V()").create(graph)).submit().get(); //////// SparkConf sparkConfiguration = new SparkConf(); sparkConfiguration.setAppName(testName); ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue())); JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration)); JavaSparkStatusTracker statusTracker = sparkContext.statusTracker(); assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1); 
assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName))); /////// configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName()); configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName); configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, null); configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null); // just a note that this value should have always been set to true, but from the initial commit was false. // interestingly the last assertion had always passed up to spark 2.3.x when it started to fail. apparently // that assertion should likely have never passed, so it stands to reason that there was a bug in spark in // 2.2.x that was resolved for 2.3.x....that's my story and i'm sticking to it. configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); configuration.setProperty("spark.jobGroup.id", "44"); graph = GraphFactory.open(configuration); graph.compute(SparkGraphComputer.class) .result(GraphComputer.ResultGraph.NEW) .persist(GraphComputer.Persist.NOTHING) .program(TraversalVertexProgram.build() .traversal(graph.traversal().withComputer(SparkGraphComputer.class), "gremlin-groovy", "g.V()").create(graph)).submit().get(); /////// assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1); }