Java Code Examples for org.apache.spark.SparkContext#getOrCreate()
The following examples show how to use org.apache.spark.SparkContext#getOrCreate().
You can go to the original project or source file by following the links above each example.
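Before the project-specific examples, here is a minimal, self-contained sketch of the basic getOrCreate() pattern (not taken from any of the projects below): build a SparkConf, obtain the singleton SparkContext, and stop it when finished. The application name and master URL are illustrative placeholders.

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class GetOrCreateDemo {
    public static void main(String[] args) {
        // The configuration is only used if no SparkContext exists yet;
        // if one is already running, getOrCreate() simply returns it.
        SparkConf conf = new SparkConf()
                .setAppName("getOrCreate-demo")  // illustrative app name
                .setMaster("local[2]");          // illustrative master URL

        SparkContext sc = SparkContext.getOrCreate(conf);

        // A second call (with or without a SparkConf) returns the same singleton instance.
        SparkContext same = SparkContext.getOrCreate();
        System.out.println(sc == same);  // true

        sc.stop();
    }
}

This is also why most of the examples below can call SparkContext.getOrCreate() with no arguments: they rely on a context that was already created elsewhere in the application.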
Example 1
Source File: SparkMaster.java From GeoTriples with Apache License 2.0
/**
 * Converts the input Dataset into RDF triples and stores the results.
 * The conversion takes place per partition using the mapPartitions Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_partition(ArrayList<TriplesMap> mapping_list){
    SparkContext sc = SparkContext.getOrCreate();
    Pair<ArrayList<TriplesMap>, List<String>> transformation_info =
            new Pair<>(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<Pair<ArrayList<TriplesMap>, List<String>>> classTag_pair =
            scala.reflect.ClassTag$.MODULE$.apply(Pair.class);
    // Broadcast the mapping list and headers once so every partition can reuse them.
    Broadcast<Pair<ArrayList<TriplesMap>, List<String>>> bd_info =
            sc.broadcast(transformation_info, classTag_pair);
    rowRDD
        .mapPartitions(
            (Iterator<Row> rows_iter) -> {
                ArrayList<TriplesMap> p_mapping_list = bd_info.value().getKey();
                List<String> p_header = bd_info.value().getValue();
                // One converter instance per partition, instead of one per row.
                RML_Converter rml_converter = new RML_Converter(p_mapping_list, p_header);
                rml_converter.start();
                rml_converter.registerFunctions();
                Iterator<String> triples = rml_converter.convertPartition(rows_iter);
                rml_converter.stop();
                return triples;
            })
        .saveAsTextFile(outputDir);
}
Example 2
Source File: SparkMaster.java From GeoTriples with Apache License 2.0
/**
 * Converts the input Dataset into RDF triples and stores the results.
 * The conversion takes place per row using the map Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_row(ArrayList<TriplesMap> mapping_list){
    SparkContext sc = SparkContext.getOrCreate();
    RML_Converter rml_converter = new RML_Converter(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<RML_Converter> classTagRML_Converter =
            scala.reflect.ClassTag$.MODULE$.apply(RML_Converter.class);
    // Broadcast the converter and the available functions so each task can use them locally.
    Broadcast<RML_Converter> bc_converter = sc.broadcast(rml_converter, classTagRML_Converter);
    ClassTag<HashMap<URI, Function>> classTag_hashMap =
            scala.reflect.ClassTag$.MODULE$.apply(HashMap.class);
    Broadcast<HashMap<URI, Function>> bc_functionsHashMap =
            sc.broadcast(FunctionFactory.availableFunctions, classTag_hashMap);
    rowRDD
        .map((row) -> {
            FunctionFactory.availableFunctions = bc_functionsHashMap.value();
            return bc_converter.value().convertRow(row);
        })
        .saveAsTextFile(outputDir);
}
Example 3
Source File: SparkTextFileBoundedSourceVertex.java From incubator-nemo with Apache License 2.0
@Override
protected Iterator<String> initializeIterator() {
    // For setting up the same environment in the executors.
    final SparkContext sparkContext = SparkContext.getOrCreate(sparkConf);

    // Spark does lazy evaluation: it doesn't load the full data in the RDD,
    // but only the partition it is asked for.
    final RDD<String> rdd = sparkContext.textFile(inputPath, numPartitions);
    final Iterable<String> iterable = () -> JavaConverters.asJavaIteratorConverter(
        rdd.iterator(rdd.getPartitions()[partitionIndex], TaskContext$.MODULE$.empty())).asJava();
    return iterable.iterator();
}
Example 4
Source File: ConverterFactory.java From jpmml-sparkml with GNU Affero General Public License v3.0
static public void checkVersion(){
    SparkContext sparkContext = SparkContext.getOrCreate();
    int[] version = parseVersion(sparkContext.version());
    if(!Arrays.equals(ConverterFactory.VERSION, version)){
        throw new IllegalArgumentException("Expected Apache Spark ML version " + formatVersion(ConverterFactory.VERSION)
            + ", got version " + formatVersion(version) + " (" + sparkContext.version() + ")");
    }
}
Example 5
Source File: Spark1Shims.java From zeppelin with Apache License 2.0
public void setupSparkListener(final String master, final String sparkWebUrl,
                               final InterpreterContext context) {
    SparkContext sc = SparkContext.getOrCreate();
    sc.addSparkListener(new JobProgressListener(sc.getConf()) {
        @Override
        public void onJobStart(SparkListenerJobStart jobStart) {
            if (sc.getConf().getBoolean("spark.ui.enabled", true) &&
                !Boolean.parseBoolean(properties.getProperty("zeppelin.spark.ui.hidden", "false"))) {
                buildSparkJobUrl(master, sparkWebUrl, jobStart.jobId(), jobStart.properties(), context);
            }
        }
    });
}
Example 6
Source File: Spark3Shims.java From zeppelin with Apache License 2.0
public void setupSparkListener(final String master, final String sparkWebUrl,
                               final InterpreterContext context) {
    SparkContext sc = SparkContext.getOrCreate();
    sc.addSparkListener(new SparkListener() {
        @Override
        public void onJobStart(SparkListenerJobStart jobStart) {
            if (sc.getConf().getBoolean("spark.ui.enabled", true) &&
                !Boolean.parseBoolean(properties.getProperty("zeppelin.spark.ui.hidden", "false"))) {
                buildSparkJobUrl(master, sparkWebUrl, jobStart.jobId(), jobStart.properties(), context);
            }
        }
    });
}
Example 7
Source File: Spark2Shims.java From zeppelin with Apache License 2.0
public void setupSparkListener(final String master, final String sparkWebUrl,
                               final InterpreterContext context) {
    SparkContext sc = SparkContext.getOrCreate();
    sc.addSparkListener(new SparkListener() {
        @Override
        public void onJobStart(SparkListenerJobStart jobStart) {
            if (sc.getConf().getBoolean("spark.ui.enabled", true) &&
                !Boolean.parseBoolean(properties.getProperty("zeppelin.spark.ui.hidden", "false"))) {
                buildSparkJobUrl(master, sparkWebUrl, jobStart.jobId(), jobStart.properties(), context);
            }
        }
    });
}
Example 8
Source File: Spark.java From tinkerpop with Apache License 2.0
public static SparkContext create(final SparkConf sparkConf) {
    if (isContextNullOrStopped()) {
        sparkConf.setAppName("Apache TinkerPop's Spark-Gremlin");
        CONTEXT = SparkContext.getOrCreate(sparkConf);
    }
    return CONTEXT;
}
Example 9
Source File: Spark.java From tinkerpop with Apache License 2.0
public static SparkContext recreateStopped() {
    if (null == CONTEXT)
        throw new IllegalStateException("The Spark context has not been created.");
    if (!CONTEXT.isStopped())
        throw new IllegalStateException("The Spark context is not stopped.");
    CONTEXT = SparkContext.getOrCreate(CONTEXT.getConf());
    return CONTEXT;
}
Example 10
Source File: LocalPropertyTest.java From tinkerpop with Apache License 2.0
@Test
public void shouldSetThreadLocalProperties() throws Exception {
    final String testName = "ThreadLocalProperties";
    final String rddName = TestHelper.makeTestDataDirectory(LocalPropertyTest.class, UUID.randomUUID().toString());
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "22");
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
        .result(GraphComputer.ResultGraph.NEW)
        .persist(GraphComputer.Persist.EDGES)
        .program(TraversalVertexProgram.build()
            .traversal(graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)),
                       "gremlin-groovy", "g.V()").create(graph)).submit().get();
    ////////
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(testName);
    ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    JavaSparkStatusTracker statusTracker = sparkContext.statusTracker();
    assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, null);
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
    // just a note that this value should have always been set to true, but from the initial commit was false.
    // interestingly the last assertion had always passed up to spark 2.3.x when it started to fail. apparently
    // that assertion should likely have never passed, so it stands to reason that there was a bug in spark in
    // 2.2.x that was resolved for 2.3.x....that's my story and i'm sticking to it.
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "44");
    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
        .result(GraphComputer.ResultGraph.NEW)
        .persist(GraphComputer.Persist.NOTHING)
        .program(TraversalVertexProgram.build()
            .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                       "gremlin-groovy", "g.V()").create(graph)).submit().get();
    ///////
    assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1);
}