Java Code Examples for org.apache.spark.SparkContext#getOrCreate()
The following examples show how to use org.apache.spark.SparkContext#getOrCreate().
You can go to the original project or source file by following the links above each example.
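Before the project-specific examples, here is a minimal, self-contained sketch of the basic getOrCreate() pattern (not taken from any of the projects below): build a SparkConf, obtain the singleton SparkContext, and stop it when finished. The application name and master URL are illustrative placeholders.

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class GetOrCreateDemo {
    public static void main(String[] args) {
        // The configuration is only used if no SparkContext exists yet;
        // if one is already running, getOrCreate() simply returns it.
        SparkConf conf = new SparkConf()
                .setAppName("getOrCreate-demo")  // illustrative app name
                .setMaster("local[2]");          // illustrative master URL

        SparkContext sc = SparkContext.getOrCreate(conf);

        // A second call (with or without a SparkConf) returns the same singleton instance.
        SparkContext same = SparkContext.getOrCreate();
        System.out.println(sc == same);  // true

        sc.stop();
    }
}

This is also why most of the examples below can call SparkContext.getOrCreate() with no arguments: they rely on a context that was already created elsewhere in the application.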
Example 1
Source File: SparkMaster.java From GeoTriples with Apache License 2.0
/**
 * Converts the input Dataset into RDF triples and stores the results.
 * The conversion takes place per partition using the mapPartitions Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_partition(ArrayList<TriplesMap> mapping_list){
    SparkContext sc = SparkContext.getOrCreate();
    Pair<ArrayList<TriplesMap>, List<String>> transformation_info =
            new Pair<>(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<Pair<ArrayList<TriplesMap>, List<String>>> classTag_pair =
            scala.reflect.ClassTag$.MODULE$.apply(Pair.class);
    // Broadcast the mapping list and headers once so every partition can reuse them.
    Broadcast<Pair<ArrayList<TriplesMap>, List<String>>> bd_info =
            sc.broadcast(transformation_info, classTag_pair);
    rowRDD
        .mapPartitions(
            (Iterator<Row> rows_iter) -> {
                ArrayList<TriplesMap> p_mapping_list = bd_info.value().getKey();
                List<String> p_header = bd_info.value().getValue();
                // One converter instance per partition, instead of one per row.
                RML_Converter rml_converter = new RML_Converter(p_mapping_list, p_header);
                rml_converter.start();
                rml_converter.registerFunctions();
                Iterator<String> triples = rml_converter.convertPartition(rows_iter);
                rml_converter.stop();
                return triples;
            })
        .saveAsTextFile(outputDir);
}
Example 2
Source File: SparkMaster.java From GeoTriples with Apache License 2.0
/**
 * Converts the input Dataset into RDF triples and stores the results.
 * The conversion takes place per row using the map Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_row(ArrayList<TriplesMap> mapping_list){
    SparkContext sc = SparkContext.getOrCreate();
    RML_Converter rml_converter = new RML_Converter(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<RML_Converter> classTagRML_Converter =
            scala.reflect.ClassTag$.MODULE$.apply(RML_Converter.class);
    // Broadcast the converter and the available functions so each task can use them locally.
    Broadcast<RML_Converter> bc_converter = sc.broadcast(rml_converter, classTagRML_Converter);
    ClassTag<HashMap<URI, Function>> classTag_hashMap =
            scala.reflect.ClassTag$.MODULE$.apply(HashMap.class);
    Broadcast<HashMap<URI, Function>> bc_functionsHashMap =
            sc.broadcast(FunctionFactory.availableFunctions, classTag_hashMap);
    rowRDD
        .map((row) -> {
            FunctionFactory.availableFunctions = bc_functionsHashMap.value();
            return bc_converter.value().convertRow(row);
        })
        .saveAsTextFile(outputDir);
}
Example 3
Source File: SparkTextFileBoundedSourceVertex.java From incubator-nemo with Apache License 2.0
@Override
protected Iterator<String> initializeIterator() {
    // For setting up the same environment in the executors.
    final SparkContext sparkContext = SparkContext.getOrCreate(sparkConf);

    // Spark does lazy evaluation: it doesn't load the full data in the RDD,
    // but only the partition it is asked for.
    final RDD<String> rdd = sparkContext.textFile(inputPath, numPartitions);
    final Iterable<String> iterable = () -> JavaConverters.asJavaIteratorConverter(
        rdd.iterator(rdd.getPartitions()[partitionIndex], TaskContext$.MODULE$.empty())).asJava();
    return iterable.iterator();
}
Example 4
Source File: ConverterFactory.java From jpmml-sparkml with GNU Affero General Public License v3.0
static public void checkVersion(){
    SparkContext sparkContext = SparkContext.getOrCreate();
    int[] version = parseVersion(sparkContext.version());
    if(!Arrays.equals(ConverterFactory.VERSION, version)){
        throw new IllegalArgumentException("Expected Apache Spark ML version " + formatVersion(ConverterFactory.VERSION)
            + ", got version " + formatVersion(version) + " (" + sparkContext.version() + ")");
    }
}
Example 5
Source File: Spark1Shims.java From zeppelin with Apache License 2.0
public void setupSparkListener(final String master, final String sparkWebUrl,
                               final InterpreterContext context) {
    SparkContext sc = SparkContext.getOrCreate();
    sc.addSparkListener(new JobProgressListener(sc.getConf()) {
        @Override
        public void onJobStart(SparkListenerJobStart jobStart) {
            if (sc.getConf().getBoolean("spark.ui.enabled", true) &&
                !Boolean.parseBoolean(properties.getProperty("zeppelin.spark.ui.hidden", "false"))) {
                buildSparkJobUrl(master, sparkWebUrl, jobStart.jobId(), jobStart.properties(), context);
            }
        }
    });
}
Example 6
Source File: Spark3Shims.java From zeppelin with Apache License 2.0
public void setupSparkListener(final String master, final String sparkWebUrl,
                               final InterpreterContext context) {
    SparkContext sc = SparkContext.getOrCreate();
    sc.addSparkListener(new SparkListener() {
        @Override
        public void onJobStart(SparkListenerJobStart jobStart) {
            if (sc.getConf().getBoolean("spark.ui.enabled", true) &&
                !Boolean.parseBoolean(properties.getProperty("zeppelin.spark.ui.hidden", "false"))) {
                buildSparkJobUrl(master, sparkWebUrl, jobStart.jobId(), jobStart.properties(), context);
            }
        }
    });
}
Example 7
Source File: Spark2Shims.java From zeppelin with Apache License 2.0
public void setupSparkListener(final String master, final String sparkWebUrl,
                               final InterpreterContext context) {
    SparkContext sc = SparkContext.getOrCreate();
    sc.addSparkListener(new SparkListener() {
        @Override
        public void onJobStart(SparkListenerJobStart jobStart) {
            if (sc.getConf().getBoolean("spark.ui.enabled", true) &&
                !Boolean.parseBoolean(properties.getProperty("zeppelin.spark.ui.hidden", "false"))) {
                buildSparkJobUrl(master, sparkWebUrl, jobStart.jobId(), jobStart.properties(), context);
            }
        }
    });
}
Example 8
Source File: Spark.java From tinkerpop with Apache License 2.0
public static SparkContext create(final SparkConf sparkConf) {
    if (isContextNullOrStopped()) {
        sparkConf.setAppName("Apache TinkerPop's Spark-Gremlin");
        CONTEXT = SparkContext.getOrCreate(sparkConf);
    }
    return CONTEXT;
}
Example 9
Source File: Spark.java From tinkerpop with Apache License 2.0
public static SparkContext recreateStopped() {
    if (null == CONTEXT)
        throw new IllegalStateException("The Spark context has not been created.");
    if (!CONTEXT.isStopped())
        throw new IllegalStateException("The Spark context is not stopped.");
    CONTEXT = SparkContext.getOrCreate(CONTEXT.getConf());
    return CONTEXT;
}
Example 10
Source File: LocalPropertyTest.java From tinkerpop with Apache License 2.0
@Test
public void shouldSetThreadLocalProperties() throws Exception {
    final String testName = "ThreadLocalProperties";
    final String rddName = TestHelper.makeTestDataDirectory(LocalPropertyTest.class, UUID.randomUUID().toString());
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "22");
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
        .result(GraphComputer.ResultGraph.NEW)
        .persist(GraphComputer.Persist.EDGES)
        .program(TraversalVertexProgram.build()
            .traversal(graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)),
                       "gremlin-groovy", "g.V()").create(graph)).submit().get();
    ////////
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(testName);
    ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    JavaSparkStatusTracker statusTracker = sparkContext.statusTracker();
    assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, null);
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
    // just a note that this value should have always been set to true, but from the initial commit was false.
    // interestingly the last assertion had always passed up to spark 2.3.x when it started to fail. apparently
    // that assertion should likely have never passed, so it stands to reason that there was a bug in spark in
    // 2.2.x that was resolved for 2.3.x....that's my story and i'm sticking to it.
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "44");
    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
        .result(GraphComputer.ResultGraph.NEW)
        .persist(GraphComputer.Persist.NOTHING)
        .program(TraversalVertexProgram.build()
            .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                       "gremlin-groovy", "g.V()").create(graph)).submit().get();
    ///////
    assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1);
}