Java Code Examples for org.apache.spark.SparkContext#broadcast()
The following examples show how to use org.apache.spark.SparkContext#broadcast().
You can go to the original project or source file by following the links above each example.
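Before the project examples, here is a minimal, self-contained sketch of the pattern they all share (the class name, master URL, and sample data are illustrative assumptions, not taken from any project below). SparkContext#broadcast is the Scala-facing API, so Java callers must supply an explicit ClassTag for the broadcast value's runtime class:

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.broadcast.Broadcast;

import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

public final class BroadcastSketch {
    public static void main(String[] args) {
        // Illustrative local setup; real applications usually obtain an
        // existing context, e.g. via SparkContext.getOrCreate().
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("broadcast-sketch");
        SparkContext sc = new SparkContext(conf);

        // SparkContext#broadcast comes from the Scala API, so a ClassTag
        // describing the value's runtime class must be passed from Java.
        List<String> lookup = Arrays.asList("a", "b", "c");
        ClassTag<List<String>> tag = ClassTag$.MODULE$.apply(List.class);
        Broadcast<List<String>> bc = sc.broadcast(lookup, tag);

        // Tasks read the shared, read-only copy via Broadcast#value().
        System.out.println(bc.value());

        sc.stop();
    }
}

If a JavaSparkContext is available instead, its broadcast(T) overload constructs the ClassTag internally, which is why the examples below only build one explicitly when calling the Scala SparkContext directly.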
Example 1
Source File: SparkMaster.java From GeoTriples with Apache License 2.0
/**
 * Convert the input Dataset into RDF triples and store the results.
 * The conversion takes place per partition using the mapPartitions Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_partition(ArrayList<TriplesMap> mapping_list) {
    SparkContext sc = SparkContext.getOrCreate();
    Pair<ArrayList<TriplesMap>, List<String>> transformation_info =
            new Pair<>(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<Pair<ArrayList<TriplesMap>, List<String>>> classTag_pair =
            scala.reflect.ClassTag$.MODULE$.apply(Pair.class);
    Broadcast<Pair<ArrayList<TriplesMap>, List<String>>> bd_info =
            sc.broadcast(transformation_info, classTag_pair);
    rowRDD
        .mapPartitions((Iterator<Row> rows_iter) -> {
            ArrayList<TriplesMap> p_mapping_list = bd_info.value().getKey();
            List<String> p_header = bd_info.value().getValue();
            RML_Converter rml_converter = new RML_Converter(p_mapping_list, p_header);
            rml_converter.start();
            rml_converter.registerFunctions();
            Iterator<String> triples = rml_converter.convertPartition(rows_iter);
            rml_converter.stop();
            return triples;
        })
        .saveAsTextFile(outputDir);
}
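Note the design choice here: only the (mappings, headers) pair is broadcast, and the RML_Converter is rebuilt inside mapPartitions, so the start()/registerFunctions() setup cost is paid once per partition rather than once per row.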
Example 2
Source File: SparkMaster.java From GeoTriples with Apache License 2.0
/**
 * Convert the input Dataset into RDF triples and store the results.
 * The conversion takes place per row using the map Spark transformation.
 * @param mapping_list list of TripleMaps
 */
private void convert_row(ArrayList<TriplesMap> mapping_list) {
    SparkContext sc = SparkContext.getOrCreate();
    RML_Converter rml_converter =
            new RML_Converter(mapping_list, Arrays.asList(reader.getHeaders()));
    ClassTag<RML_Converter> classTagRML_Converter =
            scala.reflect.ClassTag$.MODULE$.apply(RML_Converter.class);
    Broadcast<RML_Converter> bc_converter = sc.broadcast(rml_converter, classTagRML_Converter);
    ClassTag<HashMap<URI, Function>> classTag_hashMap =
            scala.reflect.ClassTag$.MODULE$.apply(HashMap.class);
    Broadcast<HashMap<URI, Function>> bc_functionsHashMap =
            sc.broadcast(FunctionFactory.availableFunctions, classTag_hashMap);
    rowRDD
        .map((row) -> {
            FunctionFactory.availableFunctions = bc_functionsHashMap.value();
            return bc_converter.value().convertRow(row);
        })
        .saveAsTextFile(outputDir);
}
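In contrast to Example 1, this variant broadcasts the fully constructed RML_Converter (together with the function registry) and applies it row by row with map; the per-row closure stays cheap, but both broadcast values must be serializable for Spark to ship them to the executors.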
Example 3
Source File: DeepRDD.java From deep-spark with Apache License 2.0
public DeepRDD(SparkContext sc, S config) {
    // No parent dependencies; the ClassTag describes the entity type T.
    super(sc, scala.collection.Seq$.MODULE$.empty(),
            ClassTag$.MODULE$.<T>apply(config.getEntityClass()));
    config.setRddId(id());
    // Broadcast the job configuration so every partition can read it.
    this.config = sc.broadcast(config, ClassTag$.MODULE$.<S>apply(config.getClass()));
}
Example 4
Source File: RDDUtils.java From geowave with Apache License 2.0
/**
 * Translate a set of objects in a JavaRDD to a provided type and push to GeoWave
 *
 * @throws IOException
 */
private static void writeToGeoWave(
        final SparkContext sc,
        final Index index,
        final DataStorePluginOptions outputStoreOptions,
        final DataTypeAdapter adapter,
        final JavaRDD<SimpleFeature> inputRDD) throws IOException {
    // setup the configuration and the output format
    final Configuration conf =
            new org.apache.hadoop.conf.Configuration(sc.hadoopConfiguration());
    GeoWaveOutputFormat.setStoreOptions(conf, outputStoreOptions);
    GeoWaveOutputFormat.addIndex(conf, index);
    GeoWaveOutputFormat.addDataAdapter(conf, adapter);

    // create the job
    final Job job = new Job(conf);
    job.setOutputKeyClass(GeoWaveOutputKey.class);
    job.setOutputValueClass(SimpleFeature.class);
    job.setOutputFormatClass(GeoWaveOutputFormat.class);

    // broadcast string names
    final ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
    final Broadcast<String> typeName = sc.broadcast(adapter.getTypeName(), stringTag);
    final Broadcast<String> indexName = sc.broadcast(index.getName(), stringTag);

    // map to a pair containing the output key and the output value
    inputRDD.mapToPair(
            feat -> new Tuple2<>(
                    new GeoWaveOutputKey(typeName.value(), indexName.value()),
                    feat)).saveAsNewAPIHadoopDataset(job.getConfiguration());
}
Example 5
Source File: RDDUtils.java From geowave with Apache License 2.0
public static void writeRasterToGeoWave(
        final SparkContext sc,
        final Index index,
        final DataStorePluginOptions outputStoreOptions,
        final RasterDataAdapter adapter,
        final JavaRDD<GridCoverage> inputRDD) throws IOException {
    // setup the configuration and the output format
    final Configuration conf =
            new org.apache.hadoop.conf.Configuration(sc.hadoopConfiguration());
    GeoWaveOutputFormat.setStoreOptions(conf, outputStoreOptions);
    GeoWaveOutputFormat.addIndex(conf, index);
    GeoWaveOutputFormat.addDataAdapter(conf, adapter);

    // create the job
    final Job job = new Job(conf);
    job.setOutputKeyClass(GeoWaveOutputKey.class);
    job.setOutputValueClass(GridCoverage.class);
    job.setOutputFormatClass(GeoWaveOutputFormat.class);

    // broadcast string names
    final ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
    final Broadcast<String> typeName = sc.broadcast(adapter.getTypeName(), stringTag);
    final Broadcast<String> indexName = sc.broadcast(index.getName(), stringTag);

    // map to a pair containing the output key and the output value
    inputRDD.mapToPair(
            gridCoverage -> new Tuple2<>(
                    new GeoWaveOutputKey(typeName.value(), indexName.value()),
                    gridCoverage)).saveAsNewAPIHadoopDataset(job.getConfiguration());
}
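This method mirrors Example 4 almost line for line; only the value type differs (GridCoverage instead of SimpleFeature), while the broadcast of the type and index names follows the same pattern.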
Example 6
Source File: RDDUtils.java From geowave with Apache License 2.0
public static Broadcast<? extends NumericIndexStrategy> broadcastIndexStrategy(
        final SparkContext sc,
        final NumericIndexStrategy indexStrategy) {
    final ClassTag<NumericIndexStrategy> indexClassTag =
            scala.reflect.ClassTag$.MODULE$.apply(indexStrategy.getClass());
    final Broadcast<NumericIndexStrategy> broadcastStrategy =
            sc.broadcast(indexStrategy, indexClassTag);
    return broadcastStrategy;
}
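A minimal, hypothetical call site for this helper (sc, indexStrategy, and javaRdd are assumed to already exist in scope; they are not part of the GeoWave source). Capturing the returned handle in a closure means each executor fetches the strategy once, and its tasks then reuse the local copy:

final Broadcast<? extends NumericIndexStrategy> bcStrategy =
        RDDUtils.broadcastIndexStrategy(sc, indexStrategy);

javaRdd.foreach(value -> {
    // Deserialized once per executor, not once per record.
    final NumericIndexStrategy strategy = bcStrategy.value();
    // ... use strategy on value, e.g. to compute insertion ids ...
});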