org.apache.spark.serializer.KryoSerializer Java Examples
The following examples show how to use
org.apache.spark.serializer.KryoSerializer.
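Before the project examples, here is a minimal, self-contained sketch of the pattern most of them follow: build a Kryo instance from a KryoSerializer constructed on a SparkConf, then write and read an object with it. The class name KryoRoundTrip and the in-memory buffers are illustrative and not taken from any of the projects below.

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import org.apache.spark.SparkConf;
import org.apache.spark.serializer.KryoSerializer;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class KryoRoundTrip {
  public static void main(String[] args) {
    // Build a Kryo instance the way Spark would, honoring any
    // spark.kryo.* settings present on the SparkConf.
    Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();

    // Serialize an object (class name + state) to an in-memory buffer.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (Output out = new Output(bytes)) {
      kryo.writeClassAndObject(out, "hello kryo");
    }

    // Deserialize it back and verify the round trip.
    try (Input in = new Input(new ByteArrayInputStream(bytes.toByteArray()))) {
      Object restored = kryo.readClassAndObject(in);
      System.out.println(restored); // prints: hello kryo
    }
  }
}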
Example #1
Source File: TestDataFileSerialization.java From iceberg with Apache License 2.0
@Test
public void testDataFileKryoSerialization() throws Exception {
  File data = temp.newFile();
  Assert.assertTrue(data.delete());

  Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();

  try (Output out = new Output(new FileOutputStream(data))) {
    kryo.writeClassAndObject(out, DATA_FILE);
    kryo.writeClassAndObject(out, DATA_FILE.copy());
  }

  try (Input in = new Input(new FileInputStream(data))) {
    for (int i = 0; i < 2; i += 1) {
      Object obj = kryo.readClassAndObject(in);
      Assert.assertTrue("Should be a DataFile", obj instanceof DataFile);
      checkDataFile(DATA_FILE, (DataFile) obj);
    }
  }
}
Example #2
Source File: TestDataFileSerialization.java From iceberg with Apache License 2.0
@Test
public void testParquetWriterSplitOffsets() throws IOException {
  Iterable<InternalRow> records = RandomData.generateSpark(DATE_SCHEMA, 1, 33L);
  File parquetFile = new File(
      temp.getRoot(),
      FileFormat.PARQUET.addExtension(UUID.randomUUID().toString()));
  FileAppender<InternalRow> writer = Parquet.write(Files.localOutput(parquetFile))
      .schema(DATE_SCHEMA)
      .createWriterFunc(msgType -> SparkParquetWriters.buildWriter(SparkSchemaUtil.convert(DATE_SCHEMA), msgType))
      .build();
  try {
    writer.addAll(records);
  } finally {
    writer.close();
  }

  Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();
  File dataFile = temp.newFile();
  try (Output out = new Output(new FileOutputStream(dataFile))) {
    kryo.writeClassAndObject(out, writer.splitOffsets());
  }
  try (Input in = new Input(new FileInputStream(dataFile))) {
    kryo.readClassAndObject(in);
  }
}
Example #3
Source File: TestManifestFileSerialization.java From iceberg with Apache License 2.0
@Test
public void testManifestFileKryoSerialization() throws IOException {
  File data = temp.newFile();
  Assert.assertTrue(data.delete());

  Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();

  ManifestFile manifest = writeManifest(FILE_A);

  try (Output out = new Output(new FileOutputStream(data))) {
    kryo.writeClassAndObject(out, manifest);
    kryo.writeClassAndObject(out, manifest.copy());
    kryo.writeClassAndObject(out, GenericManifestFile.copyOf(manifest).build());
  }

  try (Input in = new Input(new FileInputStream(data))) {
    for (int i = 0; i < 3; i += 1) {
      Object obj = kryo.readClassAndObject(in);
      Assert.assertTrue("Should be a ManifestFile", obj instanceof ManifestFile);
      checkManifestFile(manifest, (ManifestFile) obj);
    }
  }
}
Example #4
Source File: LensAPI.java From cognition with Apache License 2.0
/**
 * Helper method for creating the spark context from the given cognition configuration
 *
 * @return a new configured spark context
 */
public SparkContext createSparkContext() {
  SparkConf conf = new SparkConf();
  Configuration config = cognition.getProperties();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName(config.getString("app.name"));
  conf.setMaster(config.getString("master"));

  Iterator<String> iterator = config.getKeys("spark");
  while (iterator.hasNext()) {
    String key = iterator.next();
    conf.set(key, config.getString(key));
  }

  SparkContext sc = new SparkContext(conf);
  for (String jar : config.getStringArray("jars")) {
    sc.addJar(jar);
  }
  return sc;
}
Example #5
Source File: UnshadedKryoShimService.java From tinkerpop with Apache License 2.0
private LinkedBlockingQueue<Kryo> initialize(final Configuration configuration) {
  // DCL is safe in this case due to volatility
  if (!INITIALIZED) {
    synchronized (UnshadedKryoShimService.class) {
      if (!INITIALIZED) {
        // so we don't get a WARN that a new configuration is being created within an active context
        final SparkConf sparkConf = null == Spark.getContext()
            ? new SparkConf()
            : Spark.getContext().getConf().clone();
        configuration.getKeys().forEachRemaining(key -> sparkConf.set(key, configuration.getProperty(key).toString()));
        final KryoSerializer serializer = new KryoSerializer(sparkConf);

        // Setup a pool backed by our spark.serializer instance
        // Reuse Gryo poolsize for Kryo poolsize (no need to copy this to SparkConf)
        KRYOS.clear();
        final int poolSize = configuration.getInt(GryoPool.CONFIG_IO_GRYO_POOL_SIZE,
            GryoPool.CONFIG_IO_GRYO_POOL_SIZE_DEFAULT);
        for (int i = 0; i < poolSize; i++) {
          KRYOS.add(serializer.newKryo());
        }

        INITIALIZED = true;
      }
    }
  }

  return KRYOS;
}
Example #6
Source File: SparkFrontendUtils.java From incubator-nemo with Apache License 2.0
/**
 * Derive Spark serializer from a spark context.
 *
 * @param sparkContext spark context to derive the serializer from.
 * @return the serializer.
 */
public static Serializer deriveSerializerFrom(final org.apache.spark.SparkContext sparkContext) {
  if (sparkContext.conf().get("spark.serializer", "")
      .equals("org.apache.spark.serializer.KryoSerializer")) {
    return new KryoSerializer(sparkContext.conf());
  } else {
    return new JavaSerializer(sparkContext.conf());
  }
}
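As a follow-up sketch (caller-side code assumed, not part of the Nemo source): the returned Serializer is used through Spark's SerializerInstance API, which requires a Scala ClassTag for the value type.

// Assumes a live SparkContext named sparkContext and imports of
// org.apache.spark.serializer.{Serializer, SerializerInstance},
// scala.reflect.ClassTag, and java.nio.ByteBuffer.
Serializer serializer = SparkFrontendUtils.deriveSerializerFrom(sparkContext);
SerializerInstance instance = serializer.newInstance();
ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
ByteBuffer bytes = instance.serialize("hello kryo", tag);
String restored = instance.deserialize(bytes, tag); // equals "hello kryo"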
Example #7
Source File: SparkFrontendUtils.java From nemo with Apache License 2.0
/**
 * Derive Spark serializer from a spark context.
 *
 * @param sparkContext spark context to derive the serializer from.
 * @return the serializer.
 */
public static Serializer deriveSerializerFrom(final SparkContext sparkContext) {
  if (sparkContext.conf().get("spark.serializer", "")
      .equals("org.apache.spark.serializer.KryoSerializer")) {
    return new KryoSerializer(sparkContext.conf());
  } else {
    return new JavaSerializer(sparkContext.conf());
  }
}
Example #8
Source File: SparkBatchProcessingTest.java From OSTMap with Apache License 2.0
private static StreamingContext createSparkStreamingContext() {
  SparkConf conf = new SparkConf()
      .setAppName("Spark Batch Processing Test")
      .set("spark.serializer", KryoSerializer.class.getCanonicalName())
      .set("spark.eventLog.enabled", "true");
  return new StreamingContext(conf, Durations.seconds(15));
}
Example #9
Source File: LensTest.java From cognition with Apache License 2.0
@Test
public void test() throws AccumuloSecurityException, IOException, AccumuloException,
    TableExistsException, TableNotFoundException {
  /*Connector conn = instance.getConnector("root", new PasswordToken());
  Scanner scan = conn.createScanner("moreover", Authorizations.EMPTY);
  for(Map.Entry<Key, Value> entry : scan){
    System.out.println(entry);
  }*/

  // Local Spark context with Kryo as the serializer
  SparkConf conf = new SparkConf();
  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName("test");
  conf.setMaster("local[2]");
  SparkContext sc = new SparkContext(conf);

  // Query the "moreover" Accumulo table through the Lens API
  CognitionConfiguration pip = new CognitionConfiguration(new AccumuloConfiguration(instance, user, password, true));
  LensAPI lens = new LensAPI(sc, pip);
  Criteria criteria = new Criteria();
  criteria.addKeyword("test");
  criteria.setDates(Instant.parse("2015-10-20T09:19:12Z"), Instant.parse("2015-10-20T09:19:13Z"));
  SchemaAdapter s = new SchemaAdapter();
  s.loadJson("moreover-schema.json");
  criteria.setSchema(s);
  criteria.setAccumuloTable("moreover");

  String json = lens.query(criteria);
  assertEquals("[moreover json]", json);
}
Example #10
Source File: SparkHadoopGraphProvider.java From tinkerpop with Apache License 2.0
@Override
public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test,
                                                final String testMethodName, final LoadGraphWith.GraphData loadGraphWith) {
  this.graphSONInput = RANDOM.nextBoolean();
  if (this.getClass().equals(SparkHadoopGraphProvider.class) &&
      !SparkHadoopGraphProvider.class.getCanonicalName().equals(System.getProperty(PREVIOUS_SPARK_PROVIDER, null))) {
    Spark.close();
    HadoopPools.close();
    KryoShimServiceLoader.close();
    System.setProperty(PREVIOUS_SPARK_PROVIDER, SparkHadoopGraphProvider.class.getCanonicalName());
  }

  final Map<String, Object> config = new HashMap<String, Object>() {{
    put(Graph.GRAPH, HadoopGraph.class.getName());
    put(Constants.GREMLIN_HADOOP_GRAPH_READER, graphSONInput ? GraphSONInputFormat.class.getCanonicalName() : GryoInputFormat.class.getCanonicalName());
    put(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName());
    put(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, getWorkingDirectory());
    put(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    put(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);  // this makes the test suite go really fast
  }};

  // toy graph inputRDD does not have corresponding outputRDD so where jobs chain, it fails (failing makes sense)
  if (null != loadGraphWith &&
      !test.equals(ProgramTest.Traversals.class) &&
      !test.equals(PageRankTest.Traversals.class) &&
      !test.equals(ConnectedComponentTest.Traversals.class) &&
      !test.equals(ShortestPathTest.Traversals.class) &&
      !test.equals(PeerPressureTest.Traversals.class) &&
      !test.equals(FileSystemStorageCheck.class) &&
      !testMethodName.equals("shouldSupportJobChaining") && // GraphComputerTest.shouldSupportJobChaining
      RANDOM.nextBoolean()) {
    config.put(Constants.GREMLIN_HADOOP_GRAPH_READER, ToyGraphInputRDD.class.getCanonicalName());
  }

  // tests persisted RDDs
  if (test.equals(SparkContextStorageCheck.class)) {
    config.put(Constants.GREMLIN_HADOOP_GRAPH_READER, ToyGraphInputRDD.class.getCanonicalName());
    config.put(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
  }

  config.put(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER, SparkGraphComputer.class.getCanonicalName());
  config.put(SparkLauncher.SPARK_MASTER, "local[" + AVAILABLE_PROCESSORS + "]");
  config.put(Constants.SPARK_SERIALIZER, KryoSerializer.class.getCanonicalName());
  config.put(Constants.SPARK_KRYO_REGISTRATOR, GryoRegistrator.class.getCanonicalName());
  config.put(Constants.SPARK_KRYO_REGISTRATION_REQUIRED, true);
  return config;
}
Example #11
Source File: SpliceKryoSerializerInstance.java From spliceengine with GNU Affero General Public License v3.0
public SpliceKryoSerializerInstance(KryoSerializer ks) {
  super(ks, false);
}
Example #12
Source File: SparkTestUtils.java From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Takes an input object and returns the value of the object after it has been serialized and then deserialized in Kryo.
 * Requires the class of the input object as a parameter because it's not generally possible to get the class of a
 * generified method parameter with reflection.
 *
 * @param input instance of inputClazz. Never {@code null}
 * @param inputClazz class to cast input
 * @param conf Spark configuration to test
 * @param <T> class to attempt. Same or subclass of inputClazz
 * @return serialized and deserialized instance of input. Throws exception if serialization round trip fails.
 */
public static <T> T roundTripInKryo(final T input, final Class<?> inputClazz, final SparkConf conf) {
  Utils.nonNull(input);
  final KryoSerializer kryoSerializer = new KryoSerializer(conf);
  final SerializerInstance sparkSerializer = kryoSerializer.newInstance();
  final ClassTag<T> tag = ClassTag$.MODULE$.apply(inputClazz);
  return sparkSerializer.deserialize(sparkSerializer.serialize(input, tag), tag);
}
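A brief usage sketch of the helper above (MyRecord is a hypothetical test type, not from the gatk source):

// Hypothetical caller: assert that a value survives a Kryo round trip.
final SparkConf conf = new SparkConf();
final MyRecord original = new MyRecord(42); // MyRecord is assumed for illustration
final MyRecord copy = SparkTestUtils.roundTripInKryo(original, MyRecord.class, conf);
Assert.assertEquals(original, copy);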