org.apache.avro.SchemaNormalization Java Examples

The following examples show how to use org.apache.avro.SchemaNormalization. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AvroEventDeserializer.java    From mt-flume with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the Avro file reader, the datum writer, the output buffer with its
 * binary encoder, and the schema fingerprint (raw bytes and hex string).
 *
 * <p>The current stream position is captured first; the stream is then moved
 * to offset 0 before the file reader is constructed, and the reader is
 * finally synced to the captured position.
 *
 * @throws IOException if reading from or seeking in the underlying stream fails
 * @throws NoSuchAlgorithmException if the "CRC-64-AVRO" fingerprint algorithm is unavailable
 */
private void initialize() throws IOException, NoSuchAlgorithmException {
  SeekableResettableInputBridge bridge = new SeekableResettableInputBridge(ris);

  // Capture the position before seeking to the start of the stream.
  final long resumePosition = bridge.tell();
  bridge.seek(0L);

  GenericDatumReader<GenericRecord> recordReader =
      new GenericDatumReader<GenericRecord>();
  fileReader = new DataFileReader<GenericRecord>(bridge, recordReader);
  fileReader.sync(resumePosition);

  // Everything below is derived from the schema reported by the file reader.
  schema = fileReader.getSchema();
  datumWriter = new GenericDatumWriter(schema);

  out = new ByteArrayOutputStream();
  // The previous encoder instance (if any) is handed back to the factory.
  encoder = EncoderFactory.get().binaryEncoder(out, encoder);

  schemaHash = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", schema);
  schemaHashString = Hex.encodeHexString(schemaHash);
}
 
Example #2
Source File: TestAvroEventDeserializer.java    From mt-flume with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that, with the schema type configured as HASH, the first event read
 * carries a schema-hash header equal to the hex-encoded "CRC-64-AVRO"
 * parsing fingerprint of the test schema.
 */
@Test
public void testSchemaHash() throws IOException, NoSuchAlgorithmException {
  File avroFile = newTestFile(true);

  String target = avroFile.getAbsolutePath();
  logger.info("Target: {}", target);
  TransientPositionTracker positionTracker = new TransientPositionTracker(target);

  // Configure the deserializer to emit the schema hash.
  Context ctx = new Context();
  String schemaTypeValue = AvroEventDeserializer.AvroSchemaType.HASH.toString();
  ctx.put(AvroEventDeserializer.CONFIG_SCHEMA_TYPE_KEY, schemaTypeValue);

  ResettableInputStream input = new ResettableFileInputStream(avroFile, positionTracker);
  EventDeserializer deserializer = new AvroEventDeserializer.Builder().build(ctx, input);

  Event firstEvent = deserializer.readEvent();
  String actualHash =
      firstEvent.getHeaders().get(AvroEventDeserializer.AVRO_SCHEMA_HEADER_HASH);

  byte[] fingerprintBytes = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", schema);
  String expectedHash = Hex.encodeHexString(fingerprintBytes);

  Assert.assertEquals(expectedHash, actualHash);
}
 
Example #3
Source File: GoogleCloudPubSubFlusherTest.java    From divolte-collector with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a published message carries a "schemaFingerprint" attribute
 * matching the fingerprint computed independently from {@code MINIMAL_SCHEMA}.
 */
@Test
public void testMessagesHaveSchemaFingerprint() {
    processSingleMessage();

    // Reminder: fingerprint is the SHA-256 hash of the normalized schema,
    //           base-64 encoded using the URL-safe encoding,
    //           with trailing padding stripped.
    final String normalizedSchema = SchemaNormalization.toParsingForm(MINIMAL_SCHEMA);
    final byte[] sha256Digest =
        Hashing.sha256().hashString(normalizedSchema, StandardCharsets.UTF_8).asBytes();
    final String paddedFingerprint = BaseEncoding.base64Url().encode(sha256Digest);
    final String expectedFingerPrint = paddedFingerprint.replace("=", "");

    final PubsubMessage published = getFirstPublishedMessage();
    assertEquals(expectedFingerPrint, published.getAttributesOrThrow("schemaFingerprint"));
}
 
Example #4
Source File: IcebergEncoder.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the write header: {@code V1_HEADER} followed by the "CRC-64-AVRO"
 * parsing fingerprint of the given schema.
 *
 * @param schema the Avro schema to fingerprint
 * @return the concatenation of {@code V1_HEADER} and the fingerprint bytes
 * @throws AvroRuntimeException wrapping the NoSuchAlgorithmException if the
 *         fingerprint algorithm is unavailable
 */
private static byte[] getWriteHeader(org.apache.avro.Schema schema) {
  final byte[] fingerprint;
  try {
    fingerprint = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", schema);
  } catch (NoSuchAlgorithmException e) {
    throw new AvroRuntimeException(e);
  }
  return Bytes.concat(V1_HEADER, fingerprint);
}
 
Example #5
Source File: IcebergEncoder.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the header bytes written ahead of an encoded payload:
 * {@code V1_HEADER} immediately followed by the schema's "CRC-64-AVRO"
 * parsing fingerprint.
 *
 * @param schema the Avro schema whose fingerprint is appended to the header
 * @return {@code V1_HEADER} concatenated with the fingerprint bytes
 * @throws AvroRuntimeException if the fingerprint algorithm cannot be found
 */
private static byte[] getWriteHeader(org.apache.avro.Schema schema) {
  byte[] schemaFingerprint;
  try {
    schemaFingerprint = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", schema);
  } catch (NoSuchAlgorithmException cause) {
    throw new AvroRuntimeException(cause);
  }
  return Bytes.concat(V1_HEADER, schemaFingerprint);
}
 
Example #6
Source File: SchemaIdGenerator.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the 64-bit parsing fingerprint of a record schema assembled from
 * the given fields.
 *
 * <p>Each field is re-created (same name, schema, doc, default value and
 * order) before being placed in the new record schema.
 * NOTE(review): presumably because an Avro field instance cannot be attached
 * to more than one record — confirm against the Avro API.
 *
 * @param fields the fields describing the record
 * @return the 64-bit fingerprint of the resulting record schema
 */
private static long fingerprint(final List<Schema.Field> fields) {
    final List<Schema.Field> fieldCopies = fields
            .stream()
            .map(it -> new Schema.Field(it.name(), it.schema(), it.doc(), it.defaultVal(), it.order()))
            .collect(toList());
    final Schema recordSchema = Schema.createRecord(fieldCopies);
    return SchemaNormalization.parsingFingerprint64(recordSchema);
}
 
Example #7
Source File: FastDeserializerGeneratorBase.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the integer id for a schema, computing and caching it on first use.
 *
 * <p>The id is the {@code HASH_FUNCTION} hash (as an int) of the schema's
 * parsing canonical form encoded as UTF-8.
 *
 * @param schema the schema to identify
 * @return the cached or newly computed schema id
 */
public static int getSchemaId(Schema schema) {
    final Integer cachedId = SCHEMA_IDS_CACHE.get(schema);
    if (cachedId != null) {
        return cachedId;
    }

    // Cache miss: derive the id from the normalized schema text and remember it.
    final String parsingForm = SchemaNormalization.toParsingForm(schema);
    final Integer computedId = HASH_FUNCTION.hashString(parsingForm, Charsets.UTF_8).asInt();
    SCHEMA_IDS_CACHE.put(schema, computedId);
    return computedId;
}
 
Example #8
Source File: FastSerializerGeneratorBase.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the id of the given schema in {@code SCHEMA_IDS_CACHE}; when it is
 * absent, hashes the schema's parsing canonical form (UTF-8) with
 * {@code HASH_FUNCTION}, stores the result in the cache and returns it.
 *
 * @param schema the schema whose id is requested
 * @return the schema id
 */
public static int getSchemaId(Schema schema) {
    final Integer knownId = SCHEMA_IDS_CACHE.get(schema);
    if (knownId != null) {
        return knownId;
    }

    final String canonicalText = SchemaNormalization.toParsingForm(schema);
    final Integer freshId = HASH_FUNCTION.hashString(canonicalText, Charsets.UTF_8).asInt();
    SCHEMA_IDS_CACHE.put(schema, freshId);
    return freshId;
}
 
Example #9
Source File: GoogleCloudPubSubFlusher.java    From divolte-collector with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the SHA-256 parsing fingerprint of the schema's Avro schema and
 * encodes it with {@code FINGERPRINT_ENCODER}.
 *
 * @param schema the wrapper holding the Avro schema to fingerprint
 * @return the encoded fingerprint string
 * @throws RuntimeException if the SHA-256 digest algorithm is unavailable
 */
private static String schemaFingerprint(final DivolteSchema schema) {
    final Schema avro = schema.avroSchema;
    // SHA-256 is on the list of mandatory JCE algorithms, so this shouldn't be an issue.
    try {
        final byte[] digest = SchemaNormalization.parsingFingerprint("SHA-256", avro);
        return FINGERPRINT_ENCODER.encodeToString(digest);
    } catch (final NoSuchAlgorithmException e) {
        throw new RuntimeException("Cannot calculate schema fingerprint; missing SHA-256 digest algorithm", e);
    }
}
 
Example #10
Source File: FixedAvroSerializer.java    From jstorm with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the schemas listed in the classpath resource
 * {@code FixedAvroSerializer.config} (one Avro schema JSON per line) and
 * registers each one in both lookup maps, keyed by / mapped to its
 * fingerprint string.
 *
 * <p>The resource stream and reader are closed when loading finishes,
 * whether normally or through an exception.
 *
 * @throws IOException if the config resource is missing or cannot be read
 * @throws NoSuchAlgorithmException if {@code FP_ALGO} is not an available
 *         fingerprint algorithm
 */
public FixedAvroSerializer() throws IOException, NoSuchAlgorithmException {
    InputStream in = this.getClass().getClassLoader().getResourceAsStream("FixedAvroSerializer.config");
    if (in == null) {
        // getResourceAsStream returns null for a missing resource; fail with a
        // clear message instead of an NPE inside InputStreamReader.
        throw new IOException("Classpath resource FixedAvroSerializer.config not found");
    }

    // NOTE(review): the reader uses the platform default charset; confirm the
    // config is guaranteed to be ASCII or switch to an explicit charset.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
        String line;
        while ((line = reader.readLine()) != null) {
            Schema schema = new Schema.Parser().parse(line);
            byte[] fp = SchemaNormalization.parsingFingerprint(FP_ALGO, schema);
            // NOTE(review): this base64-DECODES the raw fingerprint bytes and builds
            // the String with the default charset; an encode may have been intended.
            // Left unchanged because these keys may already be relied on elsewhere.
            String fingerPrint = new String(Base64.decodeBase64(fp));

            fingerprint2schemaMap.put(fingerPrint, schema);
            schema2fingerprintMap.put(schema, fingerPrint);
        }
    }
}
 
Example #11
Source File: IcebergDecoder.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Registers a decoder for the given write schema, keyed by the schema's
 * 64-bit parsing fingerprint.
 *
 * @param writeSchema the schema the incoming data was written with
 */
private void addSchema(org.apache.avro.Schema writeSchema) {
  final long fingerprint = SchemaNormalization.parsingFingerprint64(writeSchema);
  decoders.put(
      fingerprint,
      new RawDecoder<>(readSchema, writeSchema));
}
 
Example #12
Source File: Avro18Adapter.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Returns the parsing canonical form of the given schema by delegating to
 * {@link SchemaNormalization#toParsingForm}.
 *
 * @param s the schema to normalize
 * @return the parsing canonical form of {@code s}
 */
@Override
public String toParsingForm(Schema s) {
  final String parsingForm = SchemaNormalization.toParsingForm(s);
  return parsingForm;
}
 
Example #13
Source File: Avro19Adapter.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Delegates to {@link SchemaNormalization#toParsingForm} to obtain the
 * parsing canonical form of a schema.
 *
 * @param s the schema to convert
 * @return the schema's parsing canonical form
 */
@Override
public String toParsingForm(Schema s) {
  final String canonicalForm = SchemaNormalization.toParsingForm(s);
  return canonicalForm;
}
 
Example #14
Source File: Avro17Adapter.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Produces the parsing canonical form of the supplied schema via
 * {@link SchemaNormalization#toParsingForm}.
 *
 * @param s the schema to render
 * @return the parsing canonical form text
 */
@Override
public String toParsingForm(Schema s) {
  final String normalized = SchemaNormalization.toParsingForm(s);
  return normalized;
}
 
Example #15
Source File: IcebergDecoder.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Adds a decoder for data written with {@code writeSchema}; the decoder is
 * stored under the write schema's 64-bit parsing fingerprint.
 *
 * @param writeSchema the writer's schema
 */
private void addSchema(org.apache.avro.Schema writeSchema) {
  // The fingerprint is evaluated before the decoder is constructed, as before.
  decoders.put(
      SchemaNormalization.parsingFingerprint64(writeSchema),
      new RawDecoder<>(readSchema, writeSchema));
}