Java Code Examples for org.apache.avro.generic.GenericRecord#getSchema()

The following examples show how to use org.apache.avro.generic.GenericRecord#getSchema(). Each example is taken from an open-source project; the project and source file are noted above the code.
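Before the project examples, here is a minimal, self-contained sketch of the call itself: build a record against a parsed schema and read that schema back with getSchema(). The record type and field names below are made up for illustration and do not come from any of the projects that follow.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class GetSchemaSketch {
    public static void main(String[] args) {
        // Hypothetical schema, used only to illustrate the API
        String schemaJson = "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
                + "{\"name\":\"name\",\"type\":\"string\"},"
                + "{\"name\":\"age\",\"type\":\"int\"}]}";
        Schema schema = new Schema.Parser().parse(schemaJson);

        GenericRecord record = new GenericData.Record(schema);
        record.put("name", "alice");
        record.put("age", 30);

        // getSchema() returns the schema the record was created with
        Schema recovered = record.getSchema();
        System.out.println(recovered.getFullName());      // User
        System.out.println(recovered.getFields().size()); // 2
    }
}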
Example 1
Source File: JsonUtils.java    From localization_nifi with Apache License 2.0
/**
 * Writes provided {@link GenericRecord} into the provided
 * {@link OutputStream} as JSON.
 */
public static void write(GenericRecord record, OutputStream out) {
    try {
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(record.getSchema(), out);
        writer.write(record, encoder);
        encoder.flush();
    } catch (Exception e) {
        throw new IllegalStateException("Failed to read GenericRecord", e);
    }
}
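For reference, a sketch of the reverse direction: parsing JSON produced by the method above back into a GenericRecord. It uses only the standard Avro decoder API and assumes the caller still has the schema that was used to write the JSON; the method itself is not part of the NiFi source.

import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.JsonDecoder;

public static GenericRecord readJson(Schema schema, InputStream in) throws IOException {
    // JSON encoding does not embed the schema, so the writer schema must be supplied
    JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, in);
    DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
    return reader.read(null, decoder);
}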
 
Example 2
Source File: BoltCommandHandlerHelper.java    From DBus with Apache License 2.0
public static <T extends Object> PairWrapper<String, Object> convertDB2AvroRecord(GenericRecord record) {
    PairWrapper<String, Object> wrapper = new PairWrapper<>();
    Map<String, Object> map = convert2map(record);

    Schema schema = record.getSchema();
    List<Schema.Field> fields = schema.getFields();

    for (Schema.Field field : fields) {
        String key = field.name();
        Object value = record.get(key);
        addPairWrapperProperties(wrapper, key, value);
    }

    Long timeStamp = System.currentTimeMillis();
    addPairWrapperProperties(wrapper, "op_ts", DateUtil.convertLongToStr4Date(timeStamp));
    addPairWrapperProperties(wrapper, "pos", timeStamp);


    for (Map.Entry<String, Object> entry : map.entrySet()) {
        addPairWrapperValue(wrapper, entry.getKey(), entry.getValue());
    }

    return wrapper;
}
 
Example 3
Source File: Db2WrapperDefaultHandler.java    From DBus with Apache License 2.0
private int addPayloadColumns(MetaWrapper meta, List<Object> payloads, GenericRecord data) throws Exception {
    int payloadSize = 0;
    for (MetaWrapper.MetaCell cell : meta.getColumns()) {
        if (cell.isSupportedOnDb2()) {
            Schema schema = data.getSchema();
            Schema.Field field = schema.getField(cell.getColumnName());
            Object value = null;
            try {
                value = data.get(field.name());
            } catch (Exception e) {
                logger.error("{}", e);
                logger.info("schema:{}", schema.toString());
                logger.info("data: {}", data.toString());
            }
            payloads.add(value);
            if (value != null) {
                payloadSize += value.toString().getBytes("utf-8").length;
            }
        }
    }

    return payloadSize;
}
 
Example 4
Source File: LiAvroSerializerBase.java    From incubator-gobblin with Apache License 2.0
public byte[] serialize(String topic, GenericRecord data)
    throws SerializationException {
  Schema schema = data.getSchema();
  MD5Digest schemaId = null;
  try {
    schemaId = schemaRegistry.register(topic, schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // MAGIC_BYTE | schemaId-bytes | avro_payload
    out.write(LiAvroSerDeHelper.MAGIC_BYTE);
    out.write(schemaId.asBytes());
    BinaryEncoder encoder = encoderFactory.directBinaryEncoder(out, null);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
    writer.write(data, encoder);
    encoder.flush();
    byte[] bytes = out.toByteArray();
    out.close();
    return bytes;
  } catch (IOException | SchemaRegistryException e) {
    throw new SerializationException(e);
  }
}
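The wire format written above is MAGIC_BYTE | schema-id bytes | Avro payload, so a matching deserializer only needs to skip the header and decode the rest with the schema it looks up by id. Below is a rough sketch using only the standard Avro API; it assumes a 16-byte MD5 schema id and leaves the registry lookup to the caller, so it illustrates the framing rather than the actual LiAvroDeserializer.

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;

public static GenericRecord deserialize(byte[] message, Schema writerSchema) throws IOException {
  // 1 magic byte followed by the schema id (assumed to be a 16-byte MD5), then the Avro payload
  int headerLength = 1 + 16;
  BinaryDecoder decoder = DecoderFactory.get()
      .binaryDecoder(message, headerLength, message.length - headerLength, null);
  return new GenericDatumReader<GenericRecord>(writerSchema).read(null, decoder);
}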
 
Example 5
Source File: AvroUtils.java    From beam with Apache License 2.0
/**
 * Strict conversion from Avro to Beam; strict because it does no widening or narrowing during
 * conversion. If a Beam Schema is not provided, one is inferred from the Avro schema.
 */
public static Row toBeamRowStrict(GenericRecord record, @Nullable Schema schema) {
  if (schema == null) {
    schema = toBeamSchema(record.getSchema());
  }

  Row.Builder builder = Row.withSchema(schema);
  org.apache.avro.Schema avroSchema = record.getSchema();

  for (Schema.Field field : schema.getFields()) {
    Object value = record.get(field.getName());
    org.apache.avro.Schema fieldAvroSchema = avroSchema.getField(field.getName()).schema();
    builder.addValue(convertAvroFieldStrict(value, fieldAvroSchema, field.getType()));
  }

  return builder.build();
}
 
Example 6
Source File: AvroStringFieldDecryptorConverterTest.java    From incubator-gobblin with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testArrayDecryption()
    throws DataConversionException, IOException, SchemaConversionException {
  AvroStringFieldDecryptorConverter converter = new AvroStringFieldDecryptorConverter();
  WorkUnitState wuState = new WorkUnitState();

  wuState.getJobState().setProp("converter.fieldsToDecrypt", "array1");
  wuState.getJobState().setProp("converter.decrypt.AvroStringFieldDecryptorConverter.algorithm", "insecure_shift");

  converter.init(wuState);
  GenericRecord inputRecord = generateRecordWithArrays();

  Schema inputSchema = inputRecord.getSchema();
  Schema outputSchema = converter.convertSchema(inputSchema, wuState);

  GenericData.Array<String> fieldValue = (GenericData.Array<String>) inputRecord.get("array1");

  Iterable<GenericRecord> recordIt = converter.convertRecord(outputSchema, inputRecord, wuState);
  GenericRecord decryptedRecord = recordIt.iterator().next();

  Assert.assertEquals(outputSchema, inputSchema);
  GenericData.Array<String> decryptedValue = (GenericData.Array<String>) decryptedRecord.get("array1");

  for (int i = 0; i < decryptedValue.size(); i++) {
    assertDecryptedValuesEqual(decryptedValue.get(i), fieldValue.get(i));
  }
}
 
Example 7
Source File: AvroUtils.java    From localization_nifi with Apache License 2.0
/**
 * Writes provided {@link GenericRecord} into the provided
 * {@link OutputStream}.
 */
public static void write(GenericRecord record, OutputStream out) {
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
    try {
        writer.write(record, encoder);
        encoder.flush();
    } catch (Exception e) {
        throw new IllegalStateException("Failed to write AVRO record", e);
    }
}
 
Example 8
Source File: AvroStringFieldDecryptorConverterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testConversion()
    throws DataConversionException, IOException, SchemaConversionException {
  AvroStringFieldDecryptorConverter converter = new AvroStringFieldDecryptorConverter();
  WorkUnitState wuState = new WorkUnitState();

  wuState.getJobState().setProp("converter.fieldsToDecrypt", "field1");
  wuState.getJobState().setProp("converter.decrypt.AvroStringFieldDecryptorConverter.algorithm", "insecure_shift");

  converter.init(wuState);
  GenericRecord inputRecord = TestUtils.generateRandomAvroRecord();

  Schema inputSchema = inputRecord.getSchema();
  Schema outputSchema = converter.convertSchema(inputSchema, wuState);

  String fieldValue = (String) inputRecord.get("field1");

  Iterable<GenericRecord> recordIt = converter.convertRecord(outputSchema, inputRecord, wuState);
  GenericRecord decryptedRecord = recordIt.iterator().next();

  Assert.assertEquals(outputSchema, inputSchema);
  String decryptedValue = (String) decryptedRecord.get("field1");

  InsecureShiftCodec codec = new InsecureShiftCodec(Maps.<String, Object>newHashMap());
  InputStream in = codec.decodeInputStream(new ByteArrayInputStream(fieldValue.getBytes(StandardCharsets.UTF_8)));
  byte[] expectedDecryptedValue = new byte[in.available()];
  in.read(expectedDecryptedValue);

  Assert.assertEquals(new String(expectedDecryptedValue, StandardCharsets.UTF_8), decryptedValue);
}
 
Example 9
Source File: ClusterHdfsSource.java    From datacollector with Apache License 2.0
private List<Map.Entry> previewAvroBatch(FileStatus fileStatus, int batchSize) throws IOException {
  int previewCount = previewBuffer.size();
  Path filePath = fileStatus.getPath();
  SeekableInput input = new FsInput(filePath, hadoopConf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  List<Map.Entry> batch = new ArrayList<>();
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
    int count = 0;
    while (fileReader.hasNext() && batch.size() < batchSize && previewCount < batchSize) {
      GenericRecord datum = fileReader.next();
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>
          (datum.getSchema()));
      try {
        dataFileWriter.create(datum.getSchema(), out);
        dataFileWriter.append(datum);
      } finally {
        dataFileWriter.close();
        out.close();
      }
      batch.add(new Pair(filePath.toUri().getPath() + "::" + count, out.toByteArray()));
      count++;
      previewCount++;
    }
  }
  return batch;
}
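To inspect what one of those preview entries contains, the container bytes can be read back with the standard Avro file API; a small sketch (the helper name is invented for illustration):

import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

static GenericRecord readPreviewEntry(byte[] containerBytes) throws IOException {
  // Each preview value is a complete Avro data file holding a single record
  try (DataFileReader<GenericRecord> reader = new DataFileReader<>(
      new SeekableByteArrayInput(containerBytes), new GenericDatumReader<GenericRecord>())) {
    return reader.hasNext() ? reader.next() : null;
  }
}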
 
Example 10
Source File: AvroTypeUtil.java    From datacollector with Apache License 2.0
public static byte[] getBinaryEncodedAvroRecord(GenericRecord datum) throws IOException {
  final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
  final DatumWriter<GenericRecord> outputDatumWriter = new GenericDatumWriter<>(datum.getSchema());
  final BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null);

  outputDatumWriter.write(datum, encoder);
  encoder.flush();
  baos.flush();
  baos.close();

  return baos.toByteArray();
}
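The corresponding decode is symmetric; a sketch, again assuming the caller supplies the writer schema, since a raw binary encoding carries no schema information of its own:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;

public static GenericRecord getBinaryDecodedAvroRecord(byte[] bytes, Schema writerSchema) throws IOException {
  BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
  DatumReader<GenericRecord> reader = new GenericDatumReader<>(writerSchema);
  return reader.read(null, decoder);
}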
 
Example 11
Source File: ToUpperCase.java    From kite with Apache License 2.0
@Override
public GenericRecord map(GenericRecord input) {
  Schema schema = input.getSchema();
  for (Schema.Field field : schema.getFields()) {
    Object value = input.get(field.name());
    if (value instanceof String || value instanceof Utf8) {
      // replace with upper case
      input.put(field.name(), value.toString().toUpperCase());
    }
  }
  return input;
}
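Note that the mapper above modifies the input record in place. If mutation is undesirable, a copy-based variant (a sketch, not part of the Kite project) can rebuild the record against the same schema:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

static GenericRecord toUpperCaseCopy(GenericRecord input) {
  Schema schema = input.getSchema();
  GenericRecord output = new GenericData.Record(schema);
  for (Schema.Field field : schema.getFields()) {
    Object value = input.get(field.name());
    if (value instanceof CharSequence) {
      // Covers both String and Utf8 values
      value = value.toString().toUpperCase();
    }
    output.put(field.name(), value);
  }
  return output;
}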
 
Example 12
Source File: HoodieAvroUtils.java    From hudi with Apache License 2.0
/**
 * Convert a given avro record to json and return the encoded bytes.
 *
 * @param record The GenericRecord to convert
 * @param pretty Whether to pretty-print the json output
 */
public static byte[] avroToJson(GenericRecord record, boolean pretty) throws IOException {
  DatumWriter<Object> writer = new GenericDatumWriter<>(record.getSchema());
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(record.getSchema(), out, pretty);
  writer.write(record, jsonEncoder);
  jsonEncoder.flush();
  return out.toByteArray();
}
 
Example 13
Source File: AvroStringFieldEncryptorConverterTest.java    From incubator-gobblin with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testEncryptionOfArray()
    throws SchemaConversionException, DataConversionException, IOException {
  AvroStringFieldEncryptorConverter converter = new AvroStringFieldEncryptorConverter();
  WorkUnitState wuState = new WorkUnitState();

  wuState.getJobState().setProp("converter.fieldsToEncrypt", "favorite_quotes");
  wuState.getJobState().setProp("converter.encrypt.algorithm", "insecure_shift");

  converter.init(wuState);
  GenericRecord inputRecord =
      getRecordFromFile(getClass().getClassLoader().getResource("fieldPickInput_arrays.avro").getPath());
  GenericArray origValues = (GenericArray) inputRecord.get("favorite_quotes");
  for (int i = 0; i < origValues.size(); i++) {
    origValues.set(i, origValues.get(i).toString());
  }

  Schema inputSchema = inputRecord.getSchema();
  Schema outputSchema = converter.convertSchema(inputSchema, wuState);

  Iterable<GenericRecord> recordIt = converter.convertRecord(outputSchema, inputRecord, wuState);
  GenericRecord encryptedRecord = recordIt.iterator().next();

  Assert.assertEquals(outputSchema, inputSchema);

  GenericArray<String> encryptedVals = (GenericArray<String>) encryptedRecord.get("favorite_quotes");
  List<String> decryptedValues = Lists.newArrayList();
  for (String encryptedValue: encryptedVals) {
    InsecureShiftCodec codec = new InsecureShiftCodec(Maps.<String, Object>newHashMap());
    InputStream in =
        codec.decodeInputStream(new ByteArrayInputStream(encryptedValue.getBytes(StandardCharsets.UTF_8)));
    byte[] decryptedValue = new byte[in.available()];
    in.read(decryptedValue);
    decryptedValues.add(new String(decryptedValue, StandardCharsets.UTF_8));
  }

  Assert.assertEquals(decryptedValues, origValues);
}
 
Example 14
Source File: MergeAvroMapper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
private SqoopRecord toSqoopRecord(GenericRecord genericRecord) throws IOException {
  Schema avroSchema = genericRecord.getSchema();
  for (Schema.Field field : avroSchema.getFields()) {
    Pair<String, String> sqoopRecordField = sqoopRecordFields.get(field.name().toLowerCase());
    if (null == sqoopRecordField) {
      throw new IOException("Cannot find field '" + field.name() + "' in fields of user class"
          + sqoopRecordImpl.getClass().getName() + ". Fields are: "
          + Arrays.deepToString(sqoopRecordFields.values().toArray()));
    }
    Object avroObject = genericRecord.get(field.name());
    Object fieldVal = AvroUtil.fromAvro(avroObject, field.schema(), sqoopRecordField.value());
    sqoopRecordImpl.setField(sqoopRecordField.key(), fieldVal);
  }
  return sqoopRecordImpl;
}
 
Example 15
Source File: Kafka08DataWriterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testAvroSerialization()
    throws IOException, InterruptedException, SchemaRegistryException {
  String topic = "testAvroSerialization08";
  _kafkaTestHelper.provisionTopic(topic);
  Properties props = new Properties();
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_TOPIC, topic);
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX + "bootstrap.servers", "localhost:" + _kafkaTestHelper.getKafkaServerPort());
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX + "value.serializer",
      "org.apache.gobblin.kafka.serialize.LiAvroSerializer");

  // set up mock schema registry

  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX
      + KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS,
      ConfigDrivenMd5SchemaRegistry.class.getCanonicalName());

  Kafka08DataWriter kafka08DataWriter = new Kafka08DataWriter<>(props);
  WriteCallback callback = mock(WriteCallback.class);

  GenericRecord record = TestUtils.generateRandomAvroRecord();
  try {
    kafka08DataWriter.write(record, callback);
  } finally {
    kafka08DataWriter.close();
  }

  verify(callback, times(1)).onSuccess(isA(WriteResponse.class));
  verify(callback, never()).onFailure(isA(Exception.class));

  byte[] message = _kafkaTestHelper.getIteratorForTopic(topic).next().message();
  ConfigDrivenMd5SchemaRegistry schemaReg = new ConfigDrivenMd5SchemaRegistry(topic, record.getSchema());
  LiAvroDeserializer deser = new LiAvroDeserializer(schemaReg);
  GenericRecord receivedRecord = deser.deserialize(topic, message);
  Assert.assertEquals(record.toString(), receivedRecord.toString());
}
 
Example 16
Source File: BoltCommandHandlerHelper.java    From DBus with Apache License 2.0
public static <T extends Object> PairWrapper<String, Object> convertAvroRecord(GenericRecord record, Set<T> noorderKeys) {
    Schema schema = record.getSchema();
    List<Schema.Field> fields = schema.getFields();
    PairWrapper<String, Object> wrapper = new PairWrapper<>();

    for (Schema.Field field : fields) {
        String key = field.name();
        Object value = record.get(key);
        // Store the key-value pairs that do not require ordering separately
        if (noorderKeys.contains(field.name())) {
            //wrapper.addProperties(key, value);
            addPairWrapperProperties(wrapper, key, value);
        }
    }

    GenericRecord before = getFromRecord(MessageBodyKey.BEFORE, record);
    GenericRecord after = getFromRecord(MessageBodyKey.AFTER, record);

    Map<String, Object> beforeMap = convert2map(before);
    Map<String, Object> afterMap = convert2map(after);

    // Merge so that 'after' values overwrite 'before' values
    mergeMap(beforeMap, afterMap);

    for (Map.Entry<String, Object> entry : beforeMap.entrySet()) {
        if (!entry.getKey().endsWith(MessageBodyKey.IS_MISSING_SUFFIX)) {
            if ((Boolean) beforeMap.get(entry.getKey() + MessageBodyKey.IS_MISSING_SUFFIX)) {
                wrapper.addMissingField(entry.getKey());
            }
            //wrapper.addPair(new Pair<>(entry.getKey(), CharSequence.class.isInstance(entry.getValue()) ? entry.getValue().toString() : entry.getValue()));
            addPairWrapperValue(wrapper, entry.getKey(), entry.getValue());
        }
    }

    return wrapper;
}
 
Example 17
Source File: Kafka09DataWriterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testKeyedAvroSerialization()
    throws IOException, InterruptedException, SchemaRegistryException {
  String topic = "testAvroSerialization09";
  _kafkaTestHelper.provisionTopic(topic);
  Properties props = new Properties();
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_TOPIC, topic);
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX + "bootstrap.servers",
      "127.0.0.1:" + _kafkaTestHelper.getKafkaServerPort());
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX + "value.serializer",
      LiAvroSerializer.class.getName());
  props.setProperty(KafkaWriterConfigurationKeys.WRITER_KAFKA_KEYED_CONFIG, "true");
  String keyField = "field1";
  props.setProperty(KafkaWriterConfigurationKeys.WRITER_KAFKA_KEYFIELD_CONFIG, keyField);


  // set up mock schema registry

  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX
          + KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS,
      ConfigDrivenMd5SchemaRegistry.class.getCanonicalName());

  Kafka09DataWriter<String, GenericRecord> kafka09DataWriter = new Kafka09DataWriter<>(props);
  WriteCallback callback = mock(WriteCallback.class);

  GenericRecord record = TestUtils.generateRandomAvroRecord();
  try {
    kafka09DataWriter.write(record, callback);
  } finally {
    kafka09DataWriter.close();
  }

  verify(callback, times(1)).onSuccess(isA(WriteResponse.class));
  verify(callback, never()).onFailure(isA(Exception.class));
  MessageAndMetadata<byte[], byte[]> value = _kafkaTestHelper.getIteratorForTopic(topic).next();
  byte[] key = value.key();
  byte[] message = value.message();
  ConfigDrivenMd5SchemaRegistry schemaReg = new ConfigDrivenMd5SchemaRegistry(topic, record.getSchema());
  LiAvroDeserializer deser = new LiAvroDeserializer(schemaReg);
  GenericRecord receivedRecord = deser.deserialize(topic, message);
  Assert.assertEquals(record.toString(), receivedRecord.toString());
  Assert.assertEquals(new String(key), record.get(keyField));
}
 
Example 18
Source File: Kafka09DataWriterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testAvroSerialization()
    throws IOException, InterruptedException, SchemaRegistryException {
  String topic = "testAvroSerialization08";
  _kafkaTestHelper.provisionTopic(topic);
  Properties props = new Properties();
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_TOPIC, topic);
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX + "bootstrap.servers",
      "127.0.0.1:" + _kafkaTestHelper.getKafkaServerPort());
  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX + "value.serializer",
      LiAvroSerializer.class.getName());

  // set up mock schema registry

  props.setProperty(KafkaWriterConfigurationKeys.KAFKA_PRODUCER_CONFIG_PREFIX
      + KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS,
      ConfigDrivenMd5SchemaRegistry.class.getCanonicalName());

  Kafka09DataWriter<String, GenericRecord> kafka09DataWriter = new Kafka09DataWriter<>(props);
  WriteCallback callback = mock(WriteCallback.class);

  GenericRecord record = TestUtils.generateRandomAvroRecord();
  try {
    kafka09DataWriter.write(record, callback);
  } finally {
    kafka09DataWriter.close();
  }

  log.info("Kafka events written");

  verify(callback, times(1)).onSuccess(isA(WriteResponse.class));
  verify(callback, never()).onFailure(isA(Exception.class));

  byte[] message = _kafkaTestHelper.getIteratorForTopic(topic).next().message();

  log.info("Kafka events read, start to check result... ");
  ConfigDrivenMd5SchemaRegistry schemaReg = new ConfigDrivenMd5SchemaRegistry(topic, record.getSchema());
  LiAvroDeserializer deser = new LiAvroDeserializer(schemaReg);
  GenericRecord receivedRecord = deser.deserialize(topic, message);
  Assert.assertEquals(record.toString(), receivedRecord.toString());
}
 
Example 19
Source File: EnvelopePayloadConverterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testConverter()
    throws IOException, DataConversionException, SchemaRegistryException {
  Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/envelope.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(inputSchema);

  File tmp = File.createTempFile(getClass().getSimpleName(), null);
  FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/envelope.avro"), tmp);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(tmp, datumReader);
  GenericRecord inputRecord = dataFileReader.next();

  Schema latestPayloadSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/record.avsc"));
  when(mockRegistry.getLatestSchemaByTopic(any())).thenReturn(latestPayloadSchema);
  when(mockRegistry.getSchemaByKey(any())).thenReturn(inputSchema.getField("nestedRecord").schema());

  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(BaseEnvelopeSchemaConverter.PAYLOAD_SCHEMA_TOPIC, "test");
  workUnitState.setProp(BaseEnvelopeSchemaConverter.PAYLOAD_SCHEMA_ID_FIELD, "metadata.payloadSchemaId");
  workUnitState
      .setProp(BaseEnvelopeSchemaConverter.KAFKA_REGISTRY_FACTORY, MockKafkaAvroSchemaRegistryFactory.class.getName());

  EnvelopePayloadConverter converter = new EnvelopePayloadConverter();
  converter.init(workUnitState);

  Schema outputSchema = converter.convertSchema(inputSchema, workUnitState);
  List<GenericRecord> outputRecords = new ArrayList<>();
  Iterables.addAll(outputRecords, converter.convertRecord(outputSchema, inputRecord, workUnitState));
  Assert.assertTrue(outputRecords.size() == 1);

  GenericRecord outputRecord = outputRecords.get(0);
  GenericRecord payload = (GenericRecord) outputRecord.get("payload");
  // While making the test envelope avro record, its nestedRecord was intentionally set to the deserialized payload
  GenericRecord expectedPayload = (GenericRecord) outputRecord.get("nestedRecord");

  Schema payloadSchema = payload.getSchema();
  Schema expectedPayloadSchema = expectedPayload.getSchema();
  // The expected payload schema has the same number of fields as payload schema but in different order
  Assert.assertTrue(expectedPayloadSchema.getName().equals(payloadSchema.getName()));
  Assert.assertTrue(expectedPayloadSchema.getNamespace().equals(payloadSchema.getNamespace()));
  Assert.assertTrue(expectedPayloadSchema.getFields().size() == payloadSchema.getFields().size());

  for (Schema.Field field : payload.getSchema().getFields()) {
    Assert.assertTrue(expectedPayload.get(field.name()).equals(payload.get(field.name())));
  }
}
 
Example 20
Source File: EnvelopePayloadExtractingConverterTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testConverter()
    throws Exception {
  Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/envelope.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(inputSchema);

  File tmp = File.createTempFile(getClass().getSimpleName(), null);
  FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/envelope.avro"), tmp);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(tmp, datumReader);
  GenericRecord inputRecord = dataFileReader.next();

  Schema latestPayloadSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/record.avsc"));
  when(mockRegistry.getLatestSchemaByTopic(any())).thenReturn(latestPayloadSchema);
  when(mockRegistry.getSchemaByKey(any())).thenReturn(inputSchema.getField("nestedRecord").schema());

  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(BaseEnvelopeSchemaConverter.PAYLOAD_SCHEMA_TOPIC, "test");
  workUnitState.setProp(BaseEnvelopeSchemaConverter.PAYLOAD_SCHEMA_ID_FIELD, "metadata.payloadSchemaId");
  workUnitState.setProp(BaseEnvelopeSchemaConverter.KAFKA_REGISTRY_FACTORY,
      EnvelopePayloadExtractingConverterTest.MockKafkaAvroSchemaRegistryFactory.class.getName());

  EnvelopePayloadExtractingConverter converter = new EnvelopePayloadExtractingConverter();
  converter.init(workUnitState);

  Schema outputSchema = converter.convertSchema(inputSchema, workUnitState);
  Assert.assertTrue(outputSchema.equals(latestPayloadSchema));

  List<GenericRecord> outputRecords = new ArrayList<>();
  Iterables.addAll(outputRecords, converter.convertRecord(outputSchema, inputRecord, workUnitState));
  Assert.assertTrue(outputRecords.size() == 1);

  GenericRecord payload = outputRecords.get(0);
  // While making the test envelope avro input record, its nestedRecord was intentionally set to the deserialized payload
  GenericRecord expectedPayload = (GenericRecord) inputRecord.get("nestedRecord");

  Schema payloadSchema = payload.getSchema();
  Schema expectedPayloadSchema = expectedPayload.getSchema();
  // The expected payload schema has the same number of fields as payload schema but in different order
  Assert.assertTrue(expectedPayloadSchema.getName().equals(payloadSchema.getName()));
  Assert.assertTrue(expectedPayloadSchema.getNamespace().equals(payloadSchema.getNamespace()));
  Assert.assertTrue(expectedPayloadSchema.getFields().size() == payloadSchema.getFields().size());

  for (Schema.Field field : payload.getSchema().getFields()) {
    Assert.assertTrue(expectedPayload.get(field.name()).equals(payload.get(field.name())));
  }
}