org.apache.avro.generic.GenericData Java Examples

The following examples show how to use org.apache.avro.generic.GenericData. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JiraInsertWriterTestIT.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@link JiraInsertWriter#write()} fails with an {@link IOException} whose message
 * contains "Reason: record is invalid", the rejected record and the expected error JSON.
 *
 * @throws IOException
 */
@Test
public void testWriteErrorMessage() throws IOException {
    String issueJson = "{\"fields\":{\"project\":{\"key\":\"TP\"},\"summary\":\"Integration test issue 1\",\"issuetype\":{\"id\":\"12345\"}}}";
    IndexedRecord invalidRecord = new GenericData.Record(INSERT_SCHEMA);
    invalidRecord.put(0, issueJson);

    // Every fragment below must appear in the message of the thrown exception.
    thrown.expect(IOException.class);
    thrown.expectMessage("Reason: record is invalid");
    thrown.expectMessage("Record: " + issueJson);
    thrown.expectMessage("Error: ");
    thrown.expectMessage("{\"errorMessages\":[],\"errors\":{\"issuetype\":\"valid issue type is required\"}}");

    JiraWriter writer = JiraTestsHelper.createWriter(HOST_PORT, USER, PASS, Resource.ISSUE, Action.INSERT);
    writer.open("ins");
    try {
        writer.write(invalidRecord);
    } finally {
        // Close the writer even though write() is expected to throw.
        writer.close();
    }
}
 
Example #2
Source File: GenericDataTSVTest.java    From iow-hadoop-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the tab-separated rendering of a record that mixes ordinary, NaN and infinite
 * float/double values.
 */
@Test
public void testToString2() throws Exception {
    GenericData.Record record = new GenericData.Record(p.parse(sc2));
    record.put("x", 0.345621f);
    record.put("y", Double.NaN);
    record.put("x2", Float.POSITIVE_INFINITY);
    record.put("y2", 1.0);
    record.put("x3", Float.NaN);
    record.put("y3", Double.POSITIVE_INFINITY);
    record.put("x4", Float.NaN);
    record.put("y4", Double.NEGATIVE_INFINITY);

    String actual = gd.toString(record);

    Assert.assertNotNull(actual);
    Assert.assertEquals("0.345621\tNaN\tInfinity\t1.0\tNaN\tInfinity\tNaN\t-Infinity", actual);
}
 
Example #3
Source File: TestAvroDecoder.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes an array of nullable maps with nullable float values (including a null map and a
 * null map value) and checks the decoded block against the input data.
 */
@Test
public void testArrayOfMapsWithNulls()
{
    DecoderTestColumnHandle row = new DecoderTestColumnHandle(0, "row", new ArrayType(REAL_MAP_TYPE), "array_field", null, null, false, false, false);
    Schema arraySchema = SchemaBuilder.array().items().nullable().map().values().nullable().floatType();

    List<Map<String, Float>> expected = Arrays.asList(
            buildMapFromKeysAndValues(ImmutableList.of("key1", "key2", "key3"), ImmutableList.of(1.3F, 2.3F, -.5F)),
            null,
            buildMapFromKeysAndValues(ImmutableList.of("key10", "key20", "key30"), ImmutableList.of(11.3F, 12.3F, -1.5F)),
            // Arrays.asList is used here because this value list contains a null
            buildMapFromKeysAndValues(ImmutableList.of("key100", "key200", "key300"), Arrays.asList(111.3F, null, -11.5F)));

    GenericArray<Map<String, Float>> avroArray = new GenericData.Array<>(arraySchema, expected);
    Map<DecoderColumnHandle, FieldValueProvider> decoded = buildAndDecodeColumn(row, "array_field", arraySchema.toString(), avroArray);

    checkArrayValues(getBlock(decoded, row), row.getType(), expected);
}
 
Example #4
Source File: AvroSerializationSchema.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Lazily creates the schema, datum writer, output buffer and encoder.
 * Does nothing when the datum writer has already been created.
 */
protected void checkAvroInitialized() {
	if (datumWriter != null) {
		return;
	}

	if (SpecificRecord.class.isAssignableFrom(recordClazz)) {
		// Specific records: derive the schema from the generated class.
		Schema specificSchema = SpecificData.get().getSchema(recordClazz);
		this.datumWriter = new SpecificDatumWriter<>(specificSchema);
		this.schema = specificSchema;
	} else {
		// Generic records: parse the schema string and bind GenericData to the context class loader.
		this.schema = new Schema.Parser().parse(this.schemaString);
		GenericData genericData = new GenericData(Thread.currentThread().getContextClassLoader());
		this.datumWriter = new GenericDatumWriter<>(this.schema, genericData);
	}

	this.arrayOutputStream = new ByteArrayOutputStream();
	this.encoder = EncoderFactory.get().directBinaryEncoder(arrayOutputStream, null);
}
 
Example #5
Source File: AvroDataStreamParser.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a parser that reads Avro records from the given input stream.
 *
 * <p>The constructor stores its arguments, builds a {@code GenericDatumReader} that uses
 * {@code schema} as both writer and reader schema, wraps {@code inputStream} in an
 * {@code OverrunInputStream} configured with {@code maxObjectLength}, opens a
 * {@code DataFileStream} on top of it and finally calls {@code seekToOffset()}.
 *
 * @param context context stored on the parser
 * @param schema Avro schema used for reading
 * @param streamName name stored on the parser
 * @param inputStream stream carrying the Avro container data
 * @param recordCount stored as-is; presumably the number of records to skip in
 *     {@code seekToOffset()} (defined outside this block - confirm there)
 * @param maxObjectLength limit passed to the {@code OverrunInputStream}
 * @param skipAvroUnionIndexes flag stored on the parser
 * @throws IOException if opening the data file stream or seeking fails
 */
public AvroDataStreamParser(
    ProtoConfigurableEntity.Context context,
    Schema schema,
    String streamName,
    InputStream inputStream,
    long recordCount,
    int maxObjectLength,
    boolean skipAvroUnionIndexes
) throws IOException {
  this.context = context;
  avroSchema = schema;
  this.streamName = streamName;
  this.recordCount = recordCount;
  datumReader = new GenericDatumReader<>(avroSchema, avroSchema, GenericData.get()); //Reader schema argument is optional
  overrunInputStream = new OverrunInputStream(inputStream, maxObjectLength, true);
  dataFileStream = new DataFileStream<>(overrunInputStream, datumReader);
  // NOTE(review): seekToOffset() runs before skipAvroUnionIndexes is assigned; keep this order
  // unless seekToOffset() is confirmed not to read that field.
  seekToOffset();
  this.skipAvroUnionIndexes = skipAvroUnionIndexes;
}
 
Example #6
Source File: JDBCRowWriter.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Counts a successful write and, when an output schema with at least one field is configured,
 * records an output row built from the input record.
 *
 * <p>Each output field is filled from the input field with the same name (or {@code null} when
 * the input has no such field). The exception is the configured "use column", which receives the
 * result set when result-set propagation is enabled.
 *
 * @param input the record that was written successfully
 */
private void handleSuccess(IndexedRecord input) {
    successCount++;

    if (outSchema == null || outSchema.getFields().isEmpty()) {
        return;
    }

    IndexedRecord output = new GenericData.Record(outSchema);
    for (Schema.Field target : output.getSchema().getFields()) {
        boolean carriesResultSet = propagateQueryResultSet && target.name().equals(setting.getUseColumn());
        if (carriesResultSet) {
            output.put(target.pos(), resultSet);
            continue;
        }

        // Copy by field name; fields missing from the input stay null.
        Schema.Field source = input.getSchema().getField(target.name());
        Object value = (source == null) ? null : input.get(source.pos());
        output.put(target.pos(), value);
    }

    successfulWrites.add(output);
}
 
Example #7
Source File: AzureStorageQueueOutputWriterTestIT.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one record per test message (with a "SIMPLE" suffix) to the queue, then checks that
 * three messages arrived and that each carries the suffix.
 */
@Test
public void testWriteSimpleMessage() throws Throwable {
    queue.clear();

    // Build the output properties on top of the shared connection settings.
    TAzureStorageQueueOutputProperties properties = (TAzureStorageQueueOutputProperties) setupConnectionProperties(
            (AzureStorageProvideConnectionProperties) new TAzureStorageQueueOutputProperties("tests"));
    properties.setupProperties();
    properties.queueName.setValue(TEST_QUEUE_NAME);

    Writer<?> queueWriter = createWriter(properties);
    queueWriter.open("test-uid");
    for (String message : messages) {
        IndexedRecord record = new GenericData.Record(properties.schema.schema.getValue());
        record.put(0, message + "SIMPLE");
        queueWriter.write(record);
    }
    queueWriter.close();

    queue.downloadAttributes();
    assertEquals(3, queue.getApproximateMessageCount());
    for (CloudQueueMessage received : queue.retrieveMessages(3)) {
        assertNotNull(received.getMessageContentAsString());
        assertTrue(received.getMessageContentAsString().indexOf("SIMPLE") > 0);
    }
}
 
Example #8
Source File: FastDatumWriterTest.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Writes a single-field record through a {@code FastGenericDatumWriter} and then checks that the
 * cache returns a non-null fast serializer for the record schema whose class does not declare
 * exactly two methods.
 */
@Test(groups = {"serializationTest"})
@SuppressWarnings("unchecked")
public void shouldCreateGenericDatumReader() throws IOException, InterruptedException {
  Schema recordSchema = createRecord("TestSchema", createPrimitiveUnionFieldSchema("test", Schema.Type.STRING));
  FastGenericDatumWriter<GenericRecord> fastGenericDatumReader = new FastGenericDatumWriter<>(recordSchema, cache);

  GenericRecord record = new GenericData.Record(recordSchema);
  record.put("test", "test");

  // when
  fastGenericDatumReader.write(record, AvroCompatibilityHelper.newBinaryEncoder(new ByteArrayOutputStream(), true, null));

  // then
  FastSerializer<GenericRecord> fastGenericSerializer =
      (FastSerializer<GenericRecord>) cache.getFastGenericSerializer(recordSchema);

  // NOTE(review): the lookup is repeated on purpose or by accident - presumably the first call can
  // return a placeholder while the fast serializer is being generated; confirm before removing.
  fastGenericSerializer = (FastSerializer<GenericRecord>) cache.getFastGenericSerializer(recordSchema);

  Assert.assertNotNull(fastGenericSerializer);
  Assert.assertNotEquals(2, fastGenericSerializer.getClass().getDeclaredMethods().length);
}
 
Example #9
Source File: TestAvroDecoder.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes an array of nullable arrays of nullable longs (including a null inner array and a null
 * element) and checks the decoded column against the Avro array.
 */
@Test
public void testNestedLongArrayWithNulls()
{
    Schema nestedSchema = SchemaBuilder.array()
            .items()
            .nullable().array()
            .items()
            .nullable().longType();
    DecoderTestColumnHandle row = new DecoderTestColumnHandle(0, "row", new ArrayType(new ArrayType(BIGINT)), "array_field", null, null, false, false, false);

    List<List<Long>> values = Arrays.asList(
            ImmutableList.of(12L, 15L, 17L),
            ImmutableList.of(22L, 25L, 27L, 29L),
            null,
            // Arrays.asList is used here because this inner list contains a null
            Arrays.asList(3L, 5L, null, 6L));
    GenericArray<List<Long>> avroArray = new GenericData.Array<>(nestedSchema, values);

    Map<DecoderColumnHandle, FieldValueProvider> decoded = buildAndDecodeColumn(row, "array_field", nestedSchema.toString(), avroArray);
    checkArrayValue(decoded, row, avroArray);
}
 
Example #10
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a writer that stores key/value pairs as generic records in an Avro container file
 * written to {@code outputStream}.
 *
 * @param keySchema schema of the keys
 * @param valueSchema schema of the values
 * @param compressionCodec codec set on the container file writer
 * @param outputStream destination of the container file
 * @param syncInterval sync interval set on the container file writer
 * @throws IOException if the container file cannot be created
 */
AvroKeyValueWriter(Schema keySchema, Schema valueSchema,
		CodecFactory compressionCodec, OutputStream outputStream,
		int syncInterval) throws IOException {
	// Schema of the generic record that holds one key/value pair.
	mKeyValuePairSchema = AvroKeyValue.getSchema(keySchema, valueSchema);

	// Container file writer: configure codec and sync interval before creating the file.
	DatumWriter<GenericRecord> pairWriter =
			new GenericDatumWriter<GenericRecord>(mKeyValuePairSchema);
	mAvroFileWriter = new DataFileWriter<GenericRecord>(pairWriter);
	mAvroFileWriter.setCodec(compressionCodec);
	mAvroFileWriter.setSyncInterval(syncInterval);
	mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

	// Single output record that is reused across writes.
	GenericData.Record reusableRecord = new GenericData.Record(mKeyValuePairSchema);
	mOutputRecord = new AvroKeyValue<Object, Object>(reusableRecord);
}
 
Example #11
Source File: AggregateCombineFnTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds double records into three max accumulators, checks each intermediate maximum, then
 * merges the second and third accumulators into the first and checks the overall maximum.
 */
@Test
public void MaxDoubleAccumulatorFnTest() {
    final double delta = 0.0;
    List<GenericData.Record> records = genRecords(Arrays.asList(1.1, 2.2, 3.3, 10.10, 5.5), AvroUtils._double());

    AggregateCombineFn.MaxDoubleAccumulatorFn first = new AggregateCombineFn.MaxDoubleAccumulatorFn();
    AggregateCombineFn.MaxDoubleAccumulatorFn second = new AggregateCombineFn.MaxDoubleAccumulatorFn();
    AggregateCombineFn.MaxDoubleAccumulatorFn third = new AggregateCombineFn.MaxDoubleAccumulatorFn();
    first.createAccumulator();
    second.createAccumulator();
    third.createAccumulator();

    // first accumulator: 1.1, then 2.2
    first.addInput(records.get(0));
    Assert.assertEquals(1.1, first.extractOutput(), delta);
    first.addInput(records.get(1));
    Assert.assertEquals(2.2, first.getAccumulators(), delta);

    // second accumulator: 3.3 and 10.10
    second.addInput(records.get(2));
    second.addInput(records.get(3));
    Assert.assertEquals(10.10, second.extractOutput(), delta);

    // third accumulator: 5.5
    third.addInput(records.get(4));
    Assert.assertEquals(5.5, third.extractOutput(), delta);

    // merged result is the maximum over all inputs
    first.mergeAccumulators(Arrays.asList(second.getAccumulators(), third.getAccumulators()));
    Assert.assertEquals(10.10, first.extractOutput(), delta);
}
 
Example #12
Source File: TestFlumeEventAvroEventSerializer.java    From mt-flume with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the given Avro container file back as generic records, prints the UTF-8 decoded
 * {@code body} of each record and asserts that the file holds exactly three events.
 *
 * @param file the Avro container file to validate
 * @throws IOException if the file cannot be opened or read, or a body is not valid UTF-8
 */
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  int numEvents = 0;
  try {
    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    // decode(ByteBuffer) resets the decoder on every call, so one instance serves all records
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    while (fileReader.hasNext()) {
      // next(reuse) may return a different instance than the one passed in; use its result
      record = fileReader.next(record);
      ByteBuffer body = (ByteBuffer) record.get("body");
      String bodyStr = decoder.decode(body).toString();
      System.out.println(bodyStr);
      numEvents++;
    }
  } finally {
    // release the file handle even when reading or decoding fails
    fileReader.close();
  }
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Example #13
Source File: AvroKryoSerializerUtils.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the Kryo serializers needed for Avro types when {@code type} is an Avro specific
 * record or a generic record; does nothing for any other type.
 */
@Override
public void addAvroSerializersIfRequired(ExecutionConfig reg, Class<?> type) {
	boolean isSpecificRecord = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(type);
	boolean isGenericRecord = org.apache.avro.generic.GenericData.Record.class.isAssignableFrom(type);
	if (!isSpecificRecord && !isGenericRecord) {
		return;
	}

	// The java.util.List fields of Avro POJOs are GenericData.Array instances at runtime.
	// Kryo cannot serialize those properly, so a dedicated serializer is registered for them.
	reg.registerTypeWithKryoSerializer(GenericData.Array.class, Serializers.SpecificInstanceCollectionSerializerForArrayList.class);

	// Supports users of untyped Avro records (GenericData.Record): Kryo handles everything in
	// them except the Schema. This serializer is very slow, but sending GenericData.Records
	// through Kryo is a bad idea in general. It is added as a default serializer because Avro
	// uses a private sub-type at runtime.
	reg.addDefaultKryoSerializer(Schema.class, AvroSchemaSerializer.class);
}
 
Example #14
Source File: TopkPhaseTest.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the two input records used to drive the mapper in tests.
 *
 * @return a list holding both records, in creation order
 */
private List<GenericRecord> generateTestMapperData() throws Exception {
  GenericRecord first = new GenericData.Record(inputSchema);
  first.put("d1", "abc1");
  first.put("d2", 501L);
  first.put("d3", "xyz1");
  first.put("hoursSinceEpoch", generateRandomHoursSinceEpoch());
  first.put("m1", 100);
  first.put("m2", 20);

  GenericRecord second = new GenericData.Record(inputSchema);
  second.put("d1", "abc2");
  second.put("d2", 502L);
  second.put("d3", "xyz2");
  second.put("hoursSinceEpoch", generateRandomHoursSinceEpoch());
  second.put("m1", 10);
  second.put("m2", 20);

  List<GenericRecord> inputRecords = new ArrayList<GenericRecord>();
  inputRecords.add(first);
  inputRecords.add(second);
  return inputRecords;
}
 
Example #15
Source File: JiraUpdateWriterTestIT.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@link JiraUpdateWriter#write()} fails with an {@link IOException} when the
 * writer is created with wrong credentials (server answers 401 Unauthorized). The message is
 * expected to contain "Reason: user is not authenticated. Record wasn't updated", the rejected
 * record and an "Error: " section.
 *
 * @throws IOException
 */
@Test
public void testWriteUnauthorized() throws IOException {
    String projectJson = "{\"name\":\"Updated Integration Test Project\"\"assigneeType\":\"PROJECT_LEAD\"}";
    IndexedRecord record = new GenericData.Record(UPDATE_SCHEMA);
    record.put(0, "TP");
    record.put(1, projectJson);

    JiraWriter writer = JiraTestsHelper.createWriter(HOST_PORT, WRONG_USER, PASS, Resource.PROJECT,
            Action.UPDATE);

    // Every fragment below must appear in the message of the thrown exception.
    thrown.expect(IOException.class);
    thrown.expectMessage("Reason: user is not authenticated. Record wasn't updated");
    thrown.expectMessage("Record: " + projectJson);
    thrown.expectMessage("Error: ");

    writer.open("upd");
    try {
        writer.write(record);
    } finally {
        // Close the writer even though write() is expected to throw.
        writer.close();
    }
}
 
Example #16
Source File: AvroToPojoTest.java    From attic-apex-malhar with Apache License 2.0 6 votes vote down vote up
/**
 * Resets {@code recordList} and fills it with three generic records whose field values are
 * derived from a counter running from 3 down to 1.
 */
private void createReaderInput()
{
  recordList = Lists.newArrayList();

  for (int cnt = 3; cnt > 0; cnt--) {
    GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(AVRO_SCHEMA));
    rec.put("orderId", cnt * 1L);
    rec.put("customerId", cnt * 2);
    rec.put("total", cnt * 1.5);
    rec.put("customerName", "*" + cnt + "*");
    recordList.add(rec);
  }
}
 
Example #17
Source File: JiraDeleteWriterTestIT.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@link JiraDeleteWriter#write()} fails when the writer is created with wrong
 * credentials, i.e. when the server responds with 401 Unauthorized.
 *
 * <p>Two outcomes are coded here: an {@link IOException} whose message contains
 * "Reason: user is not authenticated. Record wasn't deleted" (declared through the
 * {@code thrown} rule), or a {@code DataRejectException} whose reject info carries the error
 * "User is not authenticated. Record wasn't deleted" (checked in the catch block).
 *
 * @throws IOException
 */
@Test
public void testWriteUnauthorized() throws IOException {
    String expectedError = "User is not authenticated. Record wasn't deleted";

    IndexedRecord badJsonRecord = new GenericData.Record(DELETE_SCHEMA);
    badJsonRecord.put(0, "TP");

    // Expectations on the IOException path.
    thrown.expect(IOException.class);
    thrown.expectMessage("Reason: user is not authenticated. Record wasn't deleted");
    thrown.expectMessage("Record: TP");
    thrown.expectMessage("Error: ");

    JiraWriter deleteProjectWriter = JiraTestsHelper.createWriter(HOST_PORT, WRONG_USER, PASS, Resource.PROJECT,
            Action.DELETE);
    deleteProjectWriter.open("del");
    try {
        deleteProjectWriter.write(badJsonRecord);
        // write() must not return normally
        fail();
    } catch (DataRejectException e) {
        // NOTE(review): if this branch is taken no IOException propagates, which presumably
        // conflicts with the thrown.expect(...) declarations above - confirm which path is intended.
        String rejectError = e.getRejectInfo().get("error").toString();
        assertEquals(expectedError, rejectError);
    } finally {
        deleteProjectWriter.close();
    }
}
 
Example #18
Source File: AvroCodecTests.java    From schema-evolution-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Encodes a generic record with the v1 user schema and decodes it again through the same codec,
 * using a mocked schema registry, then checks that the name survives the round trip.
 */
@Test
public void genericEncoderV1GenericDecoderV1() throws Exception{
	Schema userSchema = load("users_v1.schema");

	// The mocked registry registers any schema as id 1 and resolves id 1 to the v1 schema.
	SchemaRegistryClient registry = mock(SchemaRegistryClient.class);
	AvroCodec codec = new AvroCodec();
	codec.setSchemaRegistryClient(registry);
	when(registry.register(any())).thenReturn(1);
	when(registry.fetch(eq(1))).thenReturn(userSchema);

	GenericRecord original = new GenericData.Record(userSchema);
	original.put("name","joe");
	original.put("favoriteNumber",42);
	original.put("favoriteColor","blue");

	byte[] encoded = codec.encode(original);
	GenericRecord decoded = codec.decode(encoded,GenericRecord.class);

	String expectedName = original.get("name").toString();
	String actualName = decoded.get("name").toString();
	Assert.assertEquals(expectedName, actualName);
}
 
Example #19
Source File: TestConvertAvroToJSON.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a single serialized Avro record with the "no container" option and checks that one
 * flow file with the expected JSON content is routed to success.
 */
@Test
public void testSingleAvroMessage_noContainer() throws IOException {
    final TestRunner testRunner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    testRunner.setProperty(ConvertAvroToJSON.CONTAINER_OPTIONS, ConvertAvroToJSON.CONTAINER_NONE);

    final Schema userSchema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
    final GenericRecord alyssa = new GenericData.Record(userSchema);
    alyssa.put("name", "Alyssa");
    alyssa.put("favorite_number", 256);

    // Serialize the record and hand the bytes to the processor as its input.
    final DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(userSchema);
    final ByteArrayOutputStream serialized = AvroTestUtil.serializeAvroRecord(userSchema, writer, alyssa);
    testRunner.enqueue(serialized.toByteArray());

    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    result.assertContentEquals("{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}");
}
 
Example #20
Source File: ObjectStoreWriterTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Stores an object in a mock object store, writes a delete record for it through
 * {@code ObjectStoreWriter}, and then checks that fetching the object throws an
 * {@link IOException}.
 */
@Test
public void testDelete() throws Exception {

  WorkUnitState wu = new WorkUnitState();
  wu.setProp(ObjectStoreDeleteConverter.OBJECT_ID_FIELD, "objectId");

  // Put an object into the store and verify it can be read back before deleting it.
  ObjectStoreClient client = new MockObjectStoreClient();
  byte[] objId = client.put(IOUtils.toInputStream("test", "UTF-8"), ConfigFactory.empty());
  Assert.assertEquals(IOUtils.toString(client.getObject(objId).getObjectData(), "UTF-8"), "test");

  try (ObjectStoreWriter writer = new ObjectStoreWriter(client, new State());) {
    ObjectStoreDeleteConverter converter = new ObjectStoreDeleteConverter();
    converter.init(wu);

    Schema schema = new Schema.Parser().parse(SCHEMA_STR);
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("objectId", objId);

    // NOTE(review): the result of this first conversion is discarded; only the second
    // conversion (with a fresh WorkUnitState) is written. Confirm whether this call is needed.
    Iterables.getFirst(converter.convertRecord(converter.convertSchema(schema, wu), datum, wu), null);
    writer.write(Iterables.getFirst(
        converter.convertRecord(converter.convertSchema(schema, wu), datum, new WorkUnitState()), null));
  }

  try {
    client.getObject(objId);
    Assert.fail("should have thrown an IOException as object is already deleted");
  } catch (IOException e) {
    // All good exception thrown because object does not exist
  }
}
 
Example #21
Source File: AvroSerializerUpgradeTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the generic record used as test data, based on the {@code Address} class schema.
 *
 * @return a record with all five address fields set
 */
@Override
public GenericRecord createTestData() {
	GenericRecord address = new GenericData.Record(Address.getClassSchema());
	address.put("num", 239);
	address.put("street", "Baker Street");
	address.put("city", "London");
	address.put("state", "London");
	address.put("zip", "NW1 6XE");
	return address;
}
 
Example #22
Source File: FastSerdeBenchmarkSupport.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a generic record into a specific record by serializing it to Avro binary and reading
 * the bytes back with a {@code SpecificDatumReader} that uses the same schema.
 *
 * @param record the generic record to convert
 * @param <T> the specific record type expected by the caller
 * @return the record read back by the specific reader
 * @throws IOException if encoding or decoding fails
 */
public static <T extends SpecificRecord> T toSpecificRecord(GenericData.Record record) throws IOException {
    // Step 1: generic record -> binary
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    GenericDatumWriter<GenericData.Record> genericWriter = new GenericDatumWriter<>(record.getSchema());
    Encoder encoder = EncoderFactory.get().binaryEncoder(buffer, null);
    genericWriter.write(record, encoder);
    encoder.flush();

    // Step 2: binary -> specific record
    byte[] serialized = buffer.toByteArray();
    SpecificDatumReader<T> specificReader = new SpecificDatumReader<>(record.getSchema());
    return specificReader.read(null, DecoderFactory.get().binaryDecoder(serialized, null));
}
 
Example #23
Source File: AvroReadSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the data model to use: the {@code model} field when it is set, otherwise the model
 * provided by the supplier class configured under {@code AVRO_DATA_SUPPLIER}
 * ({@code SpecificDataSupplier} when nothing is configured).
 *
 * @param conf configuration used to look up and instantiate the supplier
 * @return the selected data model
 */
private GenericData getDataModel(Configuration conf) {
  if (model == null) {
    Class<? extends AvroDataSupplier> supplierClass = conf.getClass(
        AVRO_DATA_SUPPLIER, SpecificDataSupplier.class, AvroDataSupplier.class);
    AvroDataSupplier supplier = ReflectionUtils.newInstance(supplierClass, conf);
    return supplier.get();
  }
  return model;
}
 
Example #24
Source File: TestCSVFileReader.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a validator that checks the {@code id}, {@code string} and {@code even} fields of a
 * record against values derived from its record number.
 */
@Override
public DatasetTestUtilities.RecordValidator<GenericData.Record> getValidator() {
  return new DatasetTestUtilities.RecordValidator<GenericData.Record>() {
    private static final String LETTERS = "abcdef";

    @Override
    public void validate(GenericData.Record record, int recordNum) {
      // id equals the record number
      Assert.assertEquals(recordNum, record.get("id"));

      // string is the single letter at the record number's position
      String expectedLetter = String.valueOf(LETTERS.charAt(recordNum));
      Assert.assertEquals(expectedLetter, record.get("string"));

      // even flags whether the record number is even
      boolean expectedEven = (recordNum % 2) == 0;
      Assert.assertEquals(expectedEven, record.get("even"));
    }
  };
}
 
Example #25
Source File: AvroRecursionEliminatingConverterTest.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one record that conforms to the recursive test schema into a temporary Avro data file.
 * The record carries an address that in turn references a previous address.
 *
 * <p>The temporary file is registered for deletion on JVM exit as soon as it is created, and the
 * file writer is always closed, so a failed write leaks neither the file nor the handle.
 *
 * @return the temporary file holding the single record
 * @throws IOException if the schema cannot be read or the file cannot be written
 */
public File generateRecord()
    throws IOException {
  Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/recursive.avsc"));
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(inputSchema);
  GenericRecord record = new GenericData.Record(inputSchema);
  record.put("name", "John");
  record.put("date_of_birth", 1234L);
  record.put("last_modified", 4567L);
  record.put("created", 6789L);

  // Both the current and the previous address use the schema of the "address" field.
  Schema addressSchema = inputSchema.getField("address").schema();
  GenericRecord addressRecord = new GenericData.Record(addressSchema);
  addressRecord.put("city", "Los Angeles");
  addressRecord.put("street_number", 1234);

  GenericRecord innerAddressRecord = new GenericData.Record(addressSchema);
  innerAddressRecord.put("city", "San Francisco");
  innerAddressRecord.put("street_number", 3456);

  addressRecord.put("previous_address", innerAddressRecord);
  record.put("address", addressRecord);

  File recordFile = File.createTempFile(this.getClass().getSimpleName(),"avsc");
  // Register cleanup before writing so a failed write does not leave the temp file behind.
  recordFile.deleteOnExit();

  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  try {
    dataFileWriter.create(inputSchema, recordFile);
    dataFileWriter.append(record);
  } finally {
    // Release the file handle even when create/append throws.
    dataFileWriter.close();
  }
  return recordFile;
}
 
Example #26
Source File: GoogleDriveAbstractListReader.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a search result into an indexed record, storing the file attributes at positions
 * 0 through 8 of the record.
 *
 * @param file the search result to convert
 * @return the record filled from the file's attributes
 */
private IndexedRecord convertSearchResultToIndexedRecord(File file) {
    IndexedRecord record = new GenericData.Record(schema);

    // Values listed in record position order (0..8).
    Object[] values = {
            file.getId(),
            file.getName(),
            file.getMimeType(),
            file.getModifiedTime().getValue(),
            file.getSize(),
            file.getKind(),
            file.getTrashed(),
            file.getParents().toString(), // TODO This should be a List<String>
            file.getWebViewLink()
    };
    for (int pos = 0; pos < values.length; pos++) {
        record.put(pos, values[pos]);
    }

    return record;
}
 
Example #27
Source File: AggregateCombineFnTest.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds float records into three sum accumulators, checks each running sum, then merges the
 * second and third accumulators into the first and checks the total.
 */
@Test
public void SumDoubleAccumulatorFnTest() {
    final double delta = 0.000000000000001;
    List<GenericData.Record> records = genRecords(Arrays.asList(1.1f, 2.2f, 3.3f, 4.4f, 5.5f), AvroUtils._float());

    AggregateCombineFn.SumDoubleAccumulatorFn first = new AggregateCombineFn.SumDoubleAccumulatorFn();
    AggregateCombineFn.SumDoubleAccumulatorFn second = new AggregateCombineFn.SumDoubleAccumulatorFn();
    AggregateCombineFn.SumDoubleAccumulatorFn third = new AggregateCombineFn.SumDoubleAccumulatorFn();
    first.createAccumulator();
    second.createAccumulator();
    third.createAccumulator();

    // first accumulator: 1.1 + 2.2
    first.addInput(records.get(0));
    Assert.assertEquals(1.1, first.extractOutput(), delta);
    first.addInput(records.get(1));
    Assert.assertEquals(3.3, first.extractOutput(), delta);

    // second accumulator: 3.3 + 4.4
    second.addInput(records.get(2));
    Assert.assertEquals(3.3, second.extractOutput(), delta);
    second.addInput(records.get(3));
    Assert.assertEquals(7.7, second.extractOutput(), delta);

    // third accumulator: 5.5
    third.addInput(records.get(4));
    Assert.assertEquals(5.5, third.extractOutput(), delta);

    // merged result is the sum over all inputs
    first.mergeAccumulators(Arrays.asList(second.getAccumulators(), third.getAccumulators()));
    Assert.assertEquals(16.5, first.extractOutput(), delta);
}
 
Example #28
Source File: TAzureStorageOuputTableTestIT.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one entity with a null third field for every partition/row combination using the
 * Insert_Or_Replace action, then reads the table back and checks the schema and count of the
 * returned records.
 */
@SuppressWarnings("rawtypes")
@Test
public void testInsertOrReplaceNullValue() throws Throwable {
    currentTable = tbl_test + "InsertOrReplaceNullValue";
    insertTestValues(currentTable);

    // configure the output component
    properties.schema.schema.setValue(getSimpleTestSchema());
    properties.actionOnData.setValue(ActionOnData.Insert_Or_Replace);
    properties.schemaListener.afterSchema();

    Writer<?> tableWriter = createWriter(properties);
    tableWriter.open("test-uid");
    for (String partition : partitions) {
        for (String row : rows) {
            IndexedRecord entity = new GenericData.Record(getSimpleTestSchema());
            assertEquals(3, entity.getSchema().getFields().size());
            entity.put(0, partition);
            entity.put(1, row);
            entity.put(2, null);
            tableWriter.write(entity);
        }
    }
    tableWriter.close();

    // read everything back and verify
    BoundedReader tableReader = createReader(currentTable, filter, false);
    assertTrue(tableReader.start());
    int rowCount = 0;
    do {
        rowCount++;
        IndexedRecord current = (IndexedRecord) tableReader.getCurrent();
        assertNull(current.getSchema().getField("StringValue"));
        // Columns with null values are not written to Azure
        assertEquals(3, current.getSchema().getFields().size());
    } while (tableReader.advance());
    tableReader.close();

    assertEquals(9, rowCount);
}
 
Example #29
Source File: BigQueryIOIT.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the write test with a BigQuery write transform that truncates the target and formats
 * each byte array element as an Avro record with a single "data" field.
 */
private void testAvroWrite() {
  BigQueryIO.Write<byte[]> avroWrite =
      BigQueryIO.<byte[]>write()
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
          .withAvroFormatFunction(
              request -> {
                // Wrap the raw bytes so they are stored as the record's "data" value.
                byte[] payload = request.getElement();
                GenericRecord avroRecord = new GenericData.Record(request.getSchema());
                avroRecord.put("data", ByteBuffer.wrap(payload));
                return avroRecord;
              });
  testWrite(avroWrite, AVRO_WRITE_TIME_METRIC_NAME);
}
 
Example #30
Source File: TestReflectReadWrite.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a generic record matching the reflected {@code Pojo} schema, using {@code Utf8} for
 * string values and {@code GenericData} containers for the enum and array fields.
 *
 * @return the populated record
 */
private GenericRecord getGenericPojoUtf8() {
  Schema schema = ReflectData.get().getSchema(Pojo.class);
  GenericData.Record record = new GenericData.Record(schema);

  // primitive and simple fields
  record.put("myboolean", true);
  record.put("mybyte", 1);
  record.put("myshort", 1);
  record.put("myint", 1);
  record.put("mylong", 2L);
  record.put("myfloat", 3.1f);
  record.put("mydouble", 4.1);
  record.put("mybytes", ByteBuffer.wrap(new byte[] { 1, 2, 3, 4 }));
  record.put("mystring", new Utf8("Hello"));

  // enum field
  Schema enumSchema = schema.getField("myenum").schema();
  record.put("myenum", new GenericData.EnumSymbol(enumSchema, "A"));

  // map field
  Map<CharSequence, CharSequence> map = new HashMap<CharSequence, CharSequence>();
  map.put(new Utf8("a"), new Utf8("1"));
  map.put(new Utf8("b"), new Utf8("2"));
  record.put("mymap", map);

  // array and list fields, each built against the schema of its own field
  Schema shortArraySchema = schema.getField("myshortarray").schema();
  record.put("myshortarray", new GenericData.Array<Integer>(shortArraySchema, Lists.newArrayList(1, 2)));

  Schema intArraySchema = schema.getField("myintarray").schema();
  record.put("myintarray", new GenericData.Array<Integer>(intArraySchema, Lists.newArrayList(1, 2)));

  Schema stringArraySchema = schema.getField("mystringarray").schema();
  record.put("mystringarray", new GenericData.Array<Utf8>(
      stringArraySchema, Lists.newArrayList(new Utf8("a"), new Utf8("b"))));

  Schema listSchema = schema.getField("mylist").schema();
  record.put("mylist", new GenericData.Array<Utf8>(
      listSchema, Lists.newArrayList(new Utf8("a"), new Utf8("b"), new Utf8("c"))));

  record.put("mystringable", new StringableObj("blah blah"));
  return record;
}