org.apache.avro.generic.GenericDatumReader Java Examples
The following examples show how to use org.apache.avro.generic.GenericDatumReader.
The original project and source file for each example are noted above the code.
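Before the examples, here is a minimal sketch of the pattern most of them share: pairing a GenericDatumReader with a DataFileReader to iterate over the records in an Avro container file. The file name records.avro is a placeholder; everything else uses the standard Avro API.

import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadAvroFile {
    public static void main(String[] args) throws IOException {
        File avroFile = new File("records.avro"); // hypothetical input file
        // No reader schema supplied: the writer schema embedded in the data
        // file is used, and records are returned as GenericRecord.
        GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(avroFile, datumReader)) {
            Schema schema = fileReader.getSchema(); // writer schema from the file header
            System.out.println("Schema: " + schema);
            GenericRecord record = null;
            while (fileReader.hasNext()) {
                record = fileReader.next(record); // reuse the record to reduce allocation
                System.out.println(record);
            }
        }
    }
}

Passing a schema to the GenericDatumReader constructor, as many of the examples below do, makes Avro resolve the file's writer schema against that reader schema instead.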
Example #1
Source File: AvroEventDeserializer.java From mt-flume with Apache License 2.0
private void initialize() throws IOException, NoSuchAlgorithmException {
    SeekableResettableInputBridge in = new SeekableResettableInputBridge(ris);
    long pos = in.tell();
    in.seek(0L);
    fileReader = new DataFileReader<GenericRecord>(in, new GenericDatumReader<GenericRecord>());
    fileReader.sync(pos);
    schema = fileReader.getSchema();
    datumWriter = new GenericDatumWriter(schema);
    out = new ByteArrayOutputStream();
    encoder = EncoderFactory.get().binaryEncoder(out, encoder);
    schemaHash = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", schema);
    schemaHashString = Hex.encodeHexString(schemaHash);
}
Example #2
Source File: AvroUtils.java From incubator-gobblin with Apache License 2.0
/**
 * Get the latest avro schema for a directory
 * @param directory the input dir that contains avro files
 * @param fs the {@link FileSystem} for the given directory.
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory
 * @throws IOException
 */
public static Schema getDirectorySchema(Path directory, FileSystem fs, boolean latest) throws IOException {
    Schema schema = null;
    try (Closer closer = Closer.create()) {
        List<FileStatus> files = getDirectorySchemaHelper(directory, fs);
        if (files == null || files.size() == 0) {
            LOG.warn("There is no previous avro file in the directory: " + directory);
        } else {
            FileStatus file = latest ? files.get(0) : files.get(files.size() - 1);
            LOG.debug("Path to get the avro schema: " + file);
            FsInput fi = new FsInput(file.getPath(), fs.getConf());
            GenericDatumReader<GenericRecord> genReader = new GenericDatumReader<>();
            schema = closer.register(new DataFileReader<>(fi, genReader)).getSchema();
        }
    } catch (IOException ioe) {
        throw new IOException("Cannot get the schema for directory " + directory, ioe);
    }
    return schema;
}
Example #3
Source File: TestSelectHive3QL.java From nifi with Apache License 2.0
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        GenericRecord record = null;
        long recordsFromStream = 0;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            recordsFromStream += 1;
        }
        return recordsFromStream;
    }
}
Example #4
Source File: AvroToRestJsonEntryConverterTest.java From incubator-gobblin with Apache License 2.0
private void testConversion(RestEntry<JsonObject> expected, WorkUnitState actualWorkUnitState)
        throws DataConversionException, IOException, JSONException {
    Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

    File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
    tmp.deleteOnExit();
    try {
        FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
        DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader);
        GenericRecord avroRecord = dataFileReader.next();

        AvroToRestJsonEntryConverter converter = new AvroToRestJsonEntryConverter();
        RestEntry<JsonObject> actual = converter.convertRecord(null, avroRecord, actualWorkUnitState).iterator().next();

        Assert.assertEquals(actual.getResourcePath(), expected.getResourcePath());
        JSONAssert.assertEquals(expected.getRestEntryVal().toString(), actual.getRestEntryVal().toString(), false);

        converter.close();
        dataFileReader.close();
    } finally {
        if (tmp != null) {
            tmp.delete();
        }
    }
}
Example #5
Source File: TestSplitAvro.java From nifi with Apache License 2.0
@Test
public void testRecordSplitDatafileOutputWithoutMetadata() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new SplitAvro());
    runner.setProperty(SplitAvro.TRANSFER_METADATA, "false");

    runner.enqueue(users.toByteArray());
    runner.run();

    runner.assertTransferCount(SplitAvro.REL_SPLIT, 100);
    runner.assertTransferCount(SplitAvro.REL_ORIGINAL, 1);
    runner.assertTransferCount(SplitAvro.REL_FAILURE, 0);
    runner.getFlowFilesForRelationship(SplitAvro.REL_ORIGINAL).get(0).assertAttributeEquals(FRAGMENT_COUNT.key(), "100");

    final List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(SplitAvro.REL_SPLIT);

    checkDataFileSplitSize(flowFiles, 1, false);

    for (final MockFlowFile flowFile : flowFiles) {
        try (final ByteArrayInputStream in = new ByteArrayInputStream(flowFile.toByteArray());
             final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
            Assert.assertFalse(reader.getMetaKeys().contains(META_KEY1));
            Assert.assertFalse(reader.getMetaKeys().contains(META_KEY2));
            Assert.assertFalse(reader.getMetaKeys().contains(META_KEY3));
        }
    }
}
Example #6
Source File: MapOutputValue.java From incubator-pinot with Apache License 2.0
public static MapOutputValue fromBytes(byte[] bytes, Map<String, Schema> schemaMap) throws IOException {
    DataInputStream dataInputStream = new DataInputStream(new ByteArrayInputStream(bytes));
    int length = dataInputStream.readInt();
    byte[] sourceNameBytes = new byte[length];
    dataInputStream.read(sourceNameBytes);
    String schemaName = new String(sourceNameBytes);

    int recordDataLength = dataInputStream.readInt();
    byte[] recordBytes = new byte[recordDataLength];
    dataInputStream.read(recordBytes);
    Schema schema = schemaMap.get(schemaName);
    GenericRecord record = new GenericData.Record(schema);
    binaryDecoder = DecoderFactory.get().binaryDecoder(recordBytes, binaryDecoder);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
    gdr.read(record, binaryDecoder);
    return new MapOutputValue(schemaName, record);
}
Example #7
Source File: AvroDateRangeMetadata.java From datafu with Apache License 2.0
/**
 * Reads the date range from the metadata stored in an Avro file.
 *
 * @param fs file system to access path
 * @param path path to get date range for
 * @return date range
 * @throws IOException IOException
 */
public static DateRange getOutputFileDateRange(FileSystem fs, Path path) throws IOException {
    path = fs.listStatus(path, PathUtils.nonHiddenPathFilter)[0].getPath();
    FSDataInputStream dataInputStream = fs.open(path);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
    try {
        return new DateRange(new Date(Long.parseLong(dataFileStream.getMetaString(METADATA_DATE_START))),
                             new Date(Long.parseLong(dataFileStream.getMetaString(METADATA_DATE_END))));
    } finally {
        dataFileStream.close();
        dataInputStream.close();
    }
}
Example #8
Source File: Converter.java From xml-avro with Apache License 2.0
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
    GenericRecord record = dataFileReader.next();

    Document doc;
    try {
        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    }
    Element el = unwrapElement(record, doc);
    doc.appendChild(el);

    saveDocument(doc, xmlFile);
}
Example #9
Source File: TestSplitAvro.java From localization_nifi with Apache License 2.0
private void checkDataFileSplitSize(List<MockFlowFile> flowFiles, int expectedRecordsPerSplit, boolean checkMetadata)
        throws IOException {
    for (final MockFlowFile flowFile : flowFiles) {
        try (final ByteArrayInputStream in = new ByteArrayInputStream(flowFile.toByteArray());
             final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {

            int count = 0;
            GenericRecord record = null;
            while (reader.hasNext()) {
                record = reader.next(record);
                Assert.assertNotNull(record.get("name"));
                Assert.assertNotNull(record.get("favorite_number"));
                count++;
            }
            assertEquals(expectedRecordsPerSplit, count);

            if (checkMetadata) {
                assertEquals(META_VALUE1, reader.getMetaString(META_KEY1));
                assertEquals(META_VALUE2, reader.getMetaLong(META_KEY2));
                assertEquals(META_VALUE3, new String(reader.getMeta(META_KEY3), "UTF-8"));
            }
        }
    }
}
Example #10
Source File: AvroMorphlineTest.java From kite with Apache License 2.0
private void runTweetContainer(String morphlineConfigFile, String[] fieldNames) throws Exception {
    File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
    morphline = createMorphline(morphlineConfigFile);
    for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
        Record record = new Record();
        byte[] body = Files.toByteArray(file);
        record.put(Fields.ATTACHMENT_BODY, body);
        collector.reset();
        startSession();
        Notifications.notifyBeginTransaction(morphline);
        assertTrue(morphline.process(record));
        assertEquals(1, collector.getNumStartEvents());
        assertEquals(2104, collector.getRecords().size());

        FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
        int i = 0;
        while (reader.hasNext()) {
            Record actual = collector.getRecords().get(i);
            GenericData.Record expected = reader.next();
            assertTweetEquals(expected, actual, fieldNames, i);
            i++;
        }
        assertEquals(collector.getRecords().size(), i);
    }
}
Example #11
Source File: AvroScanner.java From tajo with Apache License 2.0
/**
 * Initializes the AvroScanner.
 */
@Override
public void init() throws IOException {
    if (targets == null) {
        targets = schema.toArray();
    }
    prepareProjection(targets);
    outTuple = new VTuple(projectionMap.length);

    Schema avroSchema = AvroUtil.getAvroSchema(meta, conf);
    avroFields = avroSchema.getFields();

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(avroSchema);
    SeekableInput input = new FsInput(fragment.getPath(), conf);
    dataFileReader = new DataFileReader<>(input, datumReader);
    super.init();
}
Example #12
Source File: TestHelper.java From incubator-gobblin with Apache License 2.0
public static void assertGenericRecords(File outputAvroFile, Schema schema) throws IOException {
    try (DataFileReader<GenericRecord> reader =
             new DataFileReader<>(outputAvroFile, new GenericDatumReader<GenericRecord>(schema))) {
        Iterator<GenericRecord> iterator = reader.iterator();
        GenericRecord record = iterator.next();
        Assert.assertEquals(record.get("name").toString(), "Alyssa");
        record = iterator.next();
        Assert.assertEquals(record.get("name").toString(), "Ben");
        record = iterator.next();
        Assert.assertEquals(record.get("name").toString(), "Charlie");
        Assert.assertFalse(iterator.hasNext());
    }
}
Example #13
Source File: OracleGenericSchemaDecoder.java From DBus with Apache License 2.0
private void initDecoder() {
    try {
        genericSchema = OracleGenericSchemaProvider.getInstance().getSchema("generic_wrapper.avsc");
        fullPullSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_FULL_PULL_REQUESTS.avsc");
        fullPullHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_FULL_PULL_REQUESTS.avsc");
        syncEventSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.META_SYNC_EVENT.avsc");
        syncEventHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.META_SYNC_EVENT.avsc");
        heartbeatSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_HEARTBEAT_MONITOR.avsc");
        heartbeatHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_HEARTBEAT_MONITOR.avsc");

        datumReader = new GenericDatumReader<>(genericSchema);
        datumWriter = new GenericDatumWriter<>(genericSchema);
    } catch (Exception e) {
        logger.error("OracleGenericSchemaDecoder Initialization Error!", e);
        e.printStackTrace();
    }
}
Example #14
Source File: AvroRecordWriterTest.java From data-highway with Apache License 2.0
@Test
public void typical() throws Exception {
    Schema schema = SchemaBuilder
        .builder()
        .record("record")
        .fields()
        .requiredLong("id")
        .requiredString("name")
        .endRecord();
    Record value = new GenericRecordBuilder(schema).set("id", 1L).set("name", "hello").build();

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    Factory factory = new Factory(CodecFactory.nullCodec());
    RecordWriter writer = factory.create(schema, output);
    writer.write(value);
    writer.close();

    SeekableInput input = new SeekableByteArrayInput(output.toByteArray());
    DatumReader<Record> datumReader = new GenericDatumReader<>(schema);
    DataFileReader<Record> dataFileReader = new DataFileReader<>(input, datumReader);
    assertThat(dataFileReader.next(), is(value));
    assertThat(dataFileReader.hasNext(), is(false));
    dataFileReader.close();
}
Example #15
Source File: KafkaAvroExtractor.java From incubator-gobblin with Apache License 2.0
public KafkaAvroExtractor(WorkUnitState state) {
    super(state);
    this.schemaRegistry = state.contains(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_CLASS)
        ? Optional.of(KafkaSchemaRegistry.<K, Schema> get(state.getProperties()))
        : Optional.<KafkaSchemaRegistry<K, Schema>> absent();
    this.schema = getExtractorSchema();
    if (this.schema.isPresent()) {
        this.reader = Optional.of(new GenericDatumReader<Record>(this.schema.get()));
    } else {
        log.error(String.format("Cannot find latest schema for topic %s. This topic will be skipped", this.topicName));
        this.reader = Optional.absent();
    }
}
Example #16
Source File: FixedFlowInputBoundedReader.java From components with Apache License 2.0
/**
 * This method instantiates the correct Avro Schema object. This is mandatory since Avro
 * "Schema" objects are not serializable.
 */
public void deserializeSchema() {
    if (schema == null) {
        Schema.Parser parser = new Schema.Parser();
        schema = parser.parse(schemaString);
        reader = new GenericDatumReader<>(schema);
    }
}
Example #17
Source File: TestWriteAvroResultWithSchema.java From nifi with Apache License 2.0
@Override
protected List<GenericRecord> readRecords(final InputStream in, final Schema schema, final int recordCount) throws IOException {
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(in, new GenericDatumReader<>());
    final Schema avroSchema = dataFileStream.getSchema();
    GenericData.setStringType(avroSchema, StringType.String);

    List<GenericRecord> records = new ArrayList<>();
    for (int i = 0; i < recordCount; i++) {
        records.add(dataFileStream.next());
    }

    return records;
}
Example #18
Source File: TestDataModelUtil.java From kite with Apache License 2.0
@Test
public void testGetDatumReaderForGenericType() {
    Class<GenericData.Record> type = GenericData.Record.class;
    Schema writerSchema = StandardEvent.getClassSchema();
    DatumReader result = DataModelUtil.getDatumReaderForType(type, writerSchema);
    assertEquals(GenericDatumReader.class, result.getClass());
}
Example #19
Source File: AvroUtils.java From ml-ease with Apache License 2.0
/**
 * Loads the schema from an Avro data file.
 *
 * @param conf The JobConf.
 * @param path The path to the data file.
 * @return The schema read from the data file's metadata.
 * @throws IOException
 */
public static Schema getSchemaFromFile(JobConf conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(new Configuration());
    FSDataInputStream dataInputStream = fs.open(path);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
    return dataFileStream.getSchema();
}
Example #20
Source File: ServerSinkSourceConfigurationTest.java From divolte-collector with Apache License 2.0
private static Stream<GenericRecord> listRecords(final Path avroFile) {
    final GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    logger.debug("Reading records from new Avro file: {}", avroFile);
    try (final FileReader<GenericRecord> fileReader = DataFileReader.openReader(avroFile.toFile(), datumReader)) {
        final ImmutableList<GenericRecord> records = ImmutableList.copyOf(fileReader.iterator());
        logger.info("Read {} record(s) from new Avro file: {}", records.size(), avroFile);
        return records.stream();
    } catch (final IOException e) {
        throw new UncheckedIOException("Error reading records from file: " + avroFile, e);
    }
}
Example #21
Source File: AvroCodec.java From schema-evolution-samples with Apache License 2.0
private DatumReader getDatumReader(Class<?> type, Schema writer) {
    DatumReader reader = null;
    if (SpecificRecord.class.isAssignableFrom(type)) {
        reader = new SpecificDatumReader<>(writer, getReaderSchema(writer));
    } else if (GenericRecord.class.isAssignableFrom(type)) {
        reader = new GenericDatumReader<>(writer, getReaderSchema(writer));
    } else {
        reader = new ReflectDatumReader<>(writer, getReaderSchema(writer));
    }
    return reader;
}
Example #22
Source File: TestExecuteSQL.java From nifi with Apache License 2.0
@Test
public void testCompression() throws SQLException, CompressorException, IOException {
    // remove previous test database, if any
    final File dbLocation = new File(DB_LOCATION);
    dbLocation.delete();

    // load test data to database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    Statement stmt = con.createStatement();

    try {
        stmt.execute("drop table TEST_NULL_INT");
    } catch (final SQLException sqle) {
    }

    stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))");
    stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)");
    stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)");

    runner.setIncomingConnection(false);
    runner.setProperty(ExecuteSQL.COMPRESSION_FORMAT, AvroUtil.CodecType.BZIP2.name());
    runner.setProperty(ExecuteSQL.SQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT");
    runner.run();

    runner.assertAllFlowFilesTransferred(ExecuteSQL.REL_SUCCESS, 1);

    MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExecuteSQL.REL_SUCCESS).get(0);

    try (DataFileStream<GenericRecord> dfs = new DataFileStream<>(new ByteArrayInputStream(flowFile.toByteArray()),
            new GenericDatumReader<GenericRecord>())) {
        assertEquals(AvroUtil.CodecType.BZIP2.name().toLowerCase(), dfs.getMetaString(DataFileConstants.CODEC).toLowerCase());
    }
}
Example #23
Source File: CustomAvroRecordPreparer.java From pxf with Apache License 2.0
private void initAvro() throws Exception {
    FileInputStream fis = new FileInputStream(schema_name);
    schema = new Schema.Parser().parse(fis);

    datum = new GenericData.Record(schema);
    writer = new GenericDatumWriter<>(schema);
    reader = new GenericDatumReader<>(schema);

    fct_en = EncoderFactory.get();
    fis.close();
}
Example #24
Source File: TestPutHive3Streaming.java From nifi with Apache License 2.0
private void assertOutputAvroRecords(List<Map<String, Object>> expectedRecords, MockFlowFile resultFlowFile) throws IOException {
    assertEquals(String.valueOf(expectedRecords.size()), resultFlowFile.getAttribute(PutHive3Streaming.HIVE_STREAMING_RECORD_COUNT_ATTR));

    final DataFileStream<GenericRecord> reader = new DataFileStream<>(
        new ByteArrayInputStream(resultFlowFile.toByteArray()),
        new GenericDatumReader<>());

    Schema schema = reader.getSchema();

    // Verify that the schema is preserved
    assertEquals(schema, new Schema.Parser().parse(new File("src/test/resources/user.avsc")));

    GenericRecord record = null;
    for (Map<String, Object> expectedRecord : expectedRecords) {
        assertTrue(reader.hasNext());
        record = reader.next(record);
        final String name = record.get("name").toString();
        final Integer favorite_number = (Integer) record.get("favorite_number");
        assertNotNull(name);
        assertNotNull(favorite_number);
        assertNull(record.get("favorite_color"));
        assertNull(record.get("scale"));

        assertEquals(expectedRecord.get("name"), name);
        assertEquals(expectedRecord.get("favorite_number"), favorite_number);
    }
    assertFalse(reader.hasNext());
}
Example #25
Source File: AvroUtilsTest.java From kite with Apache License 2.0
@Test
public void testReadAvroEntity() throws Exception {
    String schemaString = "{ \"type\": \"int\" }";
    InputStream is = new ByteArrayInputStream(schemaString.getBytes());
    Schema schema = parser.parse(is);
    byte[] bytes = new byte[] { (byte) 1 };
    DatumReader<Integer> reader = new GenericDatumReader<Integer>(schema);
    Integer i = AvroUtils.readAvroEntity(bytes, reader);
    assertEquals(-1, i.intValue());
}
Example #26
Source File: AvroDataFileParser.java From datacollector with Apache License 2.0
public AvroDataFileParser(ProtoConfigurableEntity.Context context, Schema schema, File file, String readerOffset,
        int maxObjectLength, boolean skipUnionIndexes) throws IOException {
    this.context = context;
    this.file = file;
    this.skipUnionIndexes = skipUnionIndexes;
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema, schema, GenericData.get());
    sin = new SeekableOverrunFileInputStream(new FileInputStream(file), maxObjectLength, true);
    dataFileReader = new DataFileReader<>(sin, datumReader);
    if (readerOffset != null && !readerOffset.isEmpty() && !"0".equals(readerOffset)) {
        String[] split = readerOffset.split(OFFSET_SEPARATOR);
        if (split.length == 3) {
            // split[0] is the file name
            previousSync = Long.parseLong(split[1]);
            recordCount = Long.parseLong(split[2]);
            seekToOffset();
        } else if (split.length == 2) {
            previousSync = Long.parseLong(split[0]);
            recordCount = Long.parseLong(split[1]);
            seekToOffset();
        } else {
            throw new IllegalArgumentException(Utils.format("Invalid offset {}", readerOffset));
        }
    } else {
        recordCount = 0;
        previousSync = dataFileReader.previousSync();
    }
}
Example #27
Source File: AvroIOTest.java From beam with Apache License 2.0
@Test @SuppressWarnings("unchecked") @Category(NeedsRunner.class) public void testMetadata() throws Exception { List<GenericClass> values = ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); File outputFile = tmpFolder.newFile("output.avro"); writePipeline .apply(Create.of(values)) .apply( AvroIO.write(GenericClass.class) .to(outputFile.getAbsolutePath()) .withoutSharding() .withMetadata( ImmutableMap.of( "stringKey", "stringValue", "longKey", 100L, "bytesKey", "bytesValue".getBytes(Charsets.UTF_8)))); writePipeline.run(); try (DataFileStream dataFileStream = new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader())) { assertEquals("stringValue", dataFileStream.getMetaString("stringKey")); assertEquals(100L, dataFileStream.getMetaLong("longKey")); assertArrayEquals( "bytesValue".getBytes(Charsets.UTF_8), dataFileStream.getMeta("bytesKey")); } }
Example #28
Source File: AvroCoder.java From beam with Apache License 2.0
protected AvroCoder(Class<T> type, Schema schema) {
    this.type = type;
    this.schemaSupplier = new SerializableSchemaSupplier(schema);
    typeDescriptor = TypeDescriptor.of(type);
    nonDeterministicReasons = new AvroDeterminismChecker().check(TypeDescriptor.of(type), schema);

    // Decoder and Encoder start off null for each thread. They are allocated and potentially
    // reused inside encode/decode.
    this.decoder = new EmptyOnDeserializationThreadLocal<>();
    this.encoder = new EmptyOnDeserializationThreadLocal<>();

    this.reflectData = Suppliers.memoize(new SerializableReflectDataSupplier(getType()));

    // Reader and writer are allocated once per thread per Coder
    this.reader =
        new EmptyOnDeserializationThreadLocal<DatumReader<T>>() {
            private final AvroCoder<T> myCoder = AvroCoder.this;

            @Override
            public DatumReader<T> initialValue() {
                return myCoder.getType().equals(GenericRecord.class)
                    ? new GenericDatumReader<>(myCoder.getSchema())
                    : new ReflectDatumReader<>(
                        myCoder.getSchema(), myCoder.getSchema(), myCoder.reflectData.get());
            }
        };

    this.writer =
        new EmptyOnDeserializationThreadLocal<DatumWriter<T>>() {
            private final AvroCoder<T> myCoder = AvroCoder.this;

            @Override
            public DatumWriter<T> initialValue() {
                return myCoder.getType().equals(GenericRecord.class)
                    ? new GenericDatumWriter<>(myCoder.getSchema())
                    : new ReflectDatumWriter<>(myCoder.getSchema(), myCoder.reflectData.get());
            }
        };
}
Example #29
Source File: AvroFieldsPickConverterTest.java From incubator-gobblin with Apache License 2.0
@Test
public void testFieldsPickWithNestedRecord() throws Exception {
    Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/pickfields_nested_with_union.avsc"));

    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(ConfigurationKeys.CONVERTER_AVRO_FIELD_PICK_FIELDS,
        "name,favorite_number,nested1.nested1_string,nested1.nested2_union.nested2_string");

    try (AvroFieldsPickConverter converter = new AvroFieldsPickConverter()) {
        Schema convertedSchema = converter.convertSchema(inputSchema, workUnitState);
        Schema expectedSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/converted_pickfields_nested_with_union.avsc"));
        JSONAssert.assertEquals(expectedSchema.toString(), convertedSchema.toString(), false);

        try (DataFileReader<GenericRecord> srcDataFileReader = new DataFileReader<GenericRecord>(
                 new File(getClass().getResource("/converter/pickfields_nested_with_union.avro").toURI()),
                 new GenericDatumReader<GenericRecord>(inputSchema));
             DataFileReader<GenericRecord> expectedDataFileReader = new DataFileReader<GenericRecord>(
                 new File(getClass().getResource("/converter/converted_pickfields_nested_with_union.avro").toURI()),
                 new GenericDatumReader<GenericRecord>(expectedSchema))) {

            while (expectedDataFileReader.hasNext()) {
                GenericRecord expected = expectedDataFileReader.next();
                GenericRecord actual = converter.convertRecord(convertedSchema, srcDataFileReader.next(), workUnitState).iterator().next();
                Assert.assertEquals(actual, expected);
            }
            Assert.assertTrue(!srcDataFileReader.hasNext());
        }
    }
}
Example #30
Source File: KinesisInputRuntime.java From components with Apache License 2.0
@DoFn.ProcessElement
public void processElement(ProcessContext c) throws IOException {
    if (schema == null) {
        schema = new Schema.Parser().parse(schemaStr);
        datumReader = new GenericDatumReader<GenericRecord>(schema);
    }
    decoder = DecoderFactory.get().binaryDecoder(c.element().getDataAsBytes(), decoder);
    GenericRecord record = datumReader.read(null, decoder);
    c.output(record);
}