Java Code Examples for org.apache.avro.generic.GenericDatumReader#read()
The following examples show how to use
org.apache.avro.generic.GenericDatumReader#read().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroTestTools.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Wraps {@code decoder} in a {@link RecordIterator} that decodes one record per step.
 *
 * <p>An {@link IOException} raised while decoding is treated as the end of the data: the
 * supplied input stream is closed (a failure to close is only logged) and iteration stops.
 *
 * @param schema the Avro schema used both for decoding and for the returned iterator
 * @param is the stream to close once no more records can be read
 * @param decoder the decoder records are read from
 * @return an iterator over the decoded records
 */
private static RecordIterator readRecordsFromJsonInputStream(Schema schema, InputStream is, Decoder decoder) {
  final GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);

  AbstractIterator<GenericRecord> decodedRecords = new AbstractIterator<GenericRecord>() {
    @Override
    protected GenericRecord computeNext() {
      try {
        return datumReader.read(null, decoder);
      } catch (IOException readFailure) {
        // Any read failure ends the iteration; release the stream before signalling it.
        try {
          is.close();
        } catch (IOException closeFailure) {
          log.warn("Failed to close input stream.", closeFailure);
        }
        endOfData();
        return null;
      }
    }
  };

  return new RecordIterator(schema, decodedRecords);
}
Example 2
Source File: AvroDeserializationSchema.java From flink with Apache License 2.0 | 5 votes |
@Override public T deserialize(byte[] message) throws IOException { // read record checkAvroInitialized(); inputStream.setBuffer(message); Schema readerSchema = getReaderSchema(); GenericDatumReader<T> datumReader = getDatumReader(); datumReader.setSchema(readerSchema); return datumReader.read(null, decoder); }
Example 3
Source File: OrcTestTools.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/** * Deserialize json object into a list of java object as a row, and transform each of java object * into {@link Writable} counterpart for constructing {@link OrcStruct}, in convenience of Orc reading and writing. * * @param typeInfo The ORC schema in {@link TypeInfo} format. * @param file The file name in String format. * @return */ private OrcRowIterator readRecordsFromJsonInputStream(TypeInfo typeInfo, String file) throws IOException { InputStream is = OrcTestTools.class.getClassLoader().getResourceAsStream(file); // This getParent.getParent is dirty due to we need to simulate multiple-partitions scenarios in iTest. String schemaResourceName = new File(new File(file).getParentFile().getParent(), "schema.avsc").toString(); Schema attemptedSchema = readAvscSchema(schemaResourceName, OrcTestTools.class); final Schema avroSchema = attemptedSchema == null ? readAvscSchema(new File(new File(file).getParent(), "schema.avsc").toString(), OrcTestTools.class) : attemptedSchema; GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(avroSchema); Decoder decoder = DecoderFactory.get().jsonDecoder(avroSchema, is); return new OrcRowIterator(typeInfo, new AbstractIterator<Writable>() { @Override protected Writable computeNext() { try { GenericRecord record = reader.read(null, decoder); return getAvroWritable(record, avroSchema); } catch (IOException e) { try { is.close(); } catch (IOException ioec) { log.warn("Failed to read record from inputstream, will close it immediately", ioec); } endOfData(); return null; } } }); }
Example 4
Source File: SnowflakeAvroConverter.java From snowflake-kafka-connector with Apache License 2.0 | 5 votes |
/**
 * Decodes one binary Avro record with the given schema and re-parses its string form as JSON.
 *
 * @param bytes avro data
 * @param schema avro schema used to decode {@code bytes}
 * @return the tree obtained by parsing the record's {@code toString()} output
 */
private JsonNode parseAvroWithSchema(final byte[] bytes, Schema schema) {
  final GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
  final Decoder binaryDecoder =
      DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(bytes), null);

  try {
    final GenericRecord record = datumReader.read(null, binaryDecoder);
    return mapper.readTree(record.toString());
  } catch (IOException e) {
    throw SnowflakeErrors.ERROR_0010.getException("Failed to parse AVRO record\n" + e);
  }
}
Example 5
Source File: TestLog4jAppenderWithAvro.java From kite with Apache License 2.0 | 5 votes |
/**
 * Logs a generic Avro record through the configured log4j appender, takes the resulting
 * event from the channel, and checks both the decoded message and the event headers.
 */
@Test
public void testAvroGeneric() throws IOException {
  loadProperties("flume-log4jtest-avro-generic.properties");
  PropertyConfigurator.configure(props);
  Logger logger = LogManager.getLogger(TestLog4jAppenderWithAvro.class);

  final String expectedMessage = "This is log message number " + 0;

  Schema schema = new Schema.Parser().parse(
      getClass().getClassLoader().getResource("myrecord.avsc").openStream());
  GenericRecord loggedRecord =
      new GenericRecordBuilder(schema).set("message", expectedMessage).build();
  logger.info(loggedRecord);

  Transaction tx = ch.getTransaction();
  tx.begin();

  Event event = ch.take();
  Assert.assertNotNull(event);

  // Decode the event body with the same schema the record was built from.
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  BinaryDecoder bodyDecoder = DecoderFactory.get().binaryDecoder(event.getBody(), null);
  GenericRecord decoded = datumReader.read(null, bodyDecoder);
  Assert.assertEquals(expectedMessage, decoded.get("message").toString());

  Map<String, String> headers = event.getHeaders();
  Assert.assertNull(headers.get(Log4jAvroHeaders.MESSAGE_ENCODING.toString()));
  Assert.assertEquals("Schema URL should be set",
      "file:///tmp/myrecord.avsc",
      headers.get(Log4jAvroHeaders.AVRO_SCHEMA_URL.toString()));
  Assert.assertNull("Schema string should not be set",
      headers.get(Log4jAvroHeaders.AVRO_SCHEMA_LITERAL.toString()));

  tx.commit();
  tx.close();
}
Example 6
Source File: AbstractAvroSerializer.java From jstorm with Apache License 2.0 | 5 votes |
/**
 * Reads a schema identifier string from {@code input}, resolves it with {@code getSchema},
 * and decodes one Avro datum from the remaining input using that schema.
 *
 * @param kryo unused by this method
 * @param input the Kryo input holding the schema string followed by the binary datum
 * @param aClass unused by this method
 * @return the decoded container
 * @throws RuntimeException wrapping any {@link IOException} raised while decoding
 */
public GenericContainer read(Kryo kryo, Input input, Class<GenericContainer> aClass) {
  final Schema resolvedSchema = this.getSchema(input.readString());
  final GenericDatumReader<GenericContainer> datumReader = new GenericDatumReader<>(resolvedSchema);
  final Decoder directDecoder = DecoderFactory.get().directBinaryDecoder(input, null);

  try {
    return datumReader.read(null, directDecoder);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example 7
Source File: NamespaceValidationTest.java From avro-util with BSD 2-Clause "Simplified" License | 5 votes |
@Test public void testAvro14DoesntValidateNamespace() throws Exception { AvroVersion runtimeVersion = AvroCompatibilityHelper.getRuntimeAvroVersion(); if (runtimeVersion != AvroVersion.AVRO_1_4) { throw new SkipException("only supported under " + AvroVersion.AVRO_1_4 + ". runtime version detected as " + runtimeVersion); } String withAvsc = TestUtil.load("HasNamespace.avsc"); Schema with = Schema.parse(withAvsc); String withoutAvsc = TestUtil.load("HasNoNamespace.avsc"); Schema without = Schema.parse(withoutAvsc); GenericData.Record record = new GenericData.Record(without); record.put("f", AvroCompatibilityHelper.newEnumSymbol(without.getField("f").schema(), "B")); ByteArrayOutputStream os = new ByteArrayOutputStream(); GenericDatumWriter writer = new GenericDatumWriter(without); BinaryEncoder encoder = AvroCompatibilityHelper.newBinaryEncoder(os); //noinspection unchecked writer.write(record, encoder); encoder.flush(); byte[] bytes = os.toByteArray(); GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(without, with); BinaryDecoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null); GenericData.Record read = reader.read(null, decoder); String value = String.valueOf(read.get("f")); Assert.assertEquals(value, "B"); }
Example 8
Source File: NamespaceValidationTest.java From avro-util with BSD 2-Clause "Simplified" License | 5 votes |
@Test public void testModernAvroValidatesNamespaces() throws Exception { AvroVersion runtimeVersion = AvroCompatibilityHelper.getRuntimeAvroVersion(); if (!runtimeVersion.laterThan(AvroVersion.AVRO_1_4)) { throw new SkipException("only supported under modern avro. runtime version detected as " + runtimeVersion); } String withAvsc = TestUtil.load("HasNamespace.avsc"); Schema with = Schema.parse(withAvsc); String withoutAvsc = TestUtil.load("HasNoNamespace.avsc"); Schema without = Schema.parse(withoutAvsc); GenericData.Record record = new GenericData.Record(without); record.put("f", AvroCompatibilityHelper.newEnumSymbol(without.getField("f").schema(), "B")); ByteArrayOutputStream os = new ByteArrayOutputStream(); GenericDatumWriter writer = new GenericDatumWriter(without); BinaryEncoder encoder = AvroCompatibilityHelper.newBinaryEncoder(os); //noinspection unchecked writer.write(record, encoder); encoder.flush(); byte[] bytes = os.toByteArray(); GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(without, with); BinaryDecoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null); try { GenericData.Record read = reader.read(null, decoder); Assert.fail("deserialization was expected to fail"); } catch (Exception expected) { Assert.assertTrue(expected.getMessage().contains("Found EnumType, expecting com.acme.EnumType")); } }
Example 9
Source File: FastSerdeTestsSupport.java From avro-fastserde with Apache License 2.0 | 5 votes |
/**
 * Reads one datum from {@code decoder} using a generic reader built for {@code schema}.
 *
 * @param schema the schema the reader is constructed with
 * @param decoder the decoder to read from
 * @param <T> the type the caller expects the datum to have
 * @return the datum that was read
 * @throws RuntimeException wrapping any exception raised while reading
 */
public static <T> T deserializeGeneric(Schema schema, Decoder decoder) {
  final GenericDatumReader<T> reader = new GenericDatumReader<>(schema);
  final T datum;
  try {
    datum = reader.read(null, decoder);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  return datum;
}
Example 10
Source File: ReplicatorKafkaAvroTest.java From replicator with Apache License 2.0 | 5 votes |
/**
 * Re-encodes a binary Avro datum as non-pretty-printed JSON text.
 *
 * @param avro the binary-encoded datum
 * @param schema the schema used for both decoding and JSON encoding
 * @return the JSON encoder's output decoded as UTF-8
 * @throws IOException if decoding or encoding fails
 */
public static String avroToJson(byte[] avro, Schema schema) throws IOException {
  GenericDatumReader<Object> datumReader = new GenericDatumReader<>(schema);
  DatumWriter<Object> datumWriter = new GenericDatumWriter<>(schema);

  ByteArrayOutputStream jsonBytes = new ByteArrayOutputStream();
  JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(schema, jsonBytes, false);
  Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(avro, null);

  // Binary in, JSON out.
  datumWriter.write(datumReader.read(null, binaryDecoder), jsonEncoder);

  jsonEncoder.flush();
  jsonBytes.flush();
  return jsonBytes.toString("UTF-8");
}
Example 11
Source File: HoodieAvroUtils.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Decodes a binary-encoded Avro record.
 *
 * <p>The decoder currently held in {@code reuseDecoder} is offered to the decoder factory for
 * reuse, and whatever decoder the factory returns is stored back into {@code reuseDecoder}.
 *
 * @param bytes the serialized record
 * @param schema the schema used to read the record
 * @return the decoded record
 * @throws IOException if reading the record fails
 */
public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException {
  final BinaryDecoder previous = reuseDecoder.get();
  final BinaryDecoder current = DecoderFactory.get().binaryDecoder(bytes, previous);
  reuseDecoder.set(current);

  return new GenericDatumReader<GenericRecord>(schema).read(null, current);
}
Example 12
Source File: HoodieAvroUtils.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Decodes a JSON-encoded Avro record.
 *
 * @param bytes the JSON encoding of the record, as bytes
 * @param schema the schema used to build both the JSON decoder and the reader
 * @return the decoded record
 * @throws IOException if creating the decoder or reading the record fails
 */
public static GenericRecord jsonBytesToAvro(byte[] bytes, Schema schema) throws IOException {
  final JsonDecoder decoder =
      DecoderFactory.get().jsonDecoder(schema, new ByteArrayInputStream(bytes));
  return new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
}
Example 13
Source File: HoodieAvroDataBlock.java From hudi with Apache License 2.0 | 5 votes |
@Override protected void deserializeRecords() throws IOException { SizeAwareDataInputStream dis = new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get()))); // 1. Read version for this data block int version = dis.readInt(); HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version); // Get schema from the header Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); // If readerSchema was not present, use writerSchema if (schema == null) { schema = writerSchema; } GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(writerSchema, schema); // 2. Get the total records int totalRecords = 0; if (logBlockVersion.hasRecordCount()) { totalRecords = dis.readInt(); } List<IndexedRecord> records = new ArrayList<>(totalRecords); // 3. Read the content for (int i = 0; i < totalRecords; i++) { int recordLength = dis.readInt(); BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(), recordLength, decoderCache.get()); decoderCache.set(decoder); IndexedRecord record = reader.read(null, decoder); records.add(record); dis.skipBytes(recordLength); } dis.close(); this.records = records; // Free up content to be GC'd, deflate deflate(); }
Example 14
Source File: RegistryAvroDeserializationSchema.java From flink with Apache License 2.0 | 5 votes |
/**
 * Decodes a single message whose writer schema is read from the message itself.
 *
 * <p>After {@code checkAvroInitialized()} runs, the message bytes are set as the buffer of
 * the input stream, the writer schema is read from that stream through {@code schemaCoder},
 * and the datum reader is configured with the writer schema and the expected (reader) schema
 * before one datum is read.
 *
 * @param message the serialized bytes to decode
 * @return the datum read from the decoder
 * @throws IOException if reading the schema or the datum fails
 */
@Override
public T deserialize(byte[] message) throws IOException {
  checkAvroInitialized();
  getInputStream().setBuffer(message);

  final Schema schemaFromMessage = schemaCoder.readSchema(getInputStream());
  final Schema expectedSchema = getReaderSchema();

  final GenericDatumReader<T> reader = getDatumReader();
  reader.setSchema(schemaFromMessage);
  reader.setExpected(expectedSchema);

  return reader.read(null, getDecoder());
}
Example 15
Source File: AvroSerDeFactory.java From samza with Apache License 2.0 | 4 votes |
/**
 * Decodes one binary-encoded Avro datum.
 *
 * @param bytes the binary encoding of the datum
 * @param schema the schema the reader is constructed with
 * @param <T> the type the caller expects the datum to have
 * @return the decoded datum
 * @throws IOException if reading the datum fails
 */
private static <T> T genericRecordFromBytes(byte[] bytes, Schema schema) throws IOException {
  final BinaryDecoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null);
  return new GenericDatumReader<T>(schema).read(null, decoder);
}
Example 16
Source File: TestAvroRelConversion.java From samza with Apache License 2.0 | 4 votes |
/**
 * Reads a single datum from its binary Avro encoding.
 *
 * @param bytes the encoded datum
 * @param schema the schema handed to the generic reader
 * @param <T> the datum type expected by the caller
 * @return the datum read from {@code bytes}
 * @throws IOException if the read fails
 */
private static <T> T genericRecordFromBytes(byte[] bytes, Schema schema) throws IOException {
  final BinaryDecoder source = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null);
  final GenericDatumReader<T> datumReader = new GenericDatumReader<>(schema);
  final T datum = datumReader.read(null, source);
  return datum;
}
Example 17
Source File: TestWriteAvroResultWithoutSchema.java From nifi with Apache License 2.0 | 4 votes |
/**
 * Reads one binary-encoded record from the stream using the supplied schema.
 *
 * @param in the stream positioned at the start of a binary-encoded record
 * @param schema the schema used to read the record
 * @return the decoded record
 * @throws IOException if reading the record fails
 */
@Override
protected GenericRecord readRecord(final InputStream in, final Schema schema) throws IOException {
  final BinaryDecoder streamDecoder = DecoderFactory.get().binaryDecoder(in, null);
  return new GenericDatumReader<GenericRecord>(schema).read(null, streamDecoder);
}
Example 18
Source File: AvroUtilsTest.java From incubator-gobblin with Apache License 2.0 | 4 votes |
@Test public void testDecorateRecordWithNestedField() throws IOException { Schema inputRecordSchema = SchemaBuilder.record("test").fields() .name("integer1") .prop("innerProp", "innerVal") .type().intBuilder().endInt().noDefault() .requiredString("string1") .endRecord(); GenericRecord inputRecord = new GenericData.Record(inputRecordSchema); inputRecord.put("integer1", 10); inputRecord.put("string1", "hello"); Schema nestedFieldSchema = SchemaBuilder.builder().record("metadata") .fields() .requiredString("source") .requiredLong("timestamp") .endRecord(); Schema.Field nestedField = new Schema.Field("metadata", nestedFieldSchema, "I am a nested field", null); Schema outputRecordSchema = AvroUtils.decorateRecordSchema(inputRecordSchema, Collections.singletonList(nestedField)); Map<String, Object> newFields = new HashMap<>(); GenericData.Record metadataRecord = new GenericData.Record(nestedFieldSchema); metadataRecord.put("source", "oracle"); metadataRecord.put("timestamp", 1234L); newFields.put("metadata", metadataRecord); GenericRecord outputRecord = AvroUtils.decorateRecord(inputRecord, newFields, outputRecordSchema); Assert.assertEquals(outputRecord.get("integer1"), 10); Assert.assertEquals(outputRecord.get("string1"), "hello"); Assert.assertEquals(outputRecord.get("metadata"), metadataRecord); // Test that serializing and deserializing this record works. 
GenericDatumWriter writer = new GenericDatumWriter(outputRecordSchema); ByteArrayOutputStream baos = new ByteArrayOutputStream(1000); Encoder binaryEncoder = EncoderFactory.get().binaryEncoder(baos, null); writer.write(outputRecord, binaryEncoder); binaryEncoder.flush(); baos.close(); ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(bais, null); GenericDatumReader reader = new GenericDatumReader(outputRecordSchema); GenericRecord deserialized = (GenericRecord) reader.read(null, binaryDecoder); Assert.assertEquals(deserialized.get("integer1"), 10); Assert.assertEquals(deserialized.get("string1").toString(), "hello"); //extra toString: avro returns Utf8 Assert.assertEquals(deserialized.get("metadata"), metadataRecord); }
Example 19
Source File: AvroUtils.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/**
 * Deserialize a {@link GenericRecord} from a byte array. A new decoder and a new reader are
 * created on every call; this method is not intended for high performance.
 *
 * @param serializedRecord the binary encoding of the record
 * @param schema the schema used to read the record
 * @return the decoded record
 * @throws IOException if reading the record fails
 */
public static GenericRecord slowDeserializeGenericRecord(byte[] serializedRecord, Schema schema) throws IOException {
  final Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(serializedRecord, null);
  return new GenericDatumReader<GenericRecord>(schema).read(null, binaryDecoder);
}
Example 20
Source File: AvroUtils.java From brooklin with BSD 2-Clause "Simplified" License | 2 votes |
/**
 * Decode and deserialize the Json byte array into an instance of an Avro record.
 *
 * @param schema schema describing the expected information of the bytes
 * @param bytes Json string in bytes to decode; interpreted as UTF-8
 * @param reuse instance passed to the reader as its reuse argument
 * @param <T> the record type expected by the caller
 * @return decoded instance of GenericRecord
 * @throws IOException if creating the decoder or reading the record fails
 */
public static <T> T decodeJsonAsAvroGenericRecord(Schema schema, byte[] bytes, T reuse) throws IOException {
  final String json = new String(bytes, StandardCharsets.UTF_8);
  final JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, json);
  return new GenericDatumReader<T>(schema).read(reuse, decoder);
}