Java Code Examples for org.apache.avro.generic.GenericData#Array
The following examples show how to use
org.apache.avro.generic.GenericData#Array.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestAvroDecoder.java From presto with Apache License 2.0 | 6 votes |
/**
 * Builds an Avro array of nullable arrays of nullable longs (one inner list is null and
 * one inner element is null), decodes it through the "array_field" column and hands the
 * decoded row to {@code checkArrayValue}.
 */
@Test
public void testNestedLongArrayWithNulls() {
    // array< nullable array< nullable long > >
    Schema nestedSchema = SchemaBuilder.array()
            .items().nullable().array()
            .items().nullable().longType();

    // Arrays.asList is used where a null entry is required; ImmutableList rejects nulls.
    List<Long> innerWithNull = Arrays.asList(3L, 5L, null, 6L);
    List<List<Long>> rows = Arrays.asList(
            ImmutableList.of(12L, 15L, 17L),
            ImmutableList.of(22L, 25L, 27L, 29L),
            null,
            innerWithNull);
    GenericArray<List<Long>> avroArray = new GenericData.Array<>(nestedSchema, rows);

    DecoderTestColumnHandle columnHandle = new DecoderTestColumnHandle(
            0,
            "row",
            new ArrayType(new ArrayType(BIGINT)),
            "array_field",
            null,
            null,
            false,
            false,
            false);

    Map<DecoderColumnHandle, FieldValueProvider> decoded =
            buildAndDecodeColumn(columnHandle, "array_field", nestedSchema.toString(), avroArray);
    checkArrayValue(decoded, columnHandle, avroArray);
}
Example 2
Source File: TestAvroDecoder.java From presto with Apache License 2.0 | 6 votes |
/**
 * Builds an Avro array of nullable arrays of nullable strings (one inner list is null and
 * one inner element is null), decodes it through the "array_field" column and hands the
 * decoded row to {@code checkArrayValue}.
 */
@Test
public void testNestedStringArrayWithNulls() {
    // array< nullable array< nullable string > >
    Schema nestedSchema = SchemaBuilder.array()
            .items().nullable().array()
            .items().nullable().stringType();

    // Arrays.asList is used where a null entry is required; ImmutableList rejects nulls.
    List<String> innerWithNull = Arrays.asList("boo", "hoo", null, "hoo");
    List<List<String>> rows = Arrays.asList(
            ImmutableList.of("a", "bb", "ccc"),
            ImmutableList.of("foo", "bar", "baz", "car"),
            null,
            innerWithNull);
    GenericArray<List<String>> avroArray = new GenericData.Array<>(nestedSchema, rows);

    DecoderTestColumnHandle columnHandle = new DecoderTestColumnHandle(
            0,
            "row",
            new ArrayType(new ArrayType(VARCHAR)),
            "array_field",
            null,
            null,
            false,
            false,
            false);

    Map<DecoderColumnHandle, FieldValueProvider> decoded =
            buildAndDecodeColumn(columnHandle, "array_field", nestedSchema.toString(), avroArray);
    checkArrayValue(decoded, columnHandle, avroArray);
}
Example 3
Source File: TestAvroDecoder.java From presto with Apache License 2.0 | 6 votes |
/**
 * Decodes a three-level nested array of longs through the "array_field" column and hands
 * the decoded row to {@code checkArrayValue}.
 */
@Test
public void testDeeplyNestedLongArray() {
    // array< array< array< long > > >
    Schema tripleNestedSchema = SchemaBuilder.array()
            .items().array()
            .items().array()
            .items().longType();

    // Build the fixture inside-out: two leaf lists, wrapped once, then wrapped again.
    List<Long> leafA = ImmutableList.of(12L, 15L, 17L);
    List<Long> leafB = ImmutableList.of(22L, 25L, 27L, 29L);
    List<List<Long>> middle = ImmutableList.of(leafA, leafB);
    List<List<List<Long>>> outer = ImmutableList.of(middle);

    GenericArray<List<List<Long>>> avroArray = new GenericData.Array<>(tripleNestedSchema, outer);

    DecoderTestColumnHandle columnHandle = new DecoderTestColumnHandle(
            0,
            "row",
            new ArrayType(new ArrayType(new ArrayType(BIGINT))),
            "array_field",
            null,
            null,
            false,
            false,
            false);

    Map<DecoderColumnHandle, FieldValueProvider> decoded =
            buildAndDecodeColumn(columnHandle, "array_field", tripleNestedSchema.toString(), avroArray);
    checkArrayValue(decoded, columnHandle, avroArray);
}
Example 4
Source File: TestAvroTypeUtil.java From nifi with Apache License 2.0 | 6 votes |
/**
 * Covers an Avro schema in which an array field nested inside a record declares a
 * default value. See
 * <a href="https://issues.apache.org/jira/browse/NIFI-4893">NIFI-4893</a>.
 *
 * @throws IOException
 *             if the schema resource cannot be read.
 */
@Test
public void testDefaultArrayValuesInRecordsCase2() throws IOException {
    Schema avroSchema = new Schema.Parser().parse(getClass().getResourceAsStream("defaultArrayInRecords2.json"));

    // Build the outer record with a nested "field1" record that sets nothing explicitly,
    // so "listOfInt" is populated from the schema default.
    Schema field1Schema = avroSchema.getField("field1").schema();
    Record nested = new GenericRecordBuilder(field1Schema).build();
    GenericRecordBuilder outerBuilder = new GenericRecordBuilder(avroSchema);
    outerBuilder.set("field1", nested);
    Record outer = outerBuilder.build();

    // Avro side: the default is materialised as a GenericData.Array.
    GenericRecord nestedFromOuter = (GenericRecord) outer.get("field1");
    @SuppressWarnings("unchecked")
    GenericData.Array<Integer> avroDefaults = (GenericData.Array<Integer>) nestedFromOuter.get("listOfInt");
    assertArrayEquals(new Object[] {1,2,3}, avroDefaults.toArray());

    // NiFi side: the converted RecordSchema exposes the same default as an Object[].
    RecordSchema converted = AvroTypeUtil.createSchema(avroSchema);
    RecordField field1 = converted.getField("field1").get();
    assertEquals(RecordFieldType.RECORD, field1.getDataType().getFieldType());

    RecordDataType field1Type = (RecordDataType) field1.getDataType();
    RecordField listOfInt = field1Type.getChildSchema().getField("listOfInt").get();
    assertEquals(RecordFieldType.ARRAY, listOfInt.getDataType().getFieldType());

    Object convertedDefault = listOfInt.getDefaultValue();
    assertTrue(convertedDefault instanceof Object[]);
    assertArrayEquals(new Object[] {1,2,3}, ((Object[]) listOfInt.getDefaultValue()));
}
Example 5
Source File: AvroGenericRecordAccessorTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
@Test public void testGetStringArrayUtf8() throws IOException { // Expectation: Even though we read an Avro object with UTF8 underneath, the accessor converts it into a // Java String List<String> expectedQuotes = ImmutableList.of("abc", "defg"); GenericData.Array<Utf8> strings = new GenericData.Array<Utf8>(2, Schema.createArray(Schema.create(Schema.Type.STRING))); expectedQuotes.forEach(s -> strings.add(new Utf8(s))); record.put("favorite_quotes", strings); Assert.assertEquals(accessor.getGeneric("favorite_quotes"), expectedQuotes); }
Example 6
Source File: FastGenericSerializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License | 5 votes |
private <E> void shouldWriteArrayOfPrimitives(Schema.Type elementType, List<E> data) { // given Schema elementSchema = Schema.create(elementType); Schema arraySchema = Schema.createArray(elementSchema); // Serialization should work on various types of lists GenericData.Array<E> vanillaAvroList = new GenericData.Array<>(0, arraySchema); ArrayList<E> javaList = new ArrayList<>(0); for (E element: data) { vanillaAvroList.add(element); javaList.add(element); } // when List<E> resultFromAvroList = decodeRecord(arraySchema, dataAsBinaryDecoder(vanillaAvroList)); List<E> resultFromJavaList = decodeRecord(arraySchema, dataAsBinaryDecoder(javaList, arraySchema)); List<E> resultFromPrimitiveList = decodeRecord(arraySchema, dataAsBinaryDecoder(data, arraySchema)); // then Assert.assertEquals(resultFromAvroList.size(), data.size()); Assert.assertEquals(resultFromJavaList.size(), data.size()); Assert.assertEquals(resultFromPrimitiveList.size(), data.size()); for (int i = 0; i < data.size(); i++) { Assert.assertEquals(resultFromAvroList.get(i), data.get(i)); Assert.assertEquals(resultFromJavaList.get(i), data.get(i)); Assert.assertEquals(resultFromPrimitiveList.get(i), data.get(i)); } }
Example 7
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License | 5 votes |
private <E, L> void shouldReadArrayOfPrimitives(Implementation implementation, Schema.Type elementType, Class<L> expectedListClass, List<E> data) { // given Schema elementSchema = Schema.create(elementType); Schema arraySchema = Schema.createArray(elementSchema); GenericData.Array<E> avroArray = new GenericData.Array<>(0, arraySchema); for (E element: data) { avroArray.add(element); } // when List<E> array = implementation.decode(arraySchema, arraySchema, genericDataAsDecoder(avroArray)); // then Assert.assertEquals(array.size(), data.size()); for (int i = 0; i < data.size(); i++) { Assert.assertEquals(array.get(i), data.get(i)); } if (implementation.isFast) { // The extended API should always be available, regardless of whether warm or cold Assert.assertTrue(Arrays.stream(array.getClass().getInterfaces()).anyMatch(c -> c.equals(expectedListClass)), "The returned type should implement " + expectedListClass.getSimpleName()); try { Method getPrimitiveMethod = expectedListClass.getMethod("getPrimitive", int.class); for (int i = 0; i < data.size(); i++) { Assert.assertEquals(getPrimitiveMethod.invoke(array, i), data.get(i)); } } catch (Exception e) { Assert.fail("Failed to access the getPrimitive function!"); } } }
Example 8
Source File: TestAvroTypeUtil.java From nifi with Apache License 2.0 | 5 votes |
/**
 * Covers an Avro schema whose array field declares a default value. The record is built
 * without setting "listOfInt", and the test asserts that both the Avro record and the
 * converted NiFi {@code RecordSchema} expose an empty array for that field. See
 * <a href="https://issues.apache.org/jira/browse/NIFI-4893">NIFI-4893</a>.
 *
 * @throws IOException
 *             if the schema resource cannot be read.
 */
@Test
public void testDefaultArrayValue1() throws IOException {
    Schema avroSchema = new Schema.Parser().parse(getClass().getResourceAsStream("defaultArrayValue1.json"));
    GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
    Record r = builder.build();

    @SuppressWarnings("unchecked")
    GenericData.Array<Integer> values = (GenericData.Array<Integer>) r.get("listOfInt");
    // Expected value goes first, matching the other assertions in this method, so that a
    // failure message reports expected/actual the right way round.
    assertEquals(0, values.size());

    RecordSchema record = AvroTypeUtil.createSchema(avroSchema);
    RecordField field = record.getField("listOfInt").get();
    assertEquals(RecordFieldType.ARRAY, field.getDataType().getFieldType());
    assertTrue(field.getDefaultValue() instanceof Object[]);
    assertEquals(0, ((Object[]) field.getDefaultValue()).length);
}
Example 9
Source File: KsqlGenericRowAvroSerializerTest.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 5 votes |
/**
 * Serializes a row whose third column is null, deserializes the bytes with a
 * {@code KafkaAvroDeserializer} sharing the same mock registry, and asserts each field
 * of the resulting record, including the array and map columns.
 */
@Test
public void shouldSerializeRowWithNullCorrectly() {
    SchemaRegistryClient registry = new MockSchemaRegistryClient();
    KsqlGenericRowAvroSerializer serializer =
        new KsqlGenericRowAvroSerializer(schema, registry, new KsqlConfig(new HashMap<>()));

    // Raw List is kept on purpose so the GenericRow constructor call resolves exactly as before.
    List columns = Arrays.asList(
        1511897796092L,
        1L,
        null,
        10.0,
        new Double[]{100.0},
        Collections.singletonMap("key1", 100.0));
    byte[] payload = serializer.serialize("t1", new GenericRow(columns));

    KafkaAvroDeserializer deserializer = new KafkaAvroDeserializer(registry);
    GenericRecord decoded = (GenericRecord) deserializer.deserialize("t1", payload);
    Assert.assertNotNull(decoded);

    // Scalar columns: field names are looked up in upper case.
    assertThat("Incorrect serialization.", decoded.get("ordertime".toUpperCase()), equalTo(1511897796092L));
    assertThat("Incorrect serialization.", decoded.get("orderid".toUpperCase()), equalTo(1L));
    assertThat("Incorrect serialization.", decoded.get("itemid".toUpperCase()), equalTo(null));
    assertThat("Incorrect serialization.", decoded.get("orderunits".toUpperCase()), equalTo(10.0));

    // Collection columns.
    GenericData.Array arrayColumn = (GenericData.Array) decoded.get("arraycol".toUpperCase());
    Map mapColumn = (Map) decoded.get("mapcol".toUpperCase());

    assertThat("Incorrect serialization.", arrayColumn.size(), equalTo(1));
    assertThat("Incorrect serialization.", arrayColumn.get(0), equalTo(100.0));
    assertThat("Incorrect serialization.", mapColumn,
        equalTo(Collections.singletonMap(new Utf8("key1"), 100.0)));
}
Example 10
Source File: KsqlGenericRowAvroDeserializer.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 5 votes |
/**
 * Copies an Avro generic array into a newly allocated Java array whose component type is
 * obtained from {@code SchemaUtil.getJavaType} for the field's value schema. Each element
 * is passed through {@code enforceFieldType} before being stored.
 *
 * @param fieldSchema  schema of the array field; its value schema describes the elements
 * @param genericArray Avro array to copy
 * @return the populated Java array (typed as {@code Object})
 */
private Object handleArray(Schema fieldSchema, GenericData.Array genericArray) {
    Class componentType = SchemaUtil.getJavaType(fieldSchema.valueSchema());
    Object[] converted =
        (Object[]) java.lang.reflect.Array.newInstance(componentType, genericArray.size());

    int index = 0;
    while (index < genericArray.size()) {
        converted[index] = enforceFieldType(fieldSchema.valueSchema(), genericArray.get(index));
        index++;
    }
    return converted;
}
Example 11
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
@Test(groups = {"deserializationTest"}, dataProvider = "Implementation") public void shouldReadArrayOfRecords(Implementation implementation) { // given Schema recordSchema = createRecord("record", createPrimitiveUnionFieldSchema("field", Schema.Type.STRING)); Schema arrayRecordSchema = Schema.createArray(recordSchema); GenericData.Record subRecordBuilder = new GenericData.Record(recordSchema); subRecordBuilder.put("field", "abc"); GenericData.Array<GenericData.Record> recordsArray = new GenericData.Array<>(0, arrayRecordSchema); recordsArray.add(subRecordBuilder); recordsArray.add(subRecordBuilder); // when GenericData.Array<GenericRecord> array = implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray)); // then Assert.assertEquals(2, array.size()); Assert.assertEquals(new Utf8("abc"), array.get(0).get("field")); Assert.assertEquals(new Utf8("abc"), array.get(1).get("field")); // given arrayRecordSchema = Schema.createArray(createUnionSchema(recordSchema)); subRecordBuilder = new GenericData.Record(recordSchema); subRecordBuilder.put("field", "abc"); recordsArray = new GenericData.Array<>(0, arrayRecordSchema); recordsArray.add(subRecordBuilder); recordsArray.add(subRecordBuilder); // when array = implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray)); // then Assert.assertEquals(2, array.size()); Assert.assertEquals(new Utf8("abc"), array.get(0).get("field")); Assert.assertEquals(new Utf8("abc"), array.get(1).get("field")); }
Example 12
Source File: AvroParquetMorphlineTest.java From kite with Apache License 2.0 | 4 votes |
/**
 * Writes one record covering every field of {@code all.avsc} to a Parquet file, runs the
 * "readAvroParquetFileWithProjectionSubSchema" morphline over it and asserts the fields of
 * the single record the collector receives.
 *
 * @throws Exception if writing the file or running the morphline fails
 */
@Test
public void testAll() throws Exception {
    Schema schema = new Schema.Parser().parse(new File("src/test/resources/test-avro-schemas/all.avsc"));

    // Reserve a unique temp path, then delete the file again so only its path is handed
    // to the Parquet writer.
    File scratch = File.createTempFile(getClass().getSimpleName(), ".tmp");
    scratch.deleteOnExit();
    scratch.delete();
    Path file = new Path(scratch.getPath());

    AvroParquetWriter<GenericRecord> parquetWriter = new AvroParquetWriter<GenericRecord>(file, schema);

    // Fixture values, one per non-trivial field.
    Schema nestedSchema = schema.getField("mynestedrecord").schema();
    GenericData.Record nestedRecord = new GenericRecordBuilder(nestedSchema)
        .set("mynestedint", 1)
        .build();

    List<Integer> integerArray = Arrays.asList(1, 2, 3);
    Schema intArraySchema = Schema.createArray(Schema.create(Schema.Type.INT));
    GenericData.Array<Integer> genericIntegerArray =
        new GenericData.Array<Integer>(intArraySchema, integerArray);

    Schema fixedSchema = Schema.createFixed("fixed", null, null, 1);
    GenericFixed genericFixed = new GenericData.Fixed(fixedSchema, new byte[] { (byte) 65 });

    List<Integer> emptyArray = new ArrayList<Integer>();
    ImmutableMap emptyMap = new ImmutableMap.Builder<String, Integer>().build();

    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
    recordBuilder.set("mynull", null);
    recordBuilder.set("myboolean", true);
    recordBuilder.set("myint", 1);
    recordBuilder.set("mylong", 2L);
    recordBuilder.set("myfloat", 3.1f);
    recordBuilder.set("mydouble", 4.1);
    recordBuilder.set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)));
    recordBuilder.set("mystring", "hello");
    recordBuilder.set("mynestedrecord", nestedRecord);
    recordBuilder.set("myenum", "a");
    recordBuilder.set("myarray", genericIntegerArray);
    recordBuilder.set("myemptyarray", emptyArray);
    recordBuilder.set("myoptionalarray", genericIntegerArray);
    recordBuilder.set("mymap", ImmutableMap.of("a", 1, "b", 2));
    recordBuilder.set("myemptymap", emptyMap);
    recordBuilder.set("myfixed", genericFixed);
    GenericData.Record written = recordBuilder.build();

    parquetWriter.write(written);
    parquetWriter.close();

    // Run the morphline against the file that was just written.
    morphline = createMorphline("test-morphlines/readAvroParquetFileWithProjectionSubSchema");
    Record morphlineInput = new Record();
    morphlineInput.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
    collector.reset();

    assertTrue(morphline.process(morphlineInput));
    assertEquals(1, collector.getRecords().size());

    GenericData.Record actualRecord =
        (GenericData.Record) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
    assertNotNull(actualRecord);
    assertEquals(null, actualRecord.get("mynull"));
    assertEquals(true, actualRecord.get("myboolean"));
    assertEquals(1, actualRecord.get("myint"));
    assertEquals(2L, actualRecord.get("mylong"));
    // "myfloat" was written as 3.1f but is asserted to be null after the morphline;
    // presumably the projection sub-schema omits it (TODO confirm against the morphline config).
    assertEquals(null, actualRecord.get("myfloat"));
    assertEquals(4.1, actualRecord.get("mydouble"));
    assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), actualRecord.get("mybytes"));
    assertEquals("hello", actualRecord.get("mystring"));
    assertEquals("a", actualRecord.get("myenum"));
    assertEquals(nestedRecord, actualRecord.get("mynestedrecord"));
    assertEquals(integerArray, actualRecord.get("myarray"));
    assertEquals(emptyArray, actualRecord.get("myemptyarray"));
    assertEquals(integerArray, actualRecord.get("myoptionalarray"));
    assertEquals(ImmutableMap.of("a", 1, "b", 2), actualRecord.get("mymap"));
    assertEquals(emptyMap, actualRecord.get("myemptymap"));
    assertEquals(genericFixed, actualRecord.get("myfixed"));
}
Example 13
Source File: TestReadWrite.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Writes one record covering every field of {@code all.avsc} (including an array with
 * null elements) to a Parquet file, reads the first record back and asserts each field.
 * Strings and the enum are compared via {@code str(...)} and a {@code compat}-dependent
 * expected value respectively.
 *
 * @throws Exception if writing or reading the Parquet file fails
 */
@Test
public void testAll() throws Exception {
    Schema schema = new Schema.Parser().parse(Resources.getResource("all.avsc").openStream());
    Path file = new Path(createTempFile().getPath());

    // Fixture values shared between the write and the assertions.
    List<Integer> integerArray = Arrays.asList(1, 2, 3);
    List<Integer> emptyArray = new ArrayList<Integer>();

    Schema nestedSchema = schema.getField("mynestedrecord").schema();
    GenericData.Record nestedRecord = new GenericRecordBuilder(nestedSchema)
        .set("mynestedint", 1)
        .build();

    Schema arrayOfOptionalIntegers = Schema.createArray(optional(Schema.create(Schema.Type.INT)));
    GenericData.Array<Integer> genericIntegerArrayWithNulls =
        new GenericData.Array<Integer>(arrayOfOptionalIntegers, Arrays.asList(1, null, 2, null, 3));

    Schema fixedSchema = Schema.createFixed("fixed", null, null, 1);
    GenericFixed genericFixed = new GenericData.Fixed(fixedSchema, new byte[]{(byte) 65});

    ImmutableMap<String, Integer> emptyMap = new ImmutableMap.Builder<String, Integer>().build();

    // Write a single record.
    try (ParquetWriter<GenericRecord> parquetWriter = AvroParquetWriter
        .<GenericRecord>builder(file)
        .withSchema(schema)
        .withConf(testConf)
        .build()) {

        Schema intArraySchema = Schema.createArray(Schema.create(Schema.Type.INT));
        GenericData.Array<Integer> genericIntegerArray =
            new GenericData.Array<Integer>(intArraySchema, integerArray);

        GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
        recordBuilder.set("mynull", null);
        recordBuilder.set("myboolean", true);
        recordBuilder.set("myint", 1);
        recordBuilder.set("mylong", 2L);
        recordBuilder.set("myfloat", 3.1f);
        recordBuilder.set("mydouble", 4.1);
        recordBuilder.set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));
        recordBuilder.set("mystring", "hello");
        recordBuilder.set("mynestedrecord", nestedRecord);
        recordBuilder.set("myenum", "a");
        recordBuilder.set("myarray", genericIntegerArray);
        recordBuilder.set("myemptyarray", emptyArray);
        recordBuilder.set("myoptionalarray", genericIntegerArray);
        recordBuilder.set("myarrayofoptional", genericIntegerArrayWithNulls);
        recordBuilder.set("mymap", ImmutableMap.of("a", 1, "b", 2));
        recordBuilder.set("myemptymap", emptyMap);
        recordBuilder.set("myfixed", genericFixed);

        parquetWriter.write(recordBuilder.build());
    }

    // Read the first record back.
    final GenericRecord nextRecord;
    try (AvroParquetReader<GenericRecord> parquetReader =
        new AvroParquetReader<GenericRecord>(testConf, file)) {
        nextRecord = parquetReader.read();
    }

    // The expected enum representation depends on the compat flag.
    Object expectedEnumSymbol;
    if (compat) {
        expectedEnumSymbol = "a";
    } else {
        expectedEnumSymbol = new GenericData.EnumSymbol(schema.getField("myenum").schema(), "a");
    }

    assertNotNull(nextRecord);
    assertEquals(null, nextRecord.get("mynull"));
    assertEquals(true, nextRecord.get("myboolean"));
    assertEquals(1, nextRecord.get("myint"));
    assertEquals(2L, nextRecord.get("mylong"));
    assertEquals(3.1f, nextRecord.get("myfloat"));
    assertEquals(4.1, nextRecord.get("mydouble"));
    assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
    assertEquals(str("hello"), nextRecord.get("mystring"));
    assertEquals(expectedEnumSymbol, nextRecord.get("myenum"));
    assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
    assertEquals(integerArray, nextRecord.get("myarray"));
    assertEquals(emptyArray, nextRecord.get("myemptyarray"));
    assertEquals(integerArray, nextRecord.get("myoptionalarray"));
    assertEquals(genericIntegerArrayWithNulls, nextRecord.get("myarrayofoptional"));
    assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
    assertEquals(emptyMap, nextRecord.get("myemptymap"));
    assertEquals(genericFixed, nextRecord.get("myfixed"));
}
Example 14
Source File: AvroRowSerializationSchema.java From flink with Apache License 2.0 | 4 votes |
private Object convertFlinkType(Schema schema, Object object) { if (object == null) { return null; } switch (schema.getType()) { case RECORD: if (object instanceof Row) { return convertRowToAvroRecord(schema, (Row) object); } throw new IllegalStateException("Row expected but was: " + object.getClass()); case ENUM: return new GenericData.EnumSymbol(schema, object.toString()); case ARRAY: final Schema elementSchema = schema.getElementType(); final Object[] array = (Object[]) object; final GenericData.Array<Object> convertedArray = new GenericData.Array<>(array.length, schema); for (Object element : array) { convertedArray.add(convertFlinkType(elementSchema, element)); } return convertedArray; case MAP: final Map<?, ?> map = (Map<?, ?>) object; final Map<Utf8, Object> convertedMap = new HashMap<>(); for (Map.Entry<?, ?> entry : map.entrySet()) { convertedMap.put( new Utf8(entry.getKey().toString()), convertFlinkType(schema.getValueType(), entry.getValue())); } return convertedMap; case UNION: final List<Schema> types = schema.getTypes(); final int size = types.size(); final Schema actualSchema; if (size == 2 && types.get(0).getType() == Schema.Type.NULL) { actualSchema = types.get(1); } else if (size == 2 && types.get(1).getType() == Schema.Type.NULL) { actualSchema = types.get(0); } else if (size == 1) { actualSchema = types.get(0); } else { // generic type return object; } return convertFlinkType(actualSchema, object); case FIXED: // check for logical type if (object instanceof BigDecimal) { return new GenericData.Fixed( schema, convertFromDecimal(schema, (BigDecimal) object)); } return new GenericData.Fixed(schema, (byte[]) object); case STRING: return new Utf8(object.toString()); case BYTES: // check for logical type if (object instanceof BigDecimal) { return ByteBuffer.wrap(convertFromDecimal(schema, (BigDecimal) object)); } return ByteBuffer.wrap((byte[]) object); case INT: // check for logical types if (object instanceof Date) { return convertFromDate(schema, 
(Date) object); } else if (object instanceof LocalDate) { return convertFromDate(schema, Date.valueOf((LocalDate) object)); } else if (object instanceof Time) { return convertFromTime(schema, (Time) object); } else if (object instanceof LocalTime) { return convertFromTime(schema, Time.valueOf((LocalTime) object)); } return object; case LONG: // check for logical type if (object instanceof Timestamp) { return convertFromTimestamp(schema, (Timestamp) object); } else if (object instanceof LocalDateTime) { return convertFromTimestamp(schema, Timestamp.valueOf((LocalDateTime) object)); } return object; case FLOAT: case DOUBLE: case BOOLEAN: return object; } throw new RuntimeException("Unsupported Avro type:" + schema); }
Example 15
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_array_of_records() throws Exception { final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc")); List<GenericRecord> innerRecords = new LinkedList<>(); final GenericRecord outerRecord = new GenericData.Record(schema); Schema arraySchema = schema.getField("records").schema(); Schema innerRecordSchema = arraySchema.getElementType(); final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema); innerRecord1.put("name", "Joe"); innerRecord1.put("age", 42); innerRecords.add(innerRecord1); final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema); innerRecord2.put("name", "Mary"); innerRecord2.put("age", 28); innerRecords.add(innerRecord2); GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords); outerRecord.put("records", array); final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); ByteArrayOutputStream out = new ByteArrayOutputStream(); try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) { dataFileWriter.create(schema, out); dataFileWriter.append(outerRecord); } out.close(); // Build a flow file from the Avro record Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " + "(records ARRAY<STRUCT<name:STRING, age:INT>>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", 
resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema)); // Verify the record contains an array Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records")); assertTrue(arrayFieldObject instanceof ArrayList); ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject; assertEquals(2, arrayField.size()); // Verify the first element. Should be a record with two fields "name" and "age" Object element = arrayField.get(0); assertTrue(element instanceof OrcStruct); StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema)); Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); assertTrue(nameObject instanceof Text); assertEquals("Joe", nameObject.toString()); Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); assertTrue(ageObject instanceof IntWritable); assertEquals(42, ((IntWritable) ageObject).get()); // Verify the first element. 
Should be a record with two fields "name" and "age" element = arrayField.get(1); assertTrue(element instanceof OrcStruct); nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); assertTrue(nameObject instanceof Text); assertEquals("Mary", nameObject.toString()); ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); assertTrue(ageObject instanceof IntWritable); assertEquals(28, ((IntWritable) ageObject).get()); }
Example 16
Source File: AvroGenericRecordAccessor.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/**
 * Wraps {@code value} in an Avro array with a STRING element schema and stores it under
 * {@code fieldName} via {@code set}.
 *
 * @param fieldName name of the field to assign
 * @param value     strings to store
 */
@Override
public void setStringArray(String fieldName, List<String> value) {
    Schema stringArraySchema = Schema.createArray(Schema.create(Schema.Type.STRING));
    set(fieldName, new GenericData.Array<String>(stringArraySchema, value));
}
Example 17
Source File: CustomAvroRecordPreparer.java From pxf with Apache License 2.0 | 4 votes |
public void deserialize(GenericRecord record) throws IOException { // 1. integers @SuppressWarnings("unchecked") GenericData.Array<Integer> intArray = (GenericData.Array<Integer>) record.get("num"); for (int i = 0; i < intArray.size(); i++) { num[i] = intArray.get(i).intValue(); } int1 = ((Integer) record.get("int1")).intValue(); int2 = ((Integer) record.get("int2")).intValue(); // 2. strings @SuppressWarnings("unchecked") GenericData.Array<Utf8> stringArray = (GenericData.Array<Utf8>) record.get("strings"); for (int i = 0; i < stringArray.size(); i++) { strings[i] = stringArray.get(i).toString(); } st1 = record.get("st1").toString(); // 3. doubles @SuppressWarnings("unchecked") GenericData.Array<Double> doubleArray = (GenericData.Array<Double>) record.get("dubs"); for (int i = 0; i < doubleArray.size(); i++) { dubs[i] = doubleArray.get(i).doubleValue(); } db = ((Double) record.get("db")).doubleValue(); // 4. floats @SuppressWarnings("unchecked") GenericData.Array<Float> floatArray = (GenericData.Array<Float>) record.get("fts"); for (int i = 0; i < floatArray.size(); i++) { fts[i] = floatArray.get(i).floatValue(); } ft = ((Float) record.get("ft")).floatValue(); // 5. longs @SuppressWarnings("unchecked") GenericData.Array<Long> longArray = (GenericData.Array<Long>) record.get("lngs"); for (int i = 0; i < longArray.size(); i++) { lngs[i] = longArray.get(i).longValue(); } lng = ((Long) record.get("lng")).longValue(); // 6. booleans @SuppressWarnings("unchecked") GenericData.Array<Boolean> booleanArray = (GenericData.Array<Boolean>) record.get("bls"); for (int i = 0; i < booleanArray.size(); i++) { bls[i] = booleanArray.get(i); } bl = (Boolean) record.get("bl"); // 7. bytes ByteBuffer bytesBuffer = (ByteBuffer) record.get("bts"); bts = bytesBuffer.array(); }
Example 18
Source File: HoodieTestDataGenerator.java From hudi with Apache License 2.0 | 4 votes |
/**
 * Builds a test record populated with the supplied keys and names plus random values
 * drawn from {@code RAND}. With {@code isFlattened} the record uses
 * {@code FLATTENED_AVRO_SCHEMA} and carries flat "fare"/"currency" fields; otherwise it
 * uses {@code AVRO_SCHEMA} and additionally fills numeric, bytes, date/timestamp,
 * decimal ("height"), map, nested "fare" record and "tip_history" array fields.
 *
 * @param rowKey         value for "_row_key"
 * @param riderName      value for "rider"
 * @param driverName     value for "driver"
 * @param timestamp      value for "timestamp"
 * @param isDeleteRecord value for "_hoodie_is_deleted"
 * @param isFlattened    selects the flattened schema and field set
 * @return the populated record
 */
public static GenericRecord generateGenericRecord(String rowKey, String riderName, String driverName,
        double timestamp, boolean isDeleteRecord, boolean isFlattened) {
    GenericRecord rec = new GenericData.Record(isFlattened ? FLATTENED_AVRO_SCHEMA : AVRO_SCHEMA);
    rec.put("_row_key", rowKey);
    rec.put("timestamp", timestamp);
    rec.put("rider", riderName);
    rec.put("driver", driverName);
    rec.put("begin_lat", RAND.nextDouble());
    rec.put("begin_lon", RAND.nextDouble());
    rec.put("end_lat", RAND.nextDouble());
    rec.put("end_lon", RAND.nextDouble());

    if (isFlattened) {
        rec.put("fare", RAND.nextDouble() * 100);
        rec.put("currency", "USD");
    } else {
        rec.put("distance_in_meters", RAND.nextInt());
        rec.put("seconds_since_epoch", RAND.nextLong());
        rec.put("weight", RAND.nextFloat());
        // Explicit charset: the no-arg getBytes() depends on the platform default.
        byte[] bytes = "Canada".getBytes(java.nio.charset.StandardCharsets.UTF_8);
        rec.put("nation", ByteBuffer.wrap(bytes));
        long currentTimeMillis = System.currentTimeMillis();
        Date date = new Date(currentTimeMillis);
        rec.put("current_date", (int) date.toLocalDate().toEpochDay());
        rec.put("current_ts", currentTimeMillis);

        // Locale.ROOT guarantees a '.' decimal separator; with a comma-decimal default
        // locale the formatted text would make the BigDecimal constructor throw.
        BigDecimal bigDecimal =
                new BigDecimal(String.format(java.util.Locale.ROOT, "%5f", RAND.nextFloat()));
        Schema decimalSchema = AVRO_SCHEMA.getField("height").schema();
        Conversions.DecimalConversion decimalConversions = new Conversions.DecimalConversion();
        GenericFixed genericFixed = decimalConversions.toFixed(bigDecimal, decimalSchema, LogicalTypes.decimal(10, 6));
        rec.put("height", genericFixed);

        rec.put("city_to_state", Collections.singletonMap("LA", "CA"));

        GenericRecord fareRecord = new GenericData.Record(AVRO_SCHEMA.getField("fare").schema());
        fareRecord.put("amount", RAND.nextDouble() * 100);
        fareRecord.put("currency", "USD");
        rec.put("fare", fareRecord);

        GenericArray<GenericRecord> tipHistoryArray =
                new GenericData.Array<>(1, AVRO_SCHEMA.getField("tip_history").schema());
        // The element schema is taken from a re-parsed copy of the "tip_history" schema.
        Schema tipSchema = new Schema.Parser()
                .parse(AVRO_SCHEMA.getField("tip_history").schema().toString())
                .getElementType();
        GenericRecord tipRecord = new GenericData.Record(tipSchema);
        tipRecord.put("amount", RAND.nextDouble() * 100);
        tipRecord.put("currency", "USD");
        tipHistoryArray.add(tipRecord);
        rec.put("tip_history", tipHistoryArray);
    }

    rec.put("_hoodie_is_deleted", isDeleteRecord);
    return rec;
}
Example 19
Source File: TestConvertAvroToORC.java From localization_nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_array_of_records() throws Exception { final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc")); List<GenericRecord> innerRecords = new LinkedList<>(); final GenericRecord outerRecord = new GenericData.Record(schema); Schema arraySchema = schema.getField("records").schema(); Schema innerRecordSchema = arraySchema.getElementType(); final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema); innerRecord1.put("name", "Joe"); innerRecord1.put("age", 42); innerRecords.add(innerRecord1); final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema); innerRecord2.put("name", "Mary"); innerRecord2.put("age", 28); innerRecords.add(innerRecord2); GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords); outerRecord.put("records", array); final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); ByteArrayOutputStream out = new ByteArrayOutputStream(); try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) { dataFileWriter.create(schema, out); dataFileWriter.append(outerRecord); } out.close(); // Build a flow file from the Avro record Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " + "(records ARRAY<STRUCT<name:STRING, age:INT>>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", 
resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema)); // Verify the record contains an array Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records")); assertTrue(arrayFieldObject instanceof ArrayList); ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject; assertEquals(2, arrayField.size()); // Verify the first element. Should be a record with two fields "name" and "age" Object element = arrayField.get(0); assertTrue(element instanceof OrcStruct); StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema)); Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); assertTrue(nameObject instanceof Text); assertEquals("Joe", nameObject.toString()); Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); assertTrue(ageObject instanceof IntWritable); assertEquals(42, ((IntWritable) ageObject).get()); // Verify the first element. 
Should be a record with two fields "name" and "age" element = arrayField.get(1); assertTrue(element instanceof OrcStruct); nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); assertTrue(nameObject instanceof Text); assertEquals("Mary", nameObject.toString()); ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); assertTrue(ageObject instanceof IntWritable); assertEquals(28, ((IntWritable) ageObject).get()); }
Example 20
Source File: CustomAvroRecordPreparer.java From pxf with Apache License 2.0 | 4 votes |
@Override public GenericRecord serialize() throws IOException { // 1. num, int1, int2 Schema.Field field = schema.getField("num"); Schema fieldSchema = field.schema(); GenericData.Array<Integer> intArray = new GenericData.Array<Integer>( num.length, fieldSchema); for (int i = 0; i < num.length; i++) { intArray.add(new Integer(num[i])); } datum.put("num", intArray); datum.put("int1", int1); datum.put("int2", int2); // 2. st1 field = schema.getField("strings"); fieldSchema = field.schema(); GenericData.Array<Utf8> stringArray = new GenericData.Array<Utf8>( strings.length, fieldSchema); for (int i = 0; i < strings.length; i++) { stringArray.add(new Utf8(strings[i])); } datum.put("strings", stringArray); datum.put("st1", st1); // 3. doubles field = schema.getField("dubs"); fieldSchema = field.schema(); GenericData.Array<Double> doubleArray = new GenericData.Array<Double>( dubs.length, fieldSchema); for (int i = 0; i < dubs.length; i++) { doubleArray.add(new Double(dubs[i])); } datum.put("dubs", doubleArray); datum.put("db", db); // 4. floats field = schema.getField("fts"); fieldSchema = field.schema(); GenericData.Array<Float> floatArray = new GenericData.Array<Float>( fts.length, fieldSchema); for (int i = 0; i < fts.length; i++) { floatArray.add(new Float(fts[i])); } datum.put("fts", floatArray); datum.put("ft", ft); // 5. longs field = schema.getField("lngs"); fieldSchema = field.schema(); GenericData.Array<Long> longArray = new GenericData.Array<Long>( lngs.length, fieldSchema); for (int i = 0; i < lngs.length; i++) { longArray.add(lngs[i]); } datum.put("lngs", longArray); datum.put("lng", lng); // 6. booleans field = schema.getField("bls"); fieldSchema = field.schema(); GenericData.Array<Boolean> booleanArray = new GenericData.Array<Boolean>( bls.length, fieldSchema); for (int i = 0; i < bls.length; i++) { booleanArray.add(bls[i]); } datum.put("bls", booleanArray); datum.put("bl", bl); // 7. 
bytes ByteBuffer byteBuffer = ByteBuffer.wrap(bts); datum.put("bts", byteBuffer); return datum; }