Java Code Examples for org.apache.avro.Schema#createArray()
The following examples show how to use
org.apache.avro.Schema#createArray().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GenericDataTSVTest.java From iow-hadoop-streaming with Apache License 2.0 | 6 votes |
/**
 * Builds a record with an int, a string and an int array, then checks the
 * tab-separated text that {@code gd.toString} produces for it.
 */
@Test
public void testToString() throws Exception {
    Schema recordSchema = p.parse(sc1);
    GenericData.Record record = new GenericData.Record(recordSchema);
    record.put("x", 25);
    record.put("y", "wtf");

    Schema intArraySchema = Schema.createArray(Schema.create(Schema.Type.INT));
    GenericData.Array<Integer> values = new GenericData.Array<Integer>(3, intArraySchema);
    for (int value : new int[] {1, 3, 4}) {
        values.add(value);
    }
    record.put("a", values);

    String rendered = gd.toString(record);

    Assert.assertNotNull(rendered);
    Assert.assertEquals("25\twtf\t\t[1, 3, 4]", rendered);
}
Example 2
Source File: FastSpecificSerializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test public void shouldSerializeNullElementInArray() { // given Schema arrayRecordSchema = Schema.createArray(Schema.createUnion( Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT))); List<Object> records = new ArrayList<>(); records.add("0"); records.add(null); records.add(2); // when List<Object> array = deserializeSpecific(arrayRecordSchema, serializeSpecificFast(records, arrayRecordSchema)); // then Assert.assertEquals(3, array.size()); Assert.assertEquals(new Utf8("0"), array.get(0)); Assert.assertNull(array.get(1)); Assert.assertEquals(2, array.get(2)); }
Example 3
Source File: GenericDataTSVTest.java From iow-hadoop-streaming with Apache License 2.0 | 6 votes |
@Test public void testGetDatum2() throws Exception { // checking unions Schema s3 = p.parse(sc3); GenericData.Record r1 = gd.getDatum("wtf\t\t[1,3,7]\t\t\t[1,3,7]", s3); GenericData.Record r2 = new GenericData.Record(s3); r2.put("x","wtf"); r2.put("x2",""); GenericData.Array<Integer> a2 = new GenericData.Array<Integer>(3,Schema.createArray(Schema.create(Schema.Type.INT))); a2.add(1); a2.add(3); a2.add(7); r2.put("a", a2); r2.put("a4", a2); Assert.assertNotNull(r1); Assert.assertNotNull(r2); Assert.assertEquals(r1, r2); }
Example 4
Source File: TestAvroStorageUtils.java From spork with Apache License 2.0 | 6 votes |
/**
 * Verifies that merging any two different complex schema kinds (record,
 * array, map, union, fixed) is rejected with an {@link IOException} whose
 * message names both types.
 */
@Test
public void testMergeSchema3() throws IOException {
    Schema[] complexTypes = {
        Schema.createRecord(new ArrayList<Schema.Field>()),
        Schema.createArray(Schema.create(Schema.Type.INT)),
        Schema.createMap(Schema.create(Schema.Type.INT)),
        Schema.createUnion(new ArrayList<Schema>()),
        Schema.createFixed("fixed", null, null, 1),
    };

    for (int i = 0; i < complexTypes.length; i++) {
        for (int j = 0; j < complexTypes.length; j++) {
            if (i == j) {
                // Only pairs of different kinds are expected to fail.
                continue;
            }
            Schema x = complexTypes[i];
            Schema y = complexTypes[j];
            try {
                Schema merged = AvroStorageUtils.mergeSchema(x, y);
                Assert.fail("exception is expected, but " + merged.getType() + " is returned");
            } catch (IOException e) {
                assertEquals("Cannot merge "+ x.getType()+ " with "+ y.getType(), e.getMessage());
            }
        }
    }
}
Example 5
Source File: GenericDataTSV.java From iow-hadoop-streaming with Apache License 2.0 | 6 votes |
private Array<java.io.Serializable> createArray(Schema type, String t) throws IOException, JsonProcessingException { ObjectMapper mapper = new ObjectMapper(); JsonNode node = mapper.readTree(t); Iterator <JsonNode> i = node.iterator(); Array<java.io.Serializable> arr = new GenericData.Array<java.io.Serializable>(node.size(), Schema.createArray(type)); while(i.hasNext()) { switch (type.getType()) { case INT: arr.add(i.next().getIntValue()); break; case FLOAT: case DOUBLE: arr.add(i.next().getDoubleValue()); break; default: arr.add(i.next().getTextValue()); // No array-of-objects! } } return arr; }
Example 6
Source File: TestAvroSchemaConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Round-trips a record with a single array field whose elements are optional
 * ints and checks the expected Parquet message type under the new list
 * behavior.
 */
@Test
public void testOptionalArrayElement() throws Exception {
    Schema record = Schema.createRecord("record1", null, null, false);
    Schema arrayOfOptionalInts = Schema.createArray(optional(Schema.create(INT)));
    record.setFields(Arrays.asList(
        new Schema.Field("myintarray", arrayOfOptionalInts, null, null)
    ));

    String expectedMessageType =
        "message record1 {\n" +
        " required group myintarray (LIST) {\n" +
        " repeated group list {\n" +
        " optional int32 element;\n" +
        " }\n" +
        " }\n" +
        "}\n";

    testRoundTripConversion(NEW_BEHAVIOR, record, expectedMessageType);
}
Example 7
Source File: FastGenericDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License | 5 votes |
private <E, L> void shouldReadArrayOfPrimitives(Implementation implementation, Schema.Type elementType, Class<L> expectedListClass, List<E> data) { // given Schema elementSchema = Schema.create(elementType); Schema arraySchema = Schema.createArray(elementSchema); GenericData.Array<E> avroArray = new GenericData.Array<>(0, arraySchema); for (E element: data) { avroArray.add(element); } // when List<E> array = implementation.decode(arraySchema, arraySchema, genericDataAsDecoder(avroArray)); // then Assert.assertEquals(array.size(), data.size()); for (int i = 0; i < data.size(); i++) { Assert.assertEquals(array.get(i), data.get(i)); } if (implementation.isFast) { // The extended API should always be available, regardless of whether warm or cold Assert.assertTrue(Arrays.stream(array.getClass().getInterfaces()).anyMatch(c -> c.equals(expectedListClass)), "The returned type should implement " + expectedListClass.getSimpleName()); try { Method getPrimitiveMethod = expectedListClass.getMethod("getPrimitive", int.class); for (int i = 0; i < data.size(); i++) { Assert.assertEquals(getPrimitiveMethod.invoke(array, i), data.get(i)); } } catch (Exception e) { Assert.fail("Failed to access the getPrimitive function!"); } } }
Example 8
Source File: AvroGenericRecordAccessorTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
@Test public void testGetStringArrayUtf8() throws IOException { // Expectation: Even though we read an Avro object with UTF8 underneath, the accessor converts it into a // Java String List<String> expectedQuotes = ImmutableList.of("abc", "defg"); GenericData.Array<Utf8> strings = new GenericData.Array<Utf8>(2, Schema.createArray(Schema.create(Schema.Type.STRING))); expectedQuotes.forEach(s -> strings.add(new Utf8(s))); record.put("favorite_quotes", strings); Assert.assertEquals(accessor.getGeneric("favorite_quotes"), expectedQuotes); }
Example 9
Source File: BigQueryAvroUtils.java From beam with Apache License 2.0 | 5 votes |
/**
 * Converts one BigQuery field definition into an Avro record field.
 *
 * <p>The element schema is a nested record for RECORD-typed fields and a
 * primitive schema otherwise. The field mode then decides the wrapper: a
 * missing or NULLABLE mode yields a union with null, REQUIRED uses the
 * element schema directly, and REPEATED yields an array of it.
 *
 * @param bigQueryField the BigQuery field to convert
 * @return the Avro field carrying the same name and description
 * @throws IllegalArgumentException if the field type has no Avro mapping or the mode is unknown
 */
private static Field convertField(TableFieldSchema bigQueryField) {
    ImmutableCollection<Type> candidateTypes = BIG_QUERY_TO_AVRO_TYPES.get(bigQueryField.getType());
    if (candidateTypes.isEmpty()) {
        throw new IllegalArgumentException(
            "Unable to map BigQuery field type " + bigQueryField.getType() + " to avro type.");
    }

    // The first mapped Avro type is the one used.
    Type avroType = candidateTypes.iterator().next();
    Schema elementSchema = (avroType == Type.RECORD)
        ? toGenericAvroSchema(bigQueryField.getName(), bigQueryField.getFields())
        : Schema.create(avroType);

    String mode = bigQueryField.getMode();
    final Schema fieldSchema;
    // A missing mode is treated the same as NULLABLE.
    switch (mode == null ? "NULLABLE" : mode) {
        case "NULLABLE":
            fieldSchema = Schema.createUnion(Schema.create(Type.NULL), elementSchema);
            break;
        case "REQUIRED":
            fieldSchema = elementSchema;
            break;
        case "REPEATED":
            fieldSchema = Schema.createArray(elementSchema);
            break;
        default:
            throw new IllegalArgumentException(
                String.format("Unknown BigQuery Field Mode: %s", mode));
    }

    return new Field(
        bigQueryField.getName(),
        fieldSchema,
        bigQueryField.getDescription(),
        (Object) null /* Cast to avoid deprecated JsonNode constructor. */);
}
Example 10
Source File: AvroStringFieldEncryptorConverterTest.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Creates a three-element Avro string array containing "one", "two" and "three".
 *
 * @return the populated generic array
 */
private GenericArray<String> buildTestArray() {
    Schema stringArraySchema = Schema.createArray(Schema.create(Schema.Type.STRING));
    GenericArray<String> result = new GenericData.Array<>(3, stringArraySchema);
    for (String word : new String[] {"one", "two", "three"}) {
        result.add(word);
    }
    return result;
}
Example 11
Source File: AvroJson.java From parquet-mr with Apache License 2.0 | 5 votes |
@Override public Schema array(ArrayNode ignored, List<Schema> elementSchemas) { // use LinkedHashSet to preserve schema order switch (elementSchemas.size()) { case 0: return Schema.createArray(Schema.create(Schema.Type.NULL)); case 1: return Schema.createArray(Iterables.getOnlyElement(elementSchemas)); default: return Schema.createArray(Schemas.mergeOrUnion(elementSchemas)); } }
Example 12
Source File: TestTableConversion.java From kite with Apache License 2.0 | 5 votes |
/**
 * Checks Hive-to-Avro conversion for a map of longs, an array of floats, an
 * array of nested maps, and a map of structs that combines the previous two.
 */
@Test
public void testConvertMaps() {
    // map<string,bigint>
    TypeInfo longMapType = parseTypeInfo("map<string,bigint>");
    Schema longMapSchema = Schema.createMap(optional(Schema.create(Schema.Type.LONG)));
    Schema convertedLongMap =
        HiveSchemaConverter.convert(startPath, "test", longMapType, NO_REQUIRED_FIELDS);
    Assert.assertEquals("Should convert map of primitive", longMapSchema, convertedLongMap);

    // array<float>
    TypeInfo floatArrayType = parseTypeInfo("array<float>");
    Schema floatArraySchema = Schema.createArray(optional(Schema.create(Schema.Type.FLOAT)));
    Schema convertedFloatArray =
        HiveSchemaConverter.convert(startPath, "test", floatArrayType, NO_REQUIRED_FIELDS);
    Assert.assertEquals("Should convert map of arrays", floatArraySchema, convertedFloatArray);

    // array<map<string,map<string,bigint>>>
    TypeInfo nestedMapArrayType = parseTypeInfo(
        "array<map<string,map<string,bigint>>>");
    Schema nestedMapArraySchema = Schema.createArray(
        optional(Schema.createMap(optional(longMapSchema))));
    Schema convertedNestedMapArray =
        HiveSchemaConverter.convert(startPath, "test", nestedMapArrayType, NO_REQUIRED_FIELDS);
    Assert.assertEquals("Should convert map of maps", nestedMapArraySchema, convertedNestedMapArray);

    // map<string,struct<...>> built from the two array schemas above
    TypeInfo structMapType = parseTypeInfo("map<string," +
        "struct<a:array<float>,b:array<map<string,map<string,bigint>>>>>");
    Schema structSchema = Schema.createRecord("test", null, null, false);
    structSchema.setFields(Lists.newArrayList(
        new Schema.Field("a", optional(floatArraySchema), null, NULL_DEFAULT),
        new Schema.Field("b", optional(nestedMapArraySchema), null, NULL_DEFAULT)
    ));
    Schema structMapSchema = Schema.createMap(optional(structSchema));
    Schema convertedStructMap =
        HiveSchemaConverter.convert(startPath, "test", structMapType, NO_REQUIRED_FIELDS);
    Assert.assertEquals("Should convert map of structs", structMapSchema, convertedStructMap);
}
Example 13
Source File: FastSpecificDeserializerGeneratorTest.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer") public void shouldReadArrayOfRecords(Boolean whetherUseFastDeserializer) { // given Schema arrayRecordSchema = Schema.createArray(TestRecord.SCHEMA$); TestRecord testRecord = emptyTestRecord(); testRecord.testStringUnion = "abc"; List<TestRecord> recordsArray = new ArrayList<>(); recordsArray.add(testRecord); recordsArray.add(testRecord); // when List<TestRecord> array = null; if (whetherUseFastDeserializer) { array = decodeRecordFast(arrayRecordSchema, arrayRecordSchema, specificDataAsDecoder(recordsArray, arrayRecordSchema)); } else { array = decodeRecordSlow(arrayRecordSchema, arrayRecordSchema, specificDataAsDecoder(recordsArray, arrayRecordSchema)); } // then Assert.assertEquals(2, array.size()); Assert.assertEquals(new Utf8("abc"), array.get(0).testStringUnion); Assert.assertEquals(new Utf8("abc"), array.get(1).testStringUnion); // given testRecord = emptyTestRecord(); testRecord.testStringUnion = "abc"; arrayRecordSchema = Schema.createArray(createUnionSchema(TestRecord.SCHEMA$)); recordsArray = new ArrayList<>(); recordsArray.add(testRecord); recordsArray.add(testRecord); // when if (whetherUseFastDeserializer) { array = decodeRecordFast(arrayRecordSchema, arrayRecordSchema, specificDataAsDecoder(recordsArray, arrayRecordSchema)); } else { array = decodeRecordSlow(arrayRecordSchema, arrayRecordSchema, specificDataAsDecoder(recordsArray, arrayRecordSchema)); } // then Assert.assertEquals(2, array.size()); Assert.assertEquals(new Utf8("abc"), array.get(0).testStringUnion); Assert.assertEquals(new Utf8("abc"), array.get(1).testStringUnion); }
Example 14
Source File: JsonElementConversionFactory.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/**
 * Builds the Avro array schema over the element converter's schema and tags
 * it with the lower-cased source type name.
 *
 * @return the array schema with the {@code SOURCE_TYPE} property set
 */
private Schema arraySchema() {
    Schema elementSchema = getElementConverter().schema();
    Schema result = Schema.createArray(elementSchema);
    String sourceTypeName = ARRAY.toString().toLowerCase();
    result.addProp(SOURCE_TYPE, sourceTypeName);
    return result;
}
Example 15
Source File: AvroFlattener.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/*** * Flatten the Schema to un-nest recursive Records (to make it optimal for ORC) * @param schema Schema to flatten * @param shouldPopulateLineage is set to true if the field is going to be flattened and moved up the hierarchy - * so that lineage information can be tagged to it; which happens when there is a * Record within a Record OR Record within Option within Record and so on, * however not when there is a Record within Map or Array * @param flattenComplexTypes Flatten complex types recursively other than Record and Option * @return Flattened Avro Schema */ private Schema flatten(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) { Schema flattenedSchema; // Process all Schema Types // (Primitives are simply cloned) switch (schema.getType()) { case ARRAY: // Array might be an array of recursive Records, flatten them if (flattenComplexTypes) { flattenedSchema = Schema.createArray(flatten(schema.getElementType(), false)); } else { flattenedSchema = Schema.createArray(schema.getElementType()); } break; case BOOLEAN: flattenedSchema = Schema.create(schema.getType()); break; case BYTES: flattenedSchema = Schema.create(schema.getType()); break; case DOUBLE: flattenedSchema = Schema.create(schema.getType()); break; case ENUM: flattenedSchema = Schema.createEnum(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.getEnumSymbols()); break; case FIXED: flattenedSchema = Schema.createFixed(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.getFixedSize()); break; case FLOAT: flattenedSchema = Schema.create(schema.getType()); break; case INT: flattenedSchema = Schema.create(schema.getType()); break; case LONG: flattenedSchema = Schema.create(schema.getType()); break; case MAP: if (flattenComplexTypes) { flattenedSchema = Schema.createMap(flatten(schema.getValueType(), false)); } else { flattenedSchema = Schema.createMap(schema.getValueType()); } break; case NULL: flattenedSchema = Schema.create(schema.getType()); break; 
case RECORD: flattenedSchema = flattenRecord(schema, shouldPopulateLineage, flattenComplexTypes); break; case STRING: flattenedSchema = Schema.create(schema.getType()); break; case UNION: flattenedSchema = flattenUnion(schema, shouldPopulateLineage, flattenComplexTypes); break; default: String exceptionMessage = String.format("Schema flattening failed for \"%s\" ", schema); LOG.error(exceptionMessage); throw new AvroRuntimeException(exceptionMessage); } // Copy schema metadata copyProperties(schema, flattenedSchema); return flattenedSchema; }
Example 16
Source File: AvroTypeSystem.java From transport with BSD 2-Clause "Simplified" License | 4 votes |
/**
 * Creates the Avro array schema for the given element schema.
 *
 * @param elementType schema of the array elements
 * @return an array schema over {@code elementType}
 */
@Override
protected Schema createArrayType(Schema elementType) {
    final Schema arrayType = Schema.createArray(elementType);
    return arrayType;
}
Example 17
Source File: SchemaUtil.java From kite with Apache License 2.0 | 4 votes |
/** * Merges two {@link Schema} instances or returns {@code null}. * <p> * The two schemas are merged if they are the same type. Records are merged * if the two records have the same name or have no names but have a * significant number of shared fields. * <p> * @see {@link #mergeOrUnion} to return a union when a merge is not possible. * * @param left a {@code Schema} * @param right a {@code Schema} * @return a merged {@code Schema} or {@code null} if merging is not possible */ private static Schema mergeOnly(Schema left, Schema right) { if (Objects.equal(left, right)) { return left; } // handle primitive type promotion; doesn't promote integers to floats switch (left.getType()) { case INT: if (right.getType() == Schema.Type.LONG) { return right; } break; case LONG: if (right.getType() == Schema.Type.INT) { return left; } break; case FLOAT: if (right.getType() == Schema.Type.DOUBLE) { return right; } break; case DOUBLE: if (right.getType() == Schema.Type.FLOAT) { return left; } } // any other cases where the types don't match must be combined by a union if (left.getType() != right.getType()) { return null; } switch (left.getType()) { case UNION: return union(left, right); case RECORD: if (left.getName() == null && right.getName() == null && fieldSimilarity(left, right) < SIMILARITY_THRESH) { return null; } else if (!Objects.equal(left.getName(), right.getName())) { return null; } Schema combinedRecord = Schema.createRecord( coalesce(left.getName(), right.getName()), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), false ); combinedRecord.setFields(mergeFields(left, right)); return combinedRecord; case MAP: return Schema.createMap( mergeOrUnion(left.getValueType(), right.getValueType())); case ARRAY: return Schema.createArray( mergeOrUnion(left.getElementType(), right.getElementType())); case ENUM: if (!Objects.equal(left.getName(), right.getName())) { return null; } Set<String> symbols = Sets.newLinkedHashSet(); 
symbols.addAll(left.getEnumSymbols()); symbols.addAll(right.getEnumSymbols()); return Schema.createEnum( left.getName(), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), ImmutableList.copyOf(symbols) ); default: // all primitives are handled before the switch by the equality check. // schemas that reach this point are not primitives and also not any of // the above known types. throw new UnsupportedOperationException( "Unknown schema type: " + left.getType()); } }
Example 18
Source File: FastSpecificDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 4 votes |
@Test public void shouldReadArrayOfRecords() { // given Schema arrayRecordSchema = Schema.createArray(TestRecord.getClassSchema()); TestRecord testRecord = emptyTestRecord(); testRecord.put("testStringUnion", "abc"); List<TestRecord> recordsArray = new ArrayList<>(); recordsArray.add(testRecord); recordsArray.add(testRecord); // when List<TestRecord> array = deserializeSpecificFast(arrayRecordSchema, arrayRecordSchema, serializeSpecific(recordsArray, arrayRecordSchema)); // then Assert.assertEquals(2, array.size()); Assert.assertEquals("abc", array.get(0).get("testStringUnion")); Assert.assertEquals("abc", array.get(1).get("testStringUnion")); // given testRecord = emptyTestRecord(); testRecord.put("testStringUnion", "abc"); arrayRecordSchema = Schema.createArray(createUnionSchema(TestRecord .getClassSchema())); recordsArray = new ArrayList<>(); recordsArray.add(testRecord); recordsArray.add(testRecord); // when array = deserializeSpecificFast(arrayRecordSchema, arrayRecordSchema, serializeSpecific(recordsArray, arrayRecordSchema)); // then Assert.assertEquals(2, array.size()); Assert.assertEquals("abc", array.get(0).get("testStringUnion")); Assert.assertEquals("abc", array.get(1).get("testStringUnion")); }
Example 19
Source File: AvroParquetMorphlineTest.java From kite with Apache License 2.0 | 4 votes |
/**
 * Writes one record covering every field of {@code all.avsc} to a temporary
 * Parquet file, reads it back through the
 * {@code readAvroParquetFileWithProjectionSubSchema} morphline, and checks
 * each field of the record that comes out.
 */
@Test
public void testAll() throws Exception {
    Schema schema = new Schema.Parser().parse(new File("src/test/resources/test-avro-schemas/all.avsc"));

    // Reserve a unique temp path, then remove the file itself.
    // NOTE(review): presumably the writer must create the file on its own - confirm.
    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());

    GenericData.Record nestedRecord = new GenericRecordBuilder(
        schema.getField("mynestedrecord").schema())
        .set("mynestedint", 1).build();

    List<Integer> integerArray = Arrays.asList(1, 2, 3);
    GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
        Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

    GenericFixed genericFixed = new GenericData.Fixed(
        Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });

    List<Integer> emptyArray = new ArrayList<Integer>();
    // Typed instead of the raw ImmutableMap used before.
    ImmutableMap<String, Integer> emptyMap = new ImmutableMap.Builder<String, Integer>().build();

    GenericData.Record record = new GenericRecordBuilder(schema)
        .set("mynull", null)
        .set("myboolean", true)
        .set("myint", 1)
        .set("mylong", 2L)
        .set("myfloat", 3.1f)
        .set("mydouble", 4.1)
        .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
        .set("mystring", "hello")
        .set("mynestedrecord", nestedRecord)
        .set("myenum", "a")
        .set("myarray", genericIntegerArray)
        .set("myemptyarray", emptyArray)
        .set("myoptionalarray", genericIntegerArray)
        .set("mymap", ImmutableMap.of("a", 1, "b", 2))
        .set("myemptymap", emptyMap)
        .set("myfixed", genericFixed)
        .build();

    // Close the writer even when write() fails so the file handle is not leaked.
    AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema);
    try {
        writer.write(record);
    } finally {
        writer.close();
    }

    morphline = createMorphline("test-morphlines/readAvroParquetFileWithProjectionSubSchema");

    Record morphlineRecord = new Record();
    morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
    collector.reset();

    assertTrue(morphline.process(morphlineRecord));

    assertEquals(1, collector.getRecords().size());
    GenericData.Record actualRecord = (GenericData.Record)
        collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
    assertNotNull(actualRecord);
    assertEquals(null, actualRecord.get("mynull"));
    assertEquals(true, actualRecord.get("myboolean"));
    assertEquals(1, actualRecord.get("myint"));
    assertEquals(2L, actualRecord.get("mylong"));
    // NOTE(review): 3.1f was written but null is expected back; presumably the
    // morphline's projection sub-schema leaves this field out - confirm.
    assertEquals(null, actualRecord.get("myfloat"));
    assertEquals(4.1, actualRecord.get("mydouble"));
    assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), actualRecord.get("mybytes"));
    assertEquals("hello", actualRecord.get("mystring"));
    assertEquals("a", actualRecord.get("myenum"));
    assertEquals(nestedRecord, actualRecord.get("mynestedrecord"));
    assertEquals(integerArray, actualRecord.get("myarray"));
    assertEquals(emptyArray, actualRecord.get("myemptyarray"));
    assertEquals(integerArray, actualRecord.get("myoptionalarray"));
    assertEquals(ImmutableMap.of("a", 1, "b", 2), actualRecord.get("mymap"));
    assertEquals(emptyMap, actualRecord.get("myemptymap"));
    assertEquals(genericFixed, actualRecord.get("myfixed"));
}
Example 20
Source File: AvroTestUtil.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Shorthand for creating an Avro array schema.
 *
 * @param element schema of the array elements
 * @return an array schema over {@code element}
 */
public static Schema array(Schema element) {
    final Schema arraySchema = Schema.createArray(element);
    return arraySchema;
}