Java Code Examples for org.apache.avro.Schema#createEnum()
The following examples show how to use org.apache.avro.Schema#createEnum().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroResolverTest.java From pxf with Apache License 2.0 | 5 votes |
/**
 * Builds an Avro enum schema whose symbols are the entries of {@code symbols}, in array order.
 * <p>
 * Note: the {@code name} argument is not read by this helper; the schema is always created
 * with the fixed name {@code "enum"}, an empty doc string and a {@code null} namespace.
 *
 * @param name    not used
 * @param symbols the enum symbols
 * @return the newly created enum schema
 */
private Schema createEnum(String name, String[] symbols) {
  List<String> symbolList = new ArrayList<>();
  for (int i = 0; i < symbols.length; i++) {
    symbolList.add(symbols[i]);
  }
  return Schema.createEnum("enum", "", null, symbolList);
}
Example 2
Source File: TestParquetExport.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
public void testSupportedParquetTypes() throws IOException, SQLException { String[] argv = {}; final int TOTAL_RECORDS = 1 * 10; byte[] b = new byte[] { (byte) 1, (byte) 2 }; Schema fixed = Schema.createFixed("myfixed", null, null, 2); Schema enumeration = Schema.createEnum("myenum", null, null, Lists.newArrayList("a", "b")); ColumnGenerator[] gens = new ColumnGenerator[] { colGenerator(true, Schema.create(Schema.Type.BOOLEAN), true, "BIT"), colGenerator(100, Schema.create(Schema.Type.INT), 100, "INTEGER"), colGenerator(200L, Schema.create(Schema.Type.LONG), 200L, "BIGINT"), // HSQLDB maps REAL to double, not float: colGenerator(1.0f, Schema.create(Schema.Type.FLOAT), 1.0d, "REAL"), colGenerator(2.0d, Schema.create(Schema.Type.DOUBLE), 2.0d, "DOUBLE"), colGenerator("s", Schema.create(Schema.Type.STRING), "s", "VARCHAR(8)"), colGenerator(ByteBuffer.wrap(b), Schema.create(Schema.Type.BYTES), b, "VARBINARY(8)"), colGenerator(new GenericData.Fixed(fixed, b), fixed, b, "BINARY(2)"), colGenerator(new GenericData.EnumSymbol(enumeration, "a"), enumeration, "a", "VARCHAR(8)"), }; createParquetFile(0, TOTAL_RECORDS, gens); createTable(gens); runExport(getArgv(true, 10, 10, newStrArray(argv, "-m", "" + 1))); verifyExport(TOTAL_RECORDS); for (int i = 0; i < gens.length; i++) { assertColMinAndMax(forIdx(i), gens[i]); } }
Example 3
Source File: TestAvroExport.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
public void testSupportedAvroTypes() throws IOException, SQLException { String[] argv = {}; final int TOTAL_RECORDS = 1 * 10; byte[] b = new byte[] { (byte) 1, (byte) 2 }; Schema fixed = Schema.createFixed("myfixed", null, null, 2); Schema enumeration = Schema.createEnum("myenum", null, null, Lists.newArrayList("a", "b")); ColumnGenerator[] gens = new ColumnGenerator[] { colGenerator(true, Schema.create(Schema.Type.BOOLEAN), true, "BIT"), colGenerator(100, Schema.create(Schema.Type.INT), 100, "INTEGER"), colGenerator(200L, Schema.create(Schema.Type.LONG), 200L, "BIGINT"), // HSQLDB maps REAL to double, not float: colGenerator(1.0f, Schema.create(Schema.Type.FLOAT), 1.0d, "REAL"), colGenerator(2.0d, Schema.create(Schema.Type.DOUBLE), 2.0d, "DOUBLE"), colGenerator("s", Schema.create(Schema.Type.STRING), "s", "VARCHAR(8)"), colGenerator(ByteBuffer.wrap(b), Schema.create(Schema.Type.BYTES), b, "VARBINARY(8)"), colGenerator(new GenericData.Fixed(fixed, b), fixed, b, "BINARY(2)"), colGenerator(new GenericData.EnumSymbol(enumeration, "a"), enumeration, "a", "VARCHAR(8)"), }; createAvroFile(0, TOTAL_RECORDS, gens); createTable(gens); runExport(getArgv(true, 10, 10, newStrArray(argv, "-m", "" + 1))); verifyExport(TOTAL_RECORDS); for (int i = 0; i < gens.length; i++) { assertColMinAndMax(forIdx(i), gens[i]); } }
Example 4
Source File: FastDeserializerDefaultsTest.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer") @SuppressWarnings("unchecked") public void shouldReadGenericDefaults(Boolean whetherUseFastDeserializer) throws IOException { // given Schema oldRecordSchema = Schema.parse(this.getClass().getResourceAsStream("/schema/defaultsTestOld.avsc")); GenericData.Record oldRecord = new GenericData.Record(oldRecordSchema); GenericData.Record oldSubRecord = new GenericData.Record(oldRecordSchema.getField("oldSubRecord").schema()); oldSubRecord.put("oldSubField", new Utf8("testValueOfSubField")); oldSubRecord.put("fieldToBeRemoved", 33); oldRecord.put("oldSubRecord", oldSubRecord); // when GenericRecord testRecord = null; if (whetherUseFastDeserializer) { testRecord = decodeGenericFast(DefaultsTestRecord.SCHEMA$, oldRecordSchema, genericDataAsDecoder(oldRecord)); } else { testRecord = decodeGenericSlow(DefaultsTestRecord.SCHEMA$, oldRecordSchema, genericDataAsDecoder(oldRecord)); } // then Assert.assertEquals(oldSubRecord.get("oldSubField"), ((GenericData.Record) testRecord.get("oldSubRecord")).get("oldSubField")); Assert.assertEquals(new Utf8("defaultOldSubField"), ((GenericData.Record) testRecord.get("newFieldWithOldSubRecord")).get("oldSubField")); Assert.assertEquals(42, (int) testRecord.get("testInt")); Assert.assertNull(testRecord.get("testIntUnion")); Assert.assertEquals(9223372036854775807L, (long) testRecord.get("testLong")); Assert.assertNull(testRecord.get("testLongUnion")); Assert.assertEquals(3.14d, (double) testRecord.get("testDouble"), 0); Assert.assertNull(testRecord.get("testDoubleUnion")); Assert.assertEquals(3.14f, (float) testRecord.get("testFloat"), 0); Assert.assertNull(testRecord.get("testFloatUnion")); Assert.assertEquals(true, testRecord.get("testBoolean")); Assert.assertNull(testRecord.get("testBooleanUnion")); Assert.assertEquals(ByteBuffer.wrap("1234".getBytes()), testRecord.get("testBytes")); Assert.assertNull(testRecord.get("testBytesUnion")); Assert.assertEquals(new 
Utf8("testStringValue"), testRecord.get("testString")); Assert.assertEquals(new Utf8("http://www.example.com"), testRecord.get("testStringable")); Assert.assertNull(testRecord.get("testStringUnion")); Schema fixedSchema = Schema.createFixed("DefaultsFixed", "", "", 1); GenericData.Fixed expectedFixed1 = AvroCompatibilityHelper.newFixedField(fixedSchema, new byte[]{(byte) '5'}); Assert.assertEquals(expectedFixed1, testRecord.get("testFixed")); Assert.assertNull(testRecord.get("testFixedUnion")); GenericData.Fixed expectedFixed2 = AvroCompatibilityHelper.newFixedField(fixedSchema, new byte[]{(byte) '6'}); Assert.assertTrue(Arrays.asList(expectedFixed2).equals(testRecord.get("testFixedArray"))); List listWithNull = new LinkedList(); listWithNull.add(null); Assert.assertTrue(listWithNull.equals(testRecord.get("testFixedUnionArray"))); Assert.assertEquals("C", testRecord.get("testEnum").toString()); Assert.assertNull(testRecord.get("testEnumUnion")); Schema enumSchema = Schema.createEnum("DefaultsNewEnum", "", "", Arrays.asList("A", "B")); Assert.assertTrue(Arrays.asList(Arrays.asList(AvroCompatibilityHelper.newEnumSymbol(enumSchema, "B"))) .equals(testRecord.get("testNewEnumIntUnionArray"))); Assert.assertEquals("E", ((List<GenericData.EnumSymbol>) testRecord.get("testEnumArray")).get(0).toString()); Assert.assertEquals("B", ((List<GenericData.EnumSymbol>) testRecord.get("testEnumArray")).get(1).toString()); Assert.assertTrue(listWithNull.equals(testRecord.get("testEnumUnionArray"))); Assert.assertNull(testRecord.get("subRecordUnion")); Assert.assertEquals(newGenericSubRecord("valueOfSubField", null, "A"), testRecord.get("subRecord")); Assert.assertTrue( Arrays.asList(newGenericSubRecord("recordArrayValue", null, "A")).equals(testRecord.get("recordArray"))); Assert.assertTrue(listWithNull.equals(testRecord.get("recordUnionArray"))); Map stringableMap = new HashMap(); stringableMap.put(new Utf8("http://www.example2.com"), new Utf8("123")); 
Assert.assertEquals(stringableMap, testRecord.get("stringableMap")); Map recordMap = new HashMap(); recordMap.put(new Utf8("test"), newGenericSubRecord("recordMapValue", null, "A")); Assert.assertEquals(recordMap, testRecord.get("recordMap")); Map recordUnionMap = new HashMap(); recordUnionMap.put(new Utf8("test"), null); Assert.assertEquals(recordUnionMap, testRecord.get("recordUnionMap")); Assert.assertTrue( new ArrayList(Collections.singletonList(recordUnionMap)).equals(testRecord.get("recordUnionMapArray"))); Map recordUnionArrayMap = new HashMap(); recordUnionArrayMap.put(new Utf8("test"), listWithNull); Assert.assertTrue(recordUnionArrayMap.equals(testRecord.get("recordUnionArrayMap"))); }
Example 5
Source File: FastDeserializerDefaultsTest.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer") public void shouldAddFieldsInMiddleOfSchema(Boolean whetherUseFastDeserializer) throws IOException { // given Schema oldRecordSchema = TestRecord.SCHEMA$; GenericData.Record subRecord = new GenericData.Record(oldRecordSchema.getField("subRecordUnion").schema().getTypes().get(1)); Schema enumSchema = Schema.createEnum("TestEnum", "", "", Arrays.asList("A", "B", "C", "D", "E")); GenericData.EnumSymbol testEnum = AvroCompatibilityHelper.newEnumSymbol(enumSchema, "A"); GenericData.Fixed testFixed = new GenericData.Fixed(oldRecordSchema.getField("testFixed").schema()); testFixed.bytes(new byte[]{0x01}); GenericData.Record oldRecord = new GenericData.Record(oldRecordSchema); oldRecord.put("testInt", 1); oldRecord.put("testLong", 1L); oldRecord.put("testDouble", 1.0); oldRecord.put("testFloat", 1.0f); oldRecord.put("testBoolean", true); oldRecord.put("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02})); oldRecord.put("testString", "aaa"); oldRecord.put("testFixed", testFixed); oldRecord.put("testEnum", testEnum); subRecord.put("subField", "abc"); subRecord.put("anotherField", "ghi"); oldRecord.put("subRecordUnion", subRecord); oldRecord.put("subRecord", subRecord); oldRecord.put("recordsArray", Collections.singletonList(subRecord)); Map<String, GenericData.Record> recordsMap = new HashMap<>(); recordsMap.put("1", subRecord); oldRecord.put("recordsMap", recordsMap); oldRecord.put("testFixedArray", Collections.emptyList()); oldRecord.put("testFixedUnionArray", Collections.emptyList()); oldRecord.put("testEnumArray", Collections.emptyList()); oldRecord.put("testEnumUnionArray", Collections.emptyList()); oldRecord.put("recordsArrayMap", Collections.emptyList()); oldRecord.put("recordsMapArray", Collections.emptyMap()); Schema newRecordSchema = Schema.parse(this.getClass().getResourceAsStream("/schema/defaultsTestSubrecord.avsc")); // when GenericRecord record = null; if (whetherUseFastDeserializer 
|| Utils.isAvro14()) { record = decodeGenericFast(newRecordSchema, oldRecordSchema, genericDataAsDecoder(oldRecord)); } else { // There is a bug in Schema.applyAliases of avro-1.4, and the following invocation will trigger it. record = decodeGenericSlow(newRecordSchema, oldRecordSchema, genericDataAsDecoder(oldRecord)); } // then GenericData.Record newSubRecord = new GenericData.Record(newRecordSchema.getField("subRecordUnion").schema().getTypes().get(1)); newSubRecord.put("subField", new Utf8("abc")); newSubRecord.put("anotherField", new Utf8("ghi")); newSubRecord.put("newSubField", new Utf8("newSubFieldValue")); Map<Utf8, GenericData.Record> expectedRecordsMap = new HashMap<>(); expectedRecordsMap.put(new Utf8("1"), newSubRecord); Assert.assertEquals("newSubFieldValue", ((GenericRecord) record.get("subRecordUnion")).get("newSubField").toString()); Assert.assertEquals("newFieldValue", record.get("newField").toString()); Assert.assertEquals(1, record.get("testInt")); Assert.assertEquals(1L, record.get("testLong")); Assert.assertEquals(1.0, record.get("testDouble")); Assert.assertEquals(1.0f, record.get("testFloat")); Assert.assertEquals(true, record.get("testBoolean")); Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytes")); Assert.assertEquals(new Utf8("aaa"), record.get("testString")); Assert.assertEquals(testFixed, record.get("testFixed")); Assert.assertEquals(testEnum, record.get("testEnum")); Assert.assertEquals(newSubRecord, record.get("subRecordUnion")); Assert.assertTrue(Arrays.asList(newSubRecord).equals(record.get("recordsArray"))); Assert.assertEquals(expectedRecordsMap, record.get("recordsMap")); Assert.assertTrue(Collections.emptyList().equals(record.get("testFixedArray"))); Assert.assertTrue(Collections.emptyList().equals(record.get("testFixedUnionArray"))); Assert.assertTrue(Collections.emptyList().equals(record.get("testEnumArray"))); Assert.assertTrue(Collections.emptyList().equals(record.get("testEnumUnionArray"))); 
Assert.assertTrue(Collections.emptyList().equals(record.get("recordsArrayMap"))); Assert.assertEquals(Collections.emptyMap(), record.get("recordsMapArray")); }
Example 6
Source File: FastSerdeTestsSupport.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
/**
 * Creates an Avro enum schema with an empty doc string in the
 * {@code com.adpilot.utils.generated.avro} namespace.
 *
 * @param name     name of the enum schema
 * @param ordinals enum symbols, in order
 * @return the enum schema
 */
public static Schema createEnumSchema(String name, String[] ordinals) {
  final String emptyDoc = "";
  final String namespace = "com.adpilot.utils.generated.avro";
  return Schema.createEnum(name, emptyDoc, namespace, Arrays.asList(ordinals));
}
Example 7
Source File: FastSerdeBenchmarkSupport.java From avro-fastserde with Apache License 2.0 | 4 votes |
/**
 * Creates an enum schema in {@code NAMESPACE} with a null doc. The schema name is
 * {@code "Enum"} followed by five random alphabetic characters, and the symbols come from
 * {@code getRandomStringList()}.
 *
 * @return the generated enum schema
 */
public static Schema generateRandomEnumSchema() {
  String enumName = "Enum" + RandomStringUtils.randomAlphabetic(5);
  return Schema.createEnum(enumName, null, NAMESPACE, getRandomStringList());
}
Example 8
Source File: FastSerdeTestsSupport.java From avro-fastserde with Apache License 2.0 | 4 votes |
/**
 * Creates an Avro enum schema with an empty doc string in {@code NAMESPACE}.
 *
 * @param name     name of the enum schema
 * @param ordinals enum symbols, in order
 * @return the enum schema
 */
public static Schema createEnumSchema(String name, String[] ordinals) {
  final String emptyDoc = "";
  return Schema.createEnum(name, emptyDoc, NAMESPACE, Arrays.asList(ordinals));
}
Example 9
Source File: SqoopAvroUtils.java From sqoop-on-spark with Apache License 2.0 | 4 votes |
/**
 * Creates an Avro enum schema named after {@code column}, with a null doc, in
 * {@code SQOOP_SCHEMA_NAMESPACE}. The symbols are the column's options, copied into a list.
 *
 * @param column column to convert; it is cast to {@code org.apache.sqoop.schema.type.Enum}
 * @return the enum schema
 */
public static Schema createEnumSchema(Column column) {
  org.apache.sqoop.schema.type.Enum enumColumn = (org.apache.sqoop.schema.type.Enum) column;
  List<String> symbols = new ArrayList<String>(enumColumn.getOptions());
  return Schema.createEnum(column.getName(), null, SQOOP_SCHEMA_NAMESPACE, symbols);
}
Example 10
Source File: AvroUtils.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/** * Copies the input {@link org.apache.avro.Schema} but changes the schema namespace. * @param schema {@link org.apache.avro.Schema} to copy. * @param namespaceOverride namespace for the copied {@link org.apache.avro.Schema}. * @return A {@link org.apache.avro.Schema} that is a copy of schema, but has the new namespace. */ public static Schema switchNamespace(Schema schema, Map<String, String> namespaceOverride) { Schema newSchema; String newNamespace = StringUtils.EMPTY; // Process all Schema Types // (Primitives are simply cloned) switch (schema.getType()) { case ENUM: newNamespace = namespaceOverride.containsKey(schema.getNamespace()) ? namespaceOverride.get(schema.getNamespace()) : schema.getNamespace(); newSchema = Schema.createEnum(schema.getName(), schema.getDoc(), newNamespace, schema.getEnumSymbols()); break; case FIXED: newNamespace = namespaceOverride.containsKey(schema.getNamespace()) ? namespaceOverride.get(schema.getNamespace()) : schema.getNamespace(); newSchema = Schema.createFixed(schema.getName(), schema.getDoc(), newNamespace, schema.getFixedSize()); break; case MAP: newSchema = Schema.createMap(switchNamespace(schema.getValueType(), namespaceOverride)); break; case RECORD: newNamespace = namespaceOverride.containsKey(schema.getNamespace()) ? 
namespaceOverride.get(schema.getNamespace()) : schema.getNamespace(); List<Schema.Field> newFields = new ArrayList<>(); if (schema.getFields().size() > 0) { for (Schema.Field oldField : schema.getFields()) { Field newField = new Field(oldField.name(), switchNamespace(oldField.schema(), namespaceOverride), oldField.doc(), oldField.defaultValue(), oldField.order()); newFields.add(newField); } } newSchema = Schema.createRecord(schema.getName(), schema.getDoc(), newNamespace, schema.isError()); newSchema.setFields(newFields); break; case UNION: List<Schema> newUnionMembers = new ArrayList<>(); if (null != schema.getTypes() && schema.getTypes().size() > 0) { for (Schema oldUnionMember : schema.getTypes()) { newUnionMembers.add(switchNamespace(oldUnionMember, namespaceOverride)); } } newSchema = Schema.createUnion(newUnionMembers); break; case ARRAY: newSchema = Schema.createArray(switchNamespace(schema.getElementType(), namespaceOverride)); break; case BOOLEAN: case BYTES: case DOUBLE: case FLOAT: case INT: case LONG: case NULL: case STRING: newSchema = Schema.create(schema.getType()); break; default: String exceptionMessage = String.format("Schema namespace replacement failed for \"%s\" ", schema); LOG.error(exceptionMessage); throw new AvroRuntimeException(exceptionMessage); } // Copy schema metadata copyProperties(schema, newSchema); return newSchema; }
Example 11
Source File: AvroFlattener.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/*** * Flatten the Schema to un-nest recursive Records (to make it optimal for ORC) * @param schema Schema to flatten * @param shouldPopulateLineage is set to true if the field is going to be flattened and moved up the hierarchy - * so that lineage information can be tagged to it; which happens when there is a * Record within a Record OR Record within Option within Record and so on, * however not when there is a Record within Map or Array * @param flattenComplexTypes Flatten complex types recursively other than Record and Option * @return Flattened Avro Schema */ private Schema flatten(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) { Schema flattenedSchema; // Process all Schema Types // (Primitives are simply cloned) switch (schema.getType()) { case ARRAY: // Array might be an array of recursive Records, flatten them if (flattenComplexTypes) { flattenedSchema = Schema.createArray(flatten(schema.getElementType(), false)); } else { flattenedSchema = Schema.createArray(schema.getElementType()); } break; case BOOLEAN: flattenedSchema = Schema.create(schema.getType()); break; case BYTES: flattenedSchema = Schema.create(schema.getType()); break; case DOUBLE: flattenedSchema = Schema.create(schema.getType()); break; case ENUM: flattenedSchema = Schema.createEnum(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.getEnumSymbols()); break; case FIXED: flattenedSchema = Schema.createFixed(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.getFixedSize()); break; case FLOAT: flattenedSchema = Schema.create(schema.getType()); break; case INT: flattenedSchema = Schema.create(schema.getType()); break; case LONG: flattenedSchema = Schema.create(schema.getType()); break; case MAP: if (flattenComplexTypes) { flattenedSchema = Schema.createMap(flatten(schema.getValueType(), false)); } else { flattenedSchema = Schema.createMap(schema.getValueType()); } break; case NULL: flattenedSchema = Schema.create(schema.getType()); break; 
case RECORD: flattenedSchema = flattenRecord(schema, shouldPopulateLineage, flattenComplexTypes); break; case STRING: flattenedSchema = Schema.create(schema.getType()); break; case UNION: flattenedSchema = flattenUnion(schema, shouldPopulateLineage, flattenComplexTypes); break; default: String exceptionMessage = String.format("Schema flattening failed for \"%s\" ", schema); LOG.error(exceptionMessage); throw new AvroRuntimeException(exceptionMessage); } // Copy schema metadata copyProperties(schema, flattenedSchema); return flattenedSchema; }
Example 12
Source File: JsonElementConversionFactory.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/**
 * Creates the enum schema from {@code enumName}, {@code namespace} and {@code enumSet} with an
 * empty doc, stores it in {@code this.schema}, tags it with the lower-cased {@code ENUM} name
 * under the {@code SOURCE_TYPE} property and returns the result of
 * {@code buildUnionIfNullable}.
 */
@Override
public Schema schema() {
  final String emptyDoc = "";
  this.schema = Schema.createEnum(this.enumName, emptyDoc, namespace, this.enumSet);

  String sourceType = ENUM.toString().toLowerCase();
  this.schema.addProp(SOURCE_TYPE, sourceType);

  return buildUnionIfNullable(this.schema);
}
Example 13
Source File: JsonElementConversionWithAvroSchemaFactory.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/**
 * Creates the enum schema from {@code enumName} and {@code enumSet} with an empty doc and an
 * empty namespace, stores it in {@code this.schema}, sets its {@code source.type} property to
 * {@code enum} and returns it.
 */
@Override
public Schema schema() {
  final String emptyDoc = "";
  final String emptyNamespace = "";
  this.schema = Schema.createEnum(this.enumName, emptyDoc, emptyNamespace, this.enumSet);
  this.schema.addProp("source.type", "enum");
  return this.schema;
}
Example 14
Source File: Schemas.java From parquet-mr with Apache License 2.0 | 4 votes |
/** * Merges two {@link Schema} instances or returns {@code null}. * <p> * The two schemas are merged if they are the same type. Records are merged * if the two records have the same name or have no names but have a * significant number of shared fields. * <p> * @see {@link #mergeOrUnion} to return a union when a merge is not possible. * * @param left a {@code Schema} * @param right a {@code Schema} * @return a merged {@code Schema} or {@code null} if merging is not possible */ private static Schema mergeOnly(Schema left, Schema right) { if (Objects.equal(left, right)) { return left; } // handle primitive type promotion; doesn't promote integers to floats switch (left.getType()) { case INT: if (right.getType() == Schema.Type.LONG) { return right; } break; case LONG: if (right.getType() == Schema.Type.INT) { return left; } break; case FLOAT: if (right.getType() == Schema.Type.DOUBLE) { return right; } break; case DOUBLE: if (right.getType() == Schema.Type.FLOAT) { return left; } } // any other cases where the types don't match must be combined by a union if (left.getType() != right.getType()) { return null; } switch (left.getType()) { case UNION: return union(left, right); case RECORD: if (left.getName() == null && right.getName() == null && fieldSimilarity(left, right) < SIMILARITY_THRESH) { return null; } else if (!Objects.equal(left.getName(), right.getName())) { return null; } Schema combinedRecord = Schema.createRecord( coalesce(left.getName(), right.getName()), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), false ); combinedRecord.setFields(mergeFields(left, right)); return combinedRecord; case MAP: return Schema.createMap( mergeOrUnion(left.getValueType(), right.getValueType())); case ARRAY: return Schema.createArray( mergeOrUnion(left.getElementType(), right.getElementType())); case ENUM: if (!Objects.equal(left.getName(), right.getName())) { return null; } Set<String> symbols = Sets.newLinkedHashSet(); 
symbols.addAll(left.getEnumSymbols()); symbols.addAll(right.getEnumSymbols()); return Schema.createEnum( left.getName(), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), ImmutableList.copyOf(symbols) ); default: // all primitives are handled before the switch by the equality check. // schemas that reach this point are not primitives and also not any of // the above known types. throw new UnsupportedOperationException( "Unknown schema type: " + left.getType()); } }
Example 15
Source File: SchemaUtil.java From kite with Apache License 2.0 | 4 votes |
/** * Merges two {@link Schema} instances or returns {@code null}. * <p> * The two schemas are merged if they are the same type. Records are merged * if the two records have the same name or have no names but have a * significant number of shared fields. * <p> * @see {@link #mergeOrUnion} to return a union when a merge is not possible. * * @param left a {@code Schema} * @param right a {@code Schema} * @return a merged {@code Schema} or {@code null} if merging is not possible */ private static Schema mergeOnly(Schema left, Schema right) { if (Objects.equal(left, right)) { return left; } // handle primitive type promotion; doesn't promote integers to floats switch (left.getType()) { case INT: if (right.getType() == Schema.Type.LONG) { return right; } break; case LONG: if (right.getType() == Schema.Type.INT) { return left; } break; case FLOAT: if (right.getType() == Schema.Type.DOUBLE) { return right; } break; case DOUBLE: if (right.getType() == Schema.Type.FLOAT) { return left; } } // any other cases where the types don't match must be combined by a union if (left.getType() != right.getType()) { return null; } switch (left.getType()) { case UNION: return union(left, right); case RECORD: if (left.getName() == null && right.getName() == null && fieldSimilarity(left, right) < SIMILARITY_THRESH) { return null; } else if (!Objects.equal(left.getName(), right.getName())) { return null; } Schema combinedRecord = Schema.createRecord( coalesce(left.getName(), right.getName()), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), false ); combinedRecord.setFields(mergeFields(left, right)); return combinedRecord; case MAP: return Schema.createMap( mergeOrUnion(left.getValueType(), right.getValueType())); case ARRAY: return Schema.createArray( mergeOrUnion(left.getElementType(), right.getElementType())); case ENUM: if (!Objects.equal(left.getName(), right.getName())) { return null; } Set<String> symbols = Sets.newLinkedHashSet(); 
symbols.addAll(left.getEnumSymbols()); symbols.addAll(right.getEnumSymbols()); return Schema.createEnum( left.getName(), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), ImmutableList.copyOf(symbols) ); default: // all primitives are handled before the switch by the equality check. // schemas that reach this point are not primitives and also not any of // the above known types. throw new UnsupportedOperationException( "Unknown schema type: " + left.getType()); } }