Java Code Examples for org.apache.avro.Schema#getValueType()
The following examples show how to use
org.apache.avro.Schema#getValueType().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroMapCodecs.java From funcj with MIT License | 6 votes |
/**
 * Decodes an Avro map value into a {@code Map<String, V>}.
 *
 * <p>The schema carried by {@code in} is checked to be of type {@code MAP}. Each entry's key is
 * converted with {@code toString()} and each value is decoded by the value codec against the
 * map's value schema.
 *
 * @param core the codec core passed through to the value codec
 * @param in the schema/value pair to decode
 * @return a new {@code HashMap} holding the decoded entries
 */
@Override
public Map<String, V> decode(CodecCoreEx<WithSchema, Object, Config> core, WithSchema in) {
    final Schema mapSchema = checkSchemaType(in.schema(), Schema.Type.MAP);
    final Schema elementSchema = mapSchema.getValueType();
    final Map<CharSequence, Object> source = in.value();

    final Map<String, V> decoded = new HashMap<>();
    for (Map.Entry<CharSequence, Object> entry : source.entrySet()) {
        final String name = entry.getKey().toString();
        final WithSchema wrapped = WithSchema.of(entry.getValue(), elementSchema);
        decoded.put(name, valueCodec.decodeWithCheck(core, wrapped));
    }
    return decoded;
}
Example 2
Source File: PigAvroDatumReader.java From Cubert with Apache License 2.0 | 5 votes |
/**
 * Called to read a map instance. Overridden to read a pig map.
 *
 * <p>Entries are read block by block: the decoder reports the size of the first block via
 * {@code readMapStart()} and of each following block via {@code mapNext()}; a size of zero
 * (or less) ends the map.
 */
protected Object readMap(Object old, Schema expected, ResolvingDecoder in) throws IOException {
    final Schema valueSchema = expected.getValueType();
    long blockSize = in.readMapStart();
    final Object result = newMap(old, (int) blockSize);

    while (blockSize > 0) {
        for (int i = 0; i < blockSize; i++) {
            // The key must be read before the value: both come from the same decoder stream.
            final Object key = readString(null, AvroStorageUtils.StringSchema, in);
            final Object value = read(null, valueSchema, in);
            addToMap(result, key, value);
        }
        blockSize = in.mapNext();
    }
    return result;
}
Example 3
Source File: MercifulJsonConverter.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Builds the processor that handles map-typed fields.
 *
 * <p>The returned processor casts the incoming value to a {@code Map<String, Object>} and
 * converts every entry value with {@code convertJsonToAvroField} against the map schema's
 * value type; keys are carried over unchanged.
 */
private static JsonToAvroFieldProcessor generateMapTypeHandler() {
    return new JsonToAvroFieldProcessor() {
        @Override
        public Pair<Boolean, Object> convert(Object value, String name, Schema schema) {
            final Schema elementSchema = schema.getValueType();
            final Map<String, Object> converted = new HashMap<>();
            final Map<String, Object> source = (Map<String, Object>) value;
            for (Map.Entry<String, Object> entry : source.entrySet()) {
                final Object avroValue = convertJsonToAvroField(entry.getValue(), name, elementSchema);
                converted.put(entry.getKey(), avroValue);
            }
            return Pair.of(true, converted);
        }
    };
}
Example 4
Source File: AvroResolver.java From pxf with Apache License 2.0 | 5 votes |
/**
 * Resolves an Avro field that is a map and adds its entries to the output record.
 * <p>
 * For each map entry, the key (populated with a string schema) and the value (populated with
 * the map's value schema) are collected in a local list. That list is rendered with
 * {@code HdfsUtilities.toString} using {@code mapkeyDelim} and added to {@code record} as one
 * {@code TEXT} field.
 * <p>
 * The unchecked warning is suppressed to enable us to cast fieldValue to a Map
 * (since the value schema has been identified to be of type map).
 *
 * @param record list of fields to be populated
 * @param fieldValue field value
 * @param mapSchema map schema
 * @return number of populated fields
 */
@SuppressWarnings("unchecked")
int setMapField(List<OneField> record, Object fieldValue, Schema mapSchema) {
    final Schema stringSchema = Schema.create(Schema.Type.STRING);
    final Schema elementSchema = mapSchema.getValueType();
    final Map<String, ?> entries = (Map<String, ?>) fieldValue;

    for (Map.Entry<String, ?> mapEntry : entries.entrySet()) {
        final List<OneField> keyAndValue = new LinkedList<>();
        populateRecord(keyAndValue, mapEntry.getKey(), stringSchema);
        populateRecord(keyAndValue, mapEntry.getValue(), elementSchema);

        final String rendered = HdfsUtilities.toString(keyAndValue, mapkeyDelim);
        addOneFieldToRecord(record, DataType.TEXT, rendered);
    }
    return entries.size();
}
Example 5
Source File: PigAvroDatumReader.java From spork with Apache License 2.0 | 5 votes |
/**
 * Called to read a map instance. Overridden to read a pig map.
 *
 * <p>Entries are read block by block: the decoder reports the size of the first block via
 * {@code readMapStart()} and of each following block via {@code mapNext()}; a size of zero
 * (or less) ends the map.
 */
protected Object readMap(Object old, Schema expected, ResolvingDecoder in) throws IOException {
    final Schema valueSchema = expected.getValueType();
    long blockSize = in.readMapStart();
    final Object result = newMap(old, (int) blockSize);

    while (blockSize > 0) {
        for (int i = 0; i < blockSize; i++) {
            // The key must be read before the value: both come from the same decoder stream.
            final Object key = readString(null, AvroStorageUtils.StringSchema, in);
            final Object value = read(null, valueSchema, in);
            addToMap(result, key, value);
        }
        blockSize = in.mapNext();
    }
    return result;
}
Example 6
Source File: ParquetRecordReaderTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Writes one record holding a nested map to a temporary Parquet file, reads it back through
 * {@code ParquetRecordReader}, and checks the row arity, the {@code foo} value and the
 * fields of the nested map entry.
 */
@Test
public void testNestedMapGroup() throws IOException {
    final Schema mapSchema = unWrapSchema(NESTED_SCHEMA.getField("nestedMap").schema());
    Preconditions.checkState(mapSchema.getType().equals(Schema.Type.MAP));

    // Build the single map entry from the map's value schema.
    final Schema entrySchema = mapSchema.getValueType();
    final GenericRecord entryValue = new GenericRecordBuilder(entrySchema)
        .set("type", "nested")
        .set("value", "nested_value")
        .build();

    final ImmutableMap.Builder<String, GenericRecord> mapBuilder = ImmutableMap.builder();
    mapBuilder.put("testKey", entryValue);

    final GenericRecord record = new GenericRecordBuilder(NESTED_SCHEMA)
        .set("nestedMap", mapBuilder.build())
        .set("foo", 34L)
        .build();

    final Path path = createTempParquetFile(
        tempRoot.getRoot(), NESTED_SCHEMA, Collections.singletonList(record));
    final MessageType readSchema = (new AvroSchemaConverter()).convert(NESTED_SCHEMA);
    final ParquetRecordReader<Row> rowReader =
        new ParquetRecordReader<>(new RowReadSupport(), readSchema);

    final InputFile inputFile =
        HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(path.toUri()), testConfig);
    final ParquetFileReader fileReader =
        new ParquetFileReader(inputFile, ParquetReadOptions.builder().build());
    rowReader.initialize(fileReader, testConfig);
    assertFalse(rowReader.reachEnd());

    final Row row = rowReader.nextRecord();
    assertEquals(7, row.getArity());
    assertEquals(34L, row.getField(0));

    final Map<?, ?> nestedMap = (Map<?, ?>) row.getField(5);
    final Row nestedRow = (Row) nestedMap.get("testKey");
    assertEquals("nested", nestedRow.getField(0));
    assertEquals("nested_value", nestedRow.getField(1));
}
Example 7
Source File: ParquetRecordReaderTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Writes one record holding a nested map to a temporary Parquet file, reads it back through
 * {@code ParquetRecordReader}, and checks the row arity, the {@code foo} value and the
 * fields of the nested map entry.
 */
@Test
public void testNestedMapGroup() throws IOException {
    final Schema mapSchema = unWrapSchema(NESTED_SCHEMA.getField("nestedMap").schema());
    Preconditions.checkState(mapSchema.getType().equals(Schema.Type.MAP));

    // Build the single map entry from the map's value schema.
    final Schema entrySchema = mapSchema.getValueType();
    final GenericRecord entryValue = new GenericRecordBuilder(entrySchema)
        .set("type", "nested")
        .set("value", "nested_value")
        .build();

    final ImmutableMap.Builder<String, GenericRecord> mapBuilder = ImmutableMap.builder();
    mapBuilder.put("testKey", entryValue);

    final GenericRecord record = new GenericRecordBuilder(NESTED_SCHEMA)
        .set("nestedMap", mapBuilder.build())
        .set("foo", 34L)
        .build();

    final Path path = createTempParquetFile(
        tempRoot.getRoot(), NESTED_SCHEMA, Collections.singletonList(record));
    final MessageType readSchema = (new AvroSchemaConverter()).convert(NESTED_SCHEMA);
    final ParquetRecordReader<Row> rowReader =
        new ParquetRecordReader<>(new RowReadSupport(), readSchema);

    final InputFile inputFile =
        HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(path.toUri()), testConfig);
    final ParquetFileReader fileReader =
        new ParquetFileReader(inputFile, ParquetReadOptions.builder().build());
    rowReader.initialize(fileReader, testConfig);
    assertFalse(rowReader.reachEnd());

    final Row row = rowReader.nextRecord();
    assertEquals(7, row.getArity());
    assertEquals(34L, row.getField(0));

    final Map<?, ?> nestedMap = (Map<?, ?>) row.getField(5);
    final Row nestedRow = (Row) nestedMap.get("testKey");
    assertEquals("nested", nestedRow.getField(0));
    assertEquals("nested_value", nestedRow.getField(1));
}
Example 8
Source File: Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
public Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991(Schema readerSchema) { this.readerSchema = readerSchema; this.mapMapValueSchema0 = readerSchema.getValueType(); this.mapValueOptionSchema0 = mapMapValueSchema0 .getTypes().get(1); this.field0 = mapValueOptionSchema0 .getField("field").schema(); }
Example 9
Source File: AvroUtils.java From envelope with Apache License 2.0 | 4 votes |
/** * Convert Avro Types into their associated DataType. * * @param schemaType Avro Schema.Type * @return DataType representation */ public static DataType dataTypeFor(Schema schemaType) { LOG.trace("Converting Schema[{}] to DataType", schemaType); // Unwrap "optional" unions to the base type boolean isOptional = isNullable(schemaType); if (isOptional) { // if only 2 items in the union, then "unwrap," otherwise, it's a full union and should be rendered as such if (schemaType.getTypes().size() == 2) { LOG.trace("Unwrapping simple 'optional' union for {}", schemaType); for (Schema s : schemaType.getTypes()) { if (s.getType().equals(NULL)) { continue; } // Unwrap schemaType = s; break; } } } // Convert supported LogicalTypes if (null != schemaType.getLogicalType()) { LogicalType logicalType = schemaType.getLogicalType(); switch (logicalType.getName()) { case "date" : return DataTypes.DateType; case "timestamp-millis" : return DataTypes.TimestampType; case "decimal" : LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; return DataTypes.createDecimalType(decimal.getPrecision(), decimal.getScale()); default: // Pass-thru LOG.warn("Unsupported LogicalType[{}], continuing with underlying base type", logicalType.getName()); } } switch (schemaType.getType()) { case RECORD: // StructType List<StructField> structFieldList = Lists.newArrayListWithCapacity(schemaType.getFields().size()); for (Field f : schemaType.getFields()) { structFieldList.add(DataTypes.createStructField(f.name(), dataTypeFor(f.schema()), isNullable(f.schema()))); } return DataTypes.createStructType(structFieldList); case ARRAY: Schema elementType = schemaType.getElementType(); return DataTypes.createArrayType(dataTypeFor(elementType), isNullable(elementType)); case MAP: Schema valueType = schemaType.getValueType(); return DataTypes.createMapType(DataTypes.StringType, dataTypeFor(valueType), isNullable(valueType)); case UNION: // StructType of members List<StructField> unionFieldList = 
Lists.newArrayListWithCapacity(schemaType.getTypes().size()); int m = 0; for (Schema u : schemaType.getTypes()) { unionFieldList.add(DataTypes.createStructField("member" + m++, dataTypeFor(u), isNullable(u))); } return DataTypes.createStructType(unionFieldList); case FIXED: case BYTES: return DataTypes.BinaryType; case ENUM: case STRING: return DataTypes.StringType; case INT: return DataTypes.IntegerType; case LONG: return DataTypes.LongType; case FLOAT: return DataTypes.FloatType; case DOUBLE: return DataTypes.DoubleType; case BOOLEAN: return DataTypes.BooleanType; case NULL: return DataTypes.NullType; default: throw new RuntimeException(String.format("Unrecognized or unsupported Avro Type conversion: %s", schemaType)); } }
Example 10
Source File: AvroFieldsGenerator.java From registry with Apache License 2.0 | 4 votes |
private void parseSchema(Schema schema, List<SchemaFieldInfo> schemaFieldInfos, Set<String> visitedRecords) { Schema.Type type = schema.getType(); LOG.debug("Visiting type: [{}]", type); switch (type) { case RECORD: String completeName = schema.getFullName(); // Since we are only interested in primitive data types, if we encounter a record that was already parsed it can be ignored if (!visitedRecords.contains(completeName)) { visitedRecords.add(completeName); // store fields of a record. List<Schema.Field> fields = schema.getFields(); for (Schema.Field recordField : fields) { parseField(recordField, schemaFieldInfos, visitedRecords); } } break; case MAP: Schema valueTypeSchema = schema.getValueType(); parseSchema(valueTypeSchema, schemaFieldInfos, visitedRecords); break; case ENUM: break; case ARRAY: Schema elementType = schema.getElementType(); parseSchema(elementType, schemaFieldInfos, visitedRecords); break; case UNION: List<Schema> unionTypes = schema.getTypes(); for (Schema typeSchema : unionTypes) { parseSchema(typeSchema, schemaFieldInfos, visitedRecords); } break; case STRING: case INT: case LONG: case FLOAT: case DOUBLE: case FIXED: case BOOLEAN: case BYTES: case NULL: break; default: throw new RuntimeException("Unsupported type: " + type); } }
Example 11
Source File: FastSerializerGenerator.java From avro-fastserde with Apache License 2.0 | 4 votes |
/**
 * Emits the serialization code for a map into {@code body}.
 *
 * <p>The emitted code is bracketed by {@code writeMapStart}/{@code writeMapEnd} calls on the
 * encoder. A null or empty map sets an item count of 0; otherwise the item count is the map
 * size and each key is written with {@code writeString}, followed by its value, which is
 * handled by {@code processComplexType} or {@code processSimpleType} depending on the value
 * schema.
 *
 * @param mapSchema schema of the map being serialized
 * @param mapExpr expression that evaluates to the map
 * @param body block that receives the emitted statements
 */
private void processMap(final Schema mapSchema, JExpression mapExpr, JBlock body) {
    final JClass mapClass = schemaAssistant.classFromSchema(mapSchema);
    final JClass keyClass = schemaAssistant.keyClassFromMapSchema(mapSchema);

    body.invoke(JExpr.direct(ENCODER), "writeMapStart");

    // if (map == null || map.isEmpty()) setItemCount(0) else setItemCount(map.size())
    final JExpression isNullOrEmpty =
        mapExpr.eq(JExpr._null()).cor(JExpr.invoke(mapExpr, "isEmpty"));
    final JConditional emptyCheck = body._if(isNullOrEmpty);
    emptyCheck._then().invoke(JExpr.direct(ENCODER), "setItemCount").arg(JExpr.lit(0));

    final JBlock nonEmptyBlock = emptyCheck._else();
    nonEmptyBlock.invoke(JExpr.direct(ENCODER), "setItemCount").arg(JExpr.invoke(mapExpr, "size"));

    final JForEach keysLoop = nonEmptyBlock.forEach(
        keyClass, getVariableName("key"), JExpr.invoke(JExpr.cast(mapClass, mapExpr), "keySet"));
    final JBlock loopBody = keysLoop.body();
    loopBody.invoke(JExpr.direct(ENCODER), "startItem");

    // Stringable keys are converted with toString() before being written.
    final JVar keyAsString = SchemaAssistant.hasStringableKey(mapSchema)
        ? loopBody.decl(string, getVariableName("keyString"), keysLoop.var().invoke("toString"))
        : keysLoop.var();

    final Schema valueSchema = mapSchema.getValueType();
    loopBody.invoke(JExpr.direct(ENCODER), "writeString").arg(keyAsString);

    if (SchemaAssistant.isComplexType(valueSchema)) {
        final JVar valueVar = declareValueVar(valueSchema.getName(), valueSchema, loopBody);
        loopBody.assign(valueVar, JExpr.invoke(JExpr.cast(mapClass, mapExpr), "get").arg(keysLoop.var()));
        processComplexType(valueSchema, valueVar, loopBody);
    } else {
        processSimpleType(valueSchema, mapExpr.invoke("get").arg(keysLoop.var()), loopBody);
    }

    body.invoke(JExpr.direct(ENCODER), "writeMapEnd");
}
Example 12
Source File: FastDeserializerGenerator.java From avro-fastserde with Apache License 2.0 | 4 votes |
private void processMap(JVar mapSchemaVar, final String name, final Schema mapSchema, final Schema readerMapSchema, JBlock parentBody, FieldAction action, BiConsumer<JBlock, JExpression> putMapIntoParent) { if (action.getShouldRead()) { Symbol valuesActionSymbol = null; for (Symbol symbol : action.getSymbol().production) { if (Symbol.Kind.REPEATER.equals(symbol.kind) && "map-end".equals(getSymbolPrintName(((Symbol.Repeater) symbol).end))) { valuesActionSymbol = symbol; break; } } if (valuesActionSymbol == null) { throw new FastDeserializerGeneratorException("unable to determine action for map: " + name); } action = FieldAction.fromValues(mapSchema.getValueType().getType(), action.getShouldRead(), valuesActionSymbol); } else { action = FieldAction.fromValues(mapSchema.getValueType().getType(), false, EMPTY_SYMBOL); } final JVar mapVar = action.getShouldRead() ? declareValueVar(name, readerMapSchema, parentBody) : null; JVar chunkLen = parentBody.decl(codeModel.LONG, getVariableName("chunkLen"), JExpr.direct(DECODER + ".readMapStart()")); JConditional conditional = parentBody._if(chunkLen.gt(JExpr.lit(0))); JBlock ifBlock = conditional._then(); if (action.getShouldRead()) { ifBlock.assign(mapVar, JExpr._new(schemaAssistant.classFromSchema(readerMapSchema, false))); JBlock elseBlock = conditional._else(); elseBlock.assign(mapVar, codeModel.ref(Collections.class).staticInvoke("emptyMap")); } JDoLoop doLoop = ifBlock._do(chunkLen.gt(JExpr.lit(0))); JForLoop forLoop = doLoop.body()._for(); JVar counter = forLoop.init(codeModel.INT, getVariableName("counter"), JExpr.lit(0)); forLoop.test(counter.lt(chunkLen)); forLoop.update(counter.incr()); JBlock forBody = forLoop.body(); JClass keyClass = schemaAssistant.keyClassFromMapSchema(action.getShouldRead() ? readerMapSchema : mapSchema); JExpression keyValueExpression = (string.equals(keyClass)) ? 
JExpr.direct(DECODER + ".readString()") : JExpr.direct(DECODER + ".readString(null)"); if (SchemaAssistant.hasStringableKey(mapSchema)) { keyValueExpression = JExpr._new(keyClass).arg(keyValueExpression.invoke("toString")); } JVar key = forBody.decl(keyClass, getVariableName("key"), keyValueExpression); JVar mapValueSchemaVar = null; if (action.getShouldRead() && useGenericTypes) { mapValueSchemaVar = declareSchemaVar(mapSchema.getValueType(), name + "MapValueSchema", mapSchemaVar.invoke("getValueType")); } BiConsumer<JBlock, JExpression> putValueInMap = null; if (action.getShouldRead()) { putValueInMap = (block, expression) -> block.invoke(mapVar, "put").arg(key).arg(expression); } if (SchemaAssistant.isComplexType(mapSchema.getValueType())) { String valueName = name + "Value"; Schema readerMapValueSchema = null; if (action.getShouldRead()) { readerMapValueSchema = readerMapSchema.getValueType(); } processComplexType(mapValueSchemaVar, valueName, mapSchema.getValueType(), readerMapValueSchema, forBody, action, putValueInMap); } else { // to preserve reader string specific options use reader map schema if (action.getShouldRead() && Schema.Type.STRING.equals(mapSchema.getValueType().getType())) { processSimpleType(readerMapSchema.getValueType(), forBody, action, putValueInMap); } else { processSimpleType(mapSchema.getValueType(), forBody, action, putValueInMap); } } doLoop.body().assign(chunkLen, JExpr.direct(DECODER + ".mapNext()")); if (action.getShouldRead()) { putMapIntoParent.accept(parentBody, mapVar); } }
Example 13
Source File: Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
public Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399(Schema readerSchema) { this.readerSchema = readerSchema; this.mapMapValueSchema0 = readerSchema.getValueType(); this.field0 = mapMapValueSchema0 .getField("field").schema(); }
Example 14
Source File: Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
public Map_of_record_GenericDeserializer_2141121767969292399_2141121767969292399(Schema readerSchema) { this.readerSchema = readerSchema; this.mapMapValueSchema0 = readerSchema.getValueType(); this.field0 = mapMapValueSchema0 .getField("field").schema(); }
Example 15
Source File: Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
public Map_of_UNION_GenericDeserializer_2087096002965517991_2087096002965517991(Schema readerSchema) { this.readerSchema = readerSchema; this.mapMapValueSchema0 = readerSchema.getValueType(); this.mapValueOptionSchema0 = mapMapValueSchema0 .getTypes().get(1); this.field0 = mapValueOptionSchema0 .getField("field").schema(); }
Example 16
Source File: AvroSchemaManager.java From spork with Apache License 2.0 | 4 votes |
/**
 * Initialize given a schema.
 *
 * <p>Named schemas are registered in {@code typeName2Schema} under their name, and record
 * fields are registered in {@code name2Schema} under their (namespace-qualified) field name
 * unless {@code ignoreNameMap} is set. In both maps the first registration wins and a
 * duplicate is only logged. Record fields, union members, array elements and map values are
 * then visited recursively.
 *
 * @param namespace prefix for field names, or {@code null} for none
 * @param schema schema to register and descend into
 * @param ignoreNameMap if {@code true}, field names are not added to {@code name2Schema}
 */
protected void init(String namespace, Schema schema, boolean ignoreNameMap) {

    /* put to map[type name]=>schema */
    if (isNamedSchema(schema)) {
        final String typeName = schema.getName();
        if (typeName2Schema.containsKey(typeName)) {
            AvroStorageLog.warn("Duplicate schemas defined for type:" + typeName + ". will ignore the second one:" + schema);
        } else {
            AvroStorageLog.details("add " + schema.getName() + "=" + schema + " to type2Schema");
            typeName2Schema.put(schema.getName(), schema);
        }
    }

    switch (schema.getType()) {
        case RECORD:
            /* put field schema to map[field name]=>schema */
            for (Field field : schema.getFields()) {
                final Schema fieldSchema = field.schema();
                final String name = (namespace == null) ? field.name() : namespace + "." + field.name();

                if (!ignoreNameMap) {
                    if (name2Schema.containsKey(name)) {
                        AvroStorageLog.warn("Duplicate schemas defined for alias:" + name + ". Will ignore the second one:"+ fieldSchema);
                    } else {
                        AvroStorageLog.details("add " + name + "=" + fieldSchema + " to name2Schema");
                        name2Schema.put(name, fieldSchema);
                    }
                }

                init(name, fieldSchema, ignoreNameMap);
            }
            break;

        case UNION:
            if (AvroStorageUtils.isAcceptableUnion(schema)) {
                init(namespace, AvroStorageUtils.getAcceptedType(schema), ignoreNameMap);
            } else {
                for (Schema member : schema.getTypes()) {
                    init(namespace, member, true);
                }
            }
            break;

        case ARRAY:
            init(namespace, schema.getElementType(), true);
            break;

        case MAP:
            init(namespace, schema.getValueType(), true);
            break;

        default:
            // Other types have nothing to descend into.
            break;
    }
}
Example 17
Source File: AvroNestedReader.java From pentaho-hadoop-shims with Apache License 2.0 | 4 votes |
/** * Processes a map at this point in the path. * * @param map the map to process * @param s the current schema at this point in the path * @param ignoreMissing true if null is to be returned for user fields that don't appear in the schema * @return the field value or null for out-of-bounds array indexes, non-existent map keys or unsupported avro types. * @throws KettleException if a problem occurs */ public Object convertToKettleValue( AvroInputField avroInputField, Map<Utf8, Object> map, Schema s, Schema defaultSchema, boolean ignoreMissing ) throws KettleException { if ( map == null ) { return null; } if ( avroInputField.getTempParts().size() == 0 ) { throw new KettleException( BaseMessages.getString( PKG, "AvroInput.Error.MalformedPathMap" ) ); } String part = avroInputField.getTempParts().remove( 0 ); if ( !( part.charAt( 0 ) == '[' ) ) { throw new KettleException( BaseMessages.getString( PKG, "AvroInput.Error.MalformedPathMap2", part ) ); } String key = part.substring( 1, part.indexOf( ']' ) ); if ( part.indexOf( ']' ) < part.length() - 1 ) { // more dimensions to the array/map part = part.substring( part.indexOf( ']' ) + 1, part.length() ); avroInputField.getTempParts().add( 0, part ); } Object value = map.get( new Utf8( key ) ); if ( value == null ) { return null; } Schema valueType = s.getValueType(); if ( valueType.getType() == Schema.Type.UNION ) { if ( value instanceof GenericContainer ) { // we can ask these things for their schema (covers // records, arrays, enums and fixed) valueType = ( (GenericContainer) value ).getSchema(); } else { // either have a map or primitive here if ( value instanceof Map ) { // now have to look for the schema of the map Schema mapSchema = null; for ( Schema ts : valueType.getTypes() ) { if ( ts.getType() == Schema.Type.MAP ) { mapSchema = ts; break; } } if ( mapSchema == null ) { throw new KettleException( BaseMessages.getString( PKG, "AvroInput.Error.UnableToFindSchemaForUnionMap" ) ); } valueType = mapSchema; } else { 
if ( avroInputField.getTempValueMeta().getType() != ValueMetaInterface.TYPE_STRING ) { // we have a two element union, where one element is the type // "null". So in this case we actually have just one type and can // output specific values of it (instead of using String as a // catch all for varying primitive types in the union) valueType = checkUnion( valueType ); } else { // use the string representation of the value valueType = Schema.create( Schema.Type.STRING ); } } } } // what have we got? if ( valueType.getType() == Schema.Type.RECORD ) { return convertToKettleValue( avroInputField, (GenericData.Record) value, valueType, defaultSchema, ignoreMissing ); } else if ( valueType.getType() == Schema.Type.ARRAY ) { return convertToKettleValue( avroInputField, (GenericData.Array) value, valueType, defaultSchema, ignoreMissing ); } else if ( valueType.getType() == Schema.Type.MAP ) { return convertToKettleValue( avroInputField, (Map<Utf8, Object>) value, valueType, defaultSchema, ignoreMissing ); } else { // assume a primitive return getPrimitive( avroInputField, value, valueType ); } }
Example 18
Source File: AvroNestedFieldGetter.java From pentaho-hadoop-shims with Apache License 2.0 | 4 votes |
/** * Builds a list of field objects holding paths corresponding to the leaf primitives in an Avro schema. * * @param s the schema to process * @return a List of field objects * @throws KettleException if a problem occurs */ public static List<? extends IAvroInputField> getLeafFields( Schema s ) throws KettleException { if ( s == null ) { return null; } List<AvroInputField> fields = new ArrayList<>(); String root = ""; if ( s.getType() == Schema.Type.ARRAY || s.getType() == Schema.Type.MAP ) { while ( s.getType() == Schema.Type.ARRAY || s.getType() == Schema.Type.MAP ) { if ( s.getType() == Schema.Type.ARRAY ) { root += "[0]"; s = s.getElementType(); } else { root += KEY; s = s.getValueType(); } } } if ( s.getType() == Schema.Type.RECORD ) { processRecord( root, s, fields ); } else if ( s.getType() == Schema.Type.UNION ) { processUnion( root, s, fields ); } else { // our top-level array/map structure bottoms out with primitive types // we'll create one zero-indexed path through to a primitive - the // user can copy and paste the path if they want to extract other // indexes out to separate Kettle fields AvroInputField newField = createAvroField( root, s ); if ( newField != null ) { fields.add( newField ); } } for ( int i = 0; i < fields.size() - 1; i++ ) { AvroInputField field = fields.get( i ); boolean duplicateName; int suffix = 0; String fieldName; do { fieldName = field.getPentahoFieldName(); if ( suffix > 0 ) { fieldName = fieldName + "-" + Integer.toString( suffix ); } duplicateName = false; for ( int j = i + 1; ( j < fields.size() ) && !duplicateName; j++ ) { duplicateName = fieldName.equals( fields.get( j ).getPentahoFieldName() ); } suffix++; } while ( duplicateName ); field.setPentahoFieldName( fieldName ); } return fields; }
Example 19
Source File: AvroSchemaManager.java From Cubert with Apache License 2.0 | 4 votes |
/**
 * Initialize given a schema.
 *
 * <p>Named schemas are registered in {@code typeName2Schema} under their name, and record
 * fields are registered in {@code name2Schema} under their (namespace-qualified) field name
 * unless {@code ignoreNameMap} is set. In both maps the first registration wins and a
 * duplicate is only logged. Record fields, union members, array elements and map values are
 * then visited recursively.
 *
 * @param namespace prefix for field names, or {@code null} for none
 * @param schema schema to register and descend into
 * @param ignoreNameMap if {@code true}, field names are not added to {@code name2Schema}
 */
protected void init(String namespace, Schema schema, boolean ignoreNameMap) {

    /* put to map[type name]=>schema */
    if (isNamedSchema(schema)) {
        final String typeName = schema.getName();
        if (typeName2Schema.containsKey(typeName)) {
            AvroStorageLog.warn("Duplicate schemas defined for type:" + typeName + ". will ignore the second one:" + schema);
        } else {
            AvroStorageLog.details("add " + schema.getName() + "=" + schema + " to type2Schema");
            typeName2Schema.put(schema.getName(), schema);
        }
    }

    switch (schema.getType()) {
        case RECORD:
            /* put field schema to map[field name]=>schema */
            for (Field field : schema.getFields()) {
                final Schema fieldSchema = field.schema();
                final String name = (namespace == null) ? field.name() : namespace + "." + field.name();

                if (!ignoreNameMap) {
                    if (name2Schema.containsKey(name)) {
                        AvroStorageLog.warn("Duplicate schemas defined for alias:" + name + ". Will ignore the second one:"+ fieldSchema);
                    } else {
                        AvroStorageLog.details("add " + name + "=" + fieldSchema + " to name2Schema");
                        name2Schema.put(name, fieldSchema);
                    }
                }

                init(name, fieldSchema, ignoreNameMap);
            }
            break;

        case UNION:
            if (AvroStorageUtils.isAcceptableUnion(schema)) {
                init(namespace, AvroStorageUtils.getAcceptedType(schema), ignoreNameMap);
            } else {
                for (Schema member : schema.getTypes()) {
                    init(namespace, member, true);
                }
            }
            break;

        case ARRAY:
            init(namespace, schema.getElementType(), true);
            break;

        case MAP:
            init(namespace, schema.getValueType(), true);
            break;

        default:
            // Other types have nothing to descend into.
            break;
    }
}
Example 20
Source File: AvroTypeSystem.java From transport with BSD 2-Clause "Simplified" License | 4 votes |
/**
 * Returns the value schema of the given map schema by delegating to
 * {@code Schema#getValueType()}.
 *
 * @param dataType the schema to query
 * @return the schema of the map's values
 */
@Override
protected Schema getMapValueType(Schema dataType) {
    final Schema valueSchema = dataType.getValueType();
    return valueSchema;
}