Java Code Examples for org.apache.avro.Schema#Type
The following examples show how to use
org.apache.avro.Schema#Type.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroRecordConverter.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Reads {@code fieldName} from {@code record} and renders it as a string.
 *
 * @param record record to read from
 * @param avroType Avro type the field is expected to hold; one of INT, LONG, FLOAT, DOUBLE or
 *     STRING
 * @param fieldName name of the field to read
 * @return the field's string form, or an empty Optional when the field value is {@code null}
 * @throws IllegalArgumentException if {@code avroType} is not one of the supported types
 */
private static Optional<String> readString(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  final Object raw;
  // The casts are deliberate: a value whose runtime class does not match the declared Avro type
  // must fail with a ClassCastException rather than being silently stringified.
  switch (avroType) {
    case INT:
      raw = (Integer) record.get(fieldName);
      break;
    case LONG:
      raw = (Long) record.get(fieldName);
      break;
    case FLOAT:
      raw = (Float) record.get(fieldName);
      break;
    case DOUBLE:
      raw = (Double) record.get(fieldName);
      break;
    case STRING:
      raw = (Utf8) record.get(fieldName);
      break;
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as STRING");
  }
  return Optional.ofNullable(raw).map(Object::toString);
}
Example 2
Source File: AvroRecordConverter.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Reads {@code fieldName} from {@code record} as a list of booleans.
 *
 * <p>For BOOLEAN the stored list is returned as is. For STRING each element is converted with
 * {@link Boolean#valueOf(String)}; {@code null} elements stay {@code null}.
 *
 * @param record record to read from
 * @param avroType element type of the stored list; BOOLEAN or STRING
 * @param fieldName name of the field to read
 * @return the list, or an empty Optional when the field value is {@code null}
 * @throws IllegalArgumentException if {@code avroType} is neither BOOLEAN nor STRING
 */
@VisibleForTesting
@SuppressWarnings("unchecked")
static Optional<List<Boolean>> readBoolArray(
    GenericRecord record, Schema.Type avroType, String fieldName) {
  switch (avroType) {
    case BOOLEAN:
      return Optional.ofNullable((List<Boolean>) record.get(fieldName));
    case STRING:
      return Optional.ofNullable((List<Utf8>) record.get(fieldName))
          .map(
              texts ->
                  texts.stream()
                      .map(text -> text == null ? null : Boolean.valueOf(text.toString()))
                      .collect(Collectors.toList()));
    default:
      throw new IllegalArgumentException("Cannot interpret " + avroType + " as BOOL");
  }
}
Example 3
Source File: ClusterIntegrationTestUtils.java From incubator-pinot with Apache License 2.0 | 6 votes |
/**
 * Produces a random value whose Java type matches the given Avro field type.
 *
 * @param fieldType Avro type of the field
 * @return a boxed Boolean, Integer, Long, Float or Double, or a String starting with "potato"
 * @throws IllegalStateException if the field type is not one of the supported primitive types
 */
private static Object generateRandomValue(Schema.Type fieldType) {
  final Object randomValue;
  switch (fieldType) {
    case BOOLEAN:
      randomValue = RANDOM.nextBoolean();
      break;
    case INT:
      randomValue = RANDOM.nextInt(100000);
      break;
    case LONG:
      randomValue = RANDOM.nextLong() % 1000000;
      break;
    case FLOAT:
      // NOTE(review): if RANDOM is a java.util.Random, nextFloat() is already < 1, so this
      // modulo leaves the value unchanged; kept as is - confirm the intended range.
      randomValue = RANDOM.nextFloat() % 100000;
      break;
    case DOUBLE:
      // NOTE(review): same remark as for FLOAT above.
      randomValue = RANDOM.nextDouble() % 1000000;
      break;
    case STRING:
      randomValue = "potato" + RANDOM.nextInt(1000);
      break;
    default:
      throw new IllegalStateException("Unsupported field type: " + fieldType);
  }
  return randomValue;
}
Example 4
Source File: HiveAvroORCQueryGenerator.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/** * Referencing org.apache.hadoop.hive.serde2.avro.SchemaToTypeInfo#generateTypeInfo(org.apache.avro.Schema) on * how to deal with logical types that supported by Hive but not by Avro(e.g. VARCHAR). * * If unsupported logical types found, return empty string as a result. * @param schema Avro schema * @return * @throws AvroSerdeException */ public static String generateHiveSpecificLogicalType(Schema schema) throws AvroSerdeException { // For bytes type, it can be mapped to decimal. Schema.Type type = schema.getType(); if (type == Schema.Type.STRING && AvroSerDe.VARCHAR_TYPE_NAME .equalsIgnoreCase(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE))) { int maxLength = 0; try { maxLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt(); } catch (Exception ex) { throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex); } return String.format("varchar(%s)", maxLength); } else { return StringUtils.EMPTY; } }
Example 5
Source File: NiFiOrcUtils.java From nifi with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code CREATE EXTERNAL TABLE IF NOT EXISTS ... STORED AS ORC} statement whose columns
 * mirror the fields of the given Avro record schema.
 *
 * @param avroSchema schema to convert; must be of type RECORD
 * @param tableName name of the table to create
 * @return the DDL statement
 * @throws IllegalArgumentException if the schema is not a RECORD
 */
public static String generateHiveDDL(Schema avroSchema, String tableName) {
  final Schema.Type schemaType = avroSchema.getType();
  if (!Schema.Type.RECORD.equals(schemaType)) {
    throw new IllegalArgumentException(
        "Avro schema is of type " + schemaType.getName() + ", not RECORD");
  }

  final List<Schema.Field> fields = avroSchema.getFields();
  // One "<name> <hive type>" entry per field; no fields yields an empty column list.
  final String columns =
      fields == null
          ? ""
          : fields.stream()
              .map(field -> field.name() + " " + getHiveTypeFromAvroType(field.schema()))
              .collect(Collectors.joining(", "));

  return "CREATE EXTERNAL TABLE IF NOT EXISTS " + tableName + " (" + columns + ") STORED AS ORC";
}
Example 6
Source File: ThirdeyeAvroUtils.java From incubator-pinot with Apache License 2.0 | 5 votes |
/**
 * Looks up a field in the Avro schema and returns the name of its data type.
 *
 * @param fieldname name of the field to look up
 * @param schema Avro schema that should contain the field
 * @return string form of the {@code DataType} matching the field's (union-unwrapped) Avro type
 * @throws IllegalStateException if the schema has no field with that name
 * @throws RuntimeException if the field is an ARRAY, which is not handled yet
 */
public static String getDataTypeForField(String fieldname, Schema schema) {
  final Field target = schema.getField(fieldname);
  if (target == null) {
    throw new IllegalStateException("Field " + fieldname + " does not exist in schema");
  }

  final Schema fieldSchema = target.schema();
  if (fieldSchema.getType() == Schema.Type.ARRAY) {
    throw new RuntimeException(
        "TODO: validate correctness after commit b19a0965044d3e3f4f1541cc4cd9ea60b96a4b99");
  }

  final Schema.Type effectiveType = extractSchemaFromUnionIfNeeded(fieldSchema).getType();
  return DataType.valueOf(effectiveType).toString();
}
Example 7
Source File: ConvertAvroTypeToSQL.java From components with Apache License 2.0 | 5 votes |
private int convertRawAvroType(Schema.Type type) { Integer sqlType = this.config.CONVERT_AVROTYPE_TO_SQLTYPE.get(type); if(sqlType != null){ return sqlType; } switch (type) { case STRING: sqlType = Types.VARCHAR; break; case BYTES: sqlType = Types.BLOB; break; case INT: sqlType = Types.INTEGER; break; case LONG: sqlType = Types.INTEGER; break; case FLOAT: sqlType = Types.NUMERIC; break; case DOUBLE: sqlType = Types.NUMERIC; break; case BOOLEAN: sqlType = Types.BOOLEAN; break; default: // ignored types ENUM, RECORD, MAP, FIXED, ARRAY, NULL throw new UnsupportedOperationException(type + " Avro type not supported"); } return sqlType; }
Example 8
Source File: SalesforceAvroRegistryTest.java From components with Apache License 2.0 | 5 votes |
/**
 * Checks that {@link SalesforceAvroRegistry#inferSchema(Object)} produces a {@link Schema} of
 * type {@link Type#DOUBLE} for a field of type percent.
 *
 * Regression test for https://jira.talendforge.org/browse/TDI-37479
 */
@Test
public void testInferSchemaFieldPercent() {
  Field percentField = new Field();
  percentField.setType(FieldType.percent);

  Schema inferred = sRegistry.inferSchema(percentField);

  assertThat(inferred.getType(), is(Schema.Type.DOUBLE));
}
Example 9
Source File: AvroTypeFactoryImpl.java From samza with Apache License 2.0 | 5 votes |
/**
 * Ensures the top-level Avro type of the schema is RECORD.
 *
 * @param schema schema to check
 * @throws SamzaException if the schema's type is anything other than RECORD; the same message is
 *     logged at error level before throwing
 */
private void validateTopLevelAvroType(Schema schema) {
  final Schema.Type topLevelType = schema.getType();
  if (topLevelType == Schema.Type.RECORD) {
    return;
  }

  final String msg =
      String.format(
          "Samza Sql supports only RECORD as top level avro type, But the Schema's type is %s",
          topLevelType);
  LOG.error(msg);
  throw new SamzaException(msg);
}
Example 10
Source File: CSVUtil.java From kite with Apache License 2.0 | 5 votes |
private static Schema.Type inferFieldType(String example) { if (example == null || example.isEmpty()) { return null; // not enough information } else if (LONG.matcher(example).matches()) { return Schema.Type.LONG; } else if (DOUBLE.matcher(example).matches()) { return Schema.Type.DOUBLE; } else if (FLOAT.matcher(example).matches()) { return Schema.Type.FLOAT; } return Schema.Type.STRING; }
Example 11
Source File: AvroRecordReader.java From Bats with Apache License 2.0 | 5 votes |
/**
 * Processes a root-level container, which must be an Avro record.
 *
 * @param container the root object that was read
 * @param schema schema describing {@code container}
 * @throws DrillRuntimeException if the schema's type is not RECORD
 */
private void processRecord(final GenericContainer container, final Schema schema) {
  final Schema.Type rootType = schema.getType();
  // The switch only validates the root type; the record itself is handled below.
  switch (rootType) {
    case RECORD:
      break;
    default:
      throw new DrillRuntimeException("Root object must be record type. Found: " + rootType);
  }
  process(container, schema, null, new MapOrListWriterImpl(writer.rootAsMap()), fieldSelection);
}
Example 12
Source File: AvroGenerators.java From beam with Apache License 2.0 | 5 votes |
@Override public Schema generate(SourceOfRandomness random, GenerationStatus status) { Schema.Type type; if (nesting(status) >= MAX_NESTING) { type = random.choose(PRIMITIVE_TYPES); } else { type = random.choose(ALL_TYPES); } if (PRIMITIVE_TYPES.contains(type)) { return Schema.create(type); } else { nestingInc(status); if (type == Schema.Type.FIXED) { int size = random.choose(Arrays.asList(1, 5, 12)); return Schema.createFixed("fixed_" + branch(status), "", "", size); } else if (type == Schema.Type.UNION) { // only nullable fields, everything else isn't supported in row conversion code return UnionSchemaGenerator.INSTANCE.generate(random, status); } else if (type == Schema.Type.ENUM) { return EnumSchemaGenerator.INSTANCE.generate(random, status); } else if (type == Schema.Type.RECORD) { return RecordSchemaGenerator.INSTANCE.generate(random, status); } else if (type == Schema.Type.MAP) { return Schema.createMap(generate(random, status)); } else if (type == Schema.Type.ARRAY) { return Schema.createArray(generate(random, status)); } else { throw new AssertionError("Unexpected AVRO type: " + type); } } }
Example 13
Source File: FilterRowProperties.java From components with Apache License 2.0 | 4 votes |
/**
 * Tells whether the given Avro type is STRING.
 *
 * TODO: This method will be used once the field autocompletion will be implemented
 *
 * @param type Avro type to test, may be {@code null}
 * @return {@code true} only for {@code Schema.Type.STRING}
 */
private Boolean isString(Schema.Type type) {
  // Enum constants are singletons, so an identity comparison is equivalent to equals().
  return type == Schema.Type.STRING;
}
Example 14
Source File: FastDeserializerGeneratorBase.java From avro-util with BSD 2-Clause "Simplified" License | 4 votes |
/**
 * Creates a field action by storing the three arguments in the fields of the same name.
 *
 * @param type Avro type stored in {@code this.type}
 * @param shouldRead flag stored in {@code this.shouldRead}
 * @param symbol symbol stored in {@code this.symbol}
 */
private FieldAction(Schema.Type type, boolean shouldRead, Symbol symbol) {
  this.type = type;
  this.shouldRead = shouldRead;
  this.symbol = symbol;
}
Example 15
Source File: AvroSchema2Pig.java From spork with Apache License 2.0 | 4 votes |
/** * Convert a schema with field name to a pig schema */ private static ResourceFieldSchema inconvert(Schema in, String fieldName, Set<Schema> visitedRecords) throws IOException { AvroStorageLog.details("InConvert avro schema with field name " + fieldName); Schema.Type avroType = in.getType(); ResourceFieldSchema fieldSchema = new ResourceFieldSchema(); fieldSchema.setName(fieldName); if (avroType.equals(Schema.Type.RECORD)) { AvroStorageLog.details("convert to a pig tuple"); if (visitedRecords.contains(in)) { fieldSchema.setType(DataType.BYTEARRAY); } else { visitedRecords.add(in); fieldSchema.setType(DataType.TUPLE); ResourceSchema tupleSchema = new ResourceSchema(); List<Schema.Field> fields = in.getFields(); ResourceFieldSchema[] childFields = new ResourceFieldSchema[fields.size()]; int index = 0; for (Schema.Field field : fields) { childFields[index++] = inconvert(field.schema(), field.name(), visitedRecords); } tupleSchema.setFields(childFields); fieldSchema.setSchema(tupleSchema); visitedRecords.remove(in); } } else if (avroType.equals(Schema.Type.ARRAY)) { AvroStorageLog.details("convert array to a pig bag"); fieldSchema.setType(DataType.BAG); Schema elemSchema = in.getElementType(); ResourceFieldSchema subFieldSchema = inconvert(elemSchema, ARRAY_FIELD, visitedRecords); add2BagSchema(fieldSchema, subFieldSchema); } else if (avroType.equals(Schema.Type.MAP)) { AvroStorageLog.details("convert map to a pig map"); fieldSchema.setType(DataType.MAP); } else if (avroType.equals(Schema.Type.UNION)) { if (AvroStorageUtils.isAcceptableUnion(in)) { Schema acceptSchema = AvroStorageUtils.getAcceptedType(in); ResourceFieldSchema realFieldSchema = inconvert(acceptSchema, null, visitedRecords); fieldSchema.setType(realFieldSchema.getType()); fieldSchema.setSchema(realFieldSchema.getSchema()); } else throw new IOException("Do not support generic union:" + in); } else if (avroType.equals(Schema.Type.FIXED)) { fieldSchema.setType(DataType.BYTEARRAY); } else if 
(avroType.equals(Schema.Type.BOOLEAN)) { fieldSchema.setType(DataType.BOOLEAN); } else if (avroType.equals(Schema.Type.BYTES)) { fieldSchema.setType(DataType.BYTEARRAY); } else if (avroType.equals(Schema.Type.DOUBLE)) { fieldSchema.setType(DataType.DOUBLE); } else if (avroType.equals(Schema.Type.ENUM)) { fieldSchema.setType(DataType.CHARARRAY); } else if (avroType.equals(Schema.Type.FLOAT)) { fieldSchema.setType(DataType.FLOAT); } else if (avroType.equals(Schema.Type.INT)) { fieldSchema.setType(DataType.INTEGER); } else if (avroType.equals(Schema.Type.LONG)) { fieldSchema.setType(DataType.LONG); } else if (avroType.equals(Schema.Type.STRING)) { fieldSchema.setType(DataType.CHARARRAY); } else if (avroType.equals(Schema.Type.NULL)) { // value of NULL is always NULL fieldSchema.setType(DataType.INTEGER); } else { throw new IOException("Unsupported avro type:" + avroType); } return fieldSchema; }
Example 16
Source File: AvroSchemaProvider.java From registry with Apache License 2.0 | 4 votes |
/**
 * Recursively appends a textual form of {@code schema} to {@code appendable}.
 *
 * <ul>
 *   <li>UNION: {@code [} + the comma-separated forms of the branch schemas + {@code ]}.
 *   <li>ARRAY / MAP: {@code {"type":"<name>"} followed by {@code ,"items":} or
 *       {@code ,"values":} and the form of the element / value schema, then {@code }}.
 *   <li>ENUM / FIXED / RECORD: if {@code env} holds a non-null entry for the schema's full name,
 *       only that entry is appended; otherwise {@code addNameType} is called and the
 *       type-specific attributes (symbols, size, or aliases and fields) follow.
 *   <li>Every other type: the type name in double quotes.
 * </ul>
 *
 * @param env map from full schema name to the text appended for later references to that name;
 *     presumably filled by {@code addNameType} - confirm against that helper
 * @param schema schema to render
 * @param appendable sink that receives the text
 * @return the value returned by the last {@code append} call of the branch that was taken
 * @throws IOException if appending fails
 */
private static Appendable build(Map<String, String> env, Schema schema, Appendable appendable) throws IOException {
  // Shared "no separator needed yet" flag for whichever single list this call emits.
  boolean firstTime = true;
  Schema.Type schemaType = schema.getType();
  String fullName = schema.getFullName();
  switch (schemaType) {
    default: // boolean, bytes, double, float, int, long, null, string
      return appendable.append('"').append(schemaType.getName()).append('"');
    case UNION:
      appendable.append('[');
      for (Schema b : schema.getTypes()) {
        if (!firstTime) appendable.append(',');
        else firstTime = false;
        build(env, b, appendable);
      }
      return appendable.append(']');
    case ARRAY:
    case MAP:
      appendable.append("{\"type\":\"").append(schemaType.getName()).append("\"");
      if (schemaType == Schema.Type.ARRAY)
        build(env, schema.getElementType(), appendable.append(",\"items\":"));
      else
        build(env, schema.getValueType(), appendable.append(",\"values\":"));
      return appendable.append("}");
    case ENUM:
      // A name already present in env is emitted as its stored text instead of being expanded.
      if (env.get(fullName) != null) {
        return appendable.append(env.get(fullName));
      }
      addNameType(env, appendable, schemaType, fullName);
      appendable.append(",\"symbols\":[");
      for (String enumSymbol : schema.getEnumSymbols()) {
        if (!firstTime) appendable.append(',');
        else firstTime = false;
        appendable.append('"').append(enumSymbol).append('"');
      }
      return appendable.append("]").append("}");
    case FIXED:
      if (env.get(fullName) != null) {
        return appendable.append(env.get(fullName));
      }
      addNameType(env, appendable, schemaType, fullName);
      return appendable.append(",\"size\":").append(Integer.toString(schema.getFixedSize())).append("}");
    case RECORD:
      if (env.get(fullName) != null) {
        return appendable.append(env.get(fullName));
      }
      addNameType(env, appendable, schemaType, fullName);
      // avro resolution parsing does not handle aliases and default attributes
      // handle aliases
      // NOTE(review): "aliases" is appended with no leading comma and is followed by "," right
      // before ",\"fields\"", so the text around it is not well-formed JSON unless addNameType
      // compensates. Existing consumers may depend on the exact output - confirm before changing.
      Set<String> aliases = schema.getAliases();
      if (aliases != null && !aliases.isEmpty()) {
        appendable.append("\"aliases\":")
            .append("[")
            .append(Joiner.on(",").join(aliases.stream()
                .map(x -> "\"" + x + "\"")
                .collect(Collectors.toList())))
            .append("]")
            .append(",");
      }
      appendable.append(",\"fields\":[");
      for (Schema.Field field : schema.getFields()) {
        if (!firstTime) {
          appendable.append(',');
        } else {
          firstTime = false;
        }
        appendable.append("{\"name\":\"").append(field.name()).append("\"").append(",\"type\":");
        // handle default value
        // NOTE(review): the default value's toString() is written directly after "type": and
        // before the field's type, with no key or separator - confirm this is intended.
        Object defaultValue = field.defaultVal();
        if (defaultValue != null) {
          appendable.append(defaultValue.toString());
        }
        build(env, field.schema(), appendable).append("}");
      }
      return appendable.append("]").append("}");
  }
}
Example 17
Source File: AvroSchemaConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
@SuppressWarnings("deprecation") private Type convertField(String fieldName, Schema schema, Type.Repetition repetition) { Types.PrimitiveBuilder<PrimitiveType> builder; Schema.Type type = schema.getType(); LogicalType logicalType = schema.getLogicalType(); if (type.equals(Schema.Type.BOOLEAN)) { builder = Types.primitive(BOOLEAN, repetition); } else if (type.equals(Schema.Type.INT)) { builder = Types.primitive(INT32, repetition); } else if (type.equals(Schema.Type.LONG)) { builder = Types.primitive(INT64, repetition); } else if (type.equals(Schema.Type.FLOAT)) { builder = Types.primitive(FLOAT, repetition); } else if (type.equals(Schema.Type.DOUBLE)) { builder = Types.primitive(DOUBLE, repetition); } else if (type.equals(Schema.Type.BYTES)) { builder = Types.primitive(BINARY, repetition); } else if (type.equals(Schema.Type.STRING)) { if (logicalType != null && logicalType.getName().equals(LogicalTypes.uuid().getName()) && writeParquetUUID) { builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES); } else { builder = Types.primitive(BINARY, repetition).as(stringType()); } } else if (type.equals(Schema.Type.RECORD)) { return new GroupType(repetition, fieldName, convertFields(schema.getFields())); } else if (type.equals(Schema.Type.ENUM)) { builder = Types.primitive(BINARY, repetition).as(enumType()); } else if (type.equals(Schema.Type.ARRAY)) { if (writeOldListStructure) { return ConversionPatterns.listType(repetition, fieldName, convertField("array", schema.getElementType(), REPEATED)); } else { return ConversionPatterns.listOfElements(repetition, fieldName, convertField(AvroWriteSupport.LIST_ELEMENT_NAME, schema.getElementType())); } } else if (type.equals(Schema.Type.MAP)) { Type valType = convertField("value", schema.getValueType()); // avro map key type is always string return ConversionPatterns.stringKeyMapType(repetition, fieldName, valType); } else if (type.equals(Schema.Type.FIXED)) { builder 
= Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition) .length(schema.getFixedSize()); } else if (type.equals(Schema.Type.UNION)) { return convertUnion(fieldName, schema, repetition); } else { throw new UnsupportedOperationException("Cannot convert Avro type " + type); } // schema translation can only be done for known logical types because this // creates an equivalence if (logicalType != null) { if (logicalType instanceof LogicalTypes.Decimal) { LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision())); } else { LogicalTypeAnnotation annotation = convertLogicalType(logicalType); if (annotation != null) { builder.as(annotation); } } } return builder.named(fieldName); }
Example 18
Source File: TestAvroExport.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 4 votes |
/**
 * Builds an Avro field with the given name whose schema is created from {@code type}; the two
 * remaining constructor arguments are passed as {@code null}.
 *
 * @param name field name
 * @param type Avro type passed to {@code Schema.create}
 * @return the new field
 */
private Field buildAvroField(String name, Schema.Type type) {
  final Schema fieldSchema = Schema.create(type);
  return new Field(name, fieldSchema, null, null);
}
Example 19
Source File: TestCSVSchemaInference.java From kite with Apache License 2.0 | 4 votes |
/**
 * Shorthand for {@code Schema.create(type)}.
 *
 * @param type Avro type to create a schema for
 * @return the schema returned by {@code Schema.create}
 */
public Schema schema(Schema.Type type) {
  return Schema.create(type);
}
Example 20
Source File: JsonElementConversionWithAvroSchemaFactory.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/**
 * Returns the Avro type of the schema supplied by {@code schema()}.
 *
 * @return the result of {@code schema().getType()}
 */
@Override
public Schema.Type getTargetType() {
  return schema().getType();
}