org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo Java Examples

Source File: HiveTypeConverter.java From metacat with Apache License 2.0

6 votes

/**
 * Converts a Hive type string into the canonical Metacat type.
 *
 * @param type Hive type string to convert
 * @return the result of {@code getCanonicalType} for the object inspector built from the type string
 */
@Override
public Type toMetacatType(final String type) {
    // Presto may send a bare "varchar" without the length Hive requires; map that case to the string type.
    final String hiveTypeName = "varchar".equals(type.toLowerCase()) ? serdeConstants.STRING_TYPE_NAME : type;
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveTypeName);
    final ObjectInspector standardInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);

    if (!typeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
        return getCanonicalType(standardInspector);
    }

    // The standard struct inspector lower-cases field names, but Metacat has to keep the original case,
    // so the inspector is wrapped together with the field names taken from the type info.
    final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
    final StandardStructObjectInspector structInspector = (StandardStructObjectInspector) standardInspector;
    return getCanonicalType(new HiveTypeConverter.SameCaseStandardStructObjectInspector(
        structTypeInfo.getAllStructFieldNames(), structInspector));
}

Source File: HiveSchemaConverter.java From kite with Apache License 2.0

6 votes

/**
 * Builds a record schema named after the table, with one field per Hive column.
 *
 * @param table name given to the resulting record schema
 * @param columns Hive columns to convert, in order
 * @param strategy optional partition strategy, used to compute the required fields
 * @return the record schema holding the converted fields
 */
public static Schema convertTable(String table, Collection<FieldSchema> columns,
                                  @Nullable PartitionStrategy strategy) {
  ArrayList<String> columnNames = Lists.newArrayList();
  ArrayList<TypeInfo> columnTypes = Lists.newArrayList();
  // the path starts out empty so that the table name is not part of it
  LinkedList<String> rootPath = Lists.newLinkedList();
  Collection<String[]> requiredFields = requiredFields(strategy);

  List<Schema.Field> recordFields = Lists.newArrayList();
  for (FieldSchema column : columns) {
    TypeInfo columnType = parseTypeInfo(column.getType());
    columnNames.add(column.getName());
    columnTypes.add(columnType);
    recordFields.add(
        convertField(rootPath, column.getName(), columnType, requiredFields));
  }

  // assemble a struct type describing the whole table; it is only used to derive the doc string
  StructTypeInfo tableStruct = new StructTypeInfo();
  tableStruct.setAllStructFieldNames(columnNames);
  tableStruct.setAllStructFieldTypeInfos(columnTypes);

  Schema tableSchema = Schema.createRecord(table, doc(tableStruct), null, false);
  tableSchema.setFields(recordFields);
  return tableSchema;
}

Source File: HiveSchemaConverter.java From kite with Apache License 2.0

6 votes

/**
 * Converts a Hive struct type into a record schema with the given name.
 *
 * @param path names of the enclosing fields; {@code name} is pushed while each member is converted
 * @param name name of the struct field, also used as the record name
 * @param type struct type to convert
 * @param required required field paths, passed through to {@code convertField}
 * @return the record schema holding one field per struct member
 */
private static Schema convert(LinkedList<String> path, String name,
                              StructTypeInfo type,
                              Collection<String[]> required) {
  List<String> memberNames = type.getAllStructFieldNames();
  List<TypeInfo> memberTypes = type.getAllStructFieldTypeInfos();
  Preconditions.checkArgument(memberNames.size() == memberTypes.size(),
      "Cannot convert struct: %s names != %s types",
      memberNames.size(), memberTypes.size());

  List<Schema.Field> recordFields = Lists.newArrayList();
  int position = 0;
  for (String memberName : memberNames) {
    // the struct's own name is on the path only while its member is being converted
    path.addLast(name);
    recordFields.add(
        convertField(path, memberName, memberTypes.get(position), required));
    path.removeLast();
    position += 1;
  }

  Schema structSchema = Schema.createRecord(name, doc(type), null, false);
  structSchema.setFields(recordFields);
  return structSchema;
}

Source File: HiveCoercionPolicy.java From presto with Apache License 2.0

6 votes

/**
 * Decides whether one struct type can be coerced to another.
 *
 * Fields may be added or dropped at the end. For every index present in both structs the field
 * names must be equal and the field types must be equal or coercible.
 *
 * @param fromHiveType source type
 * @param toHiveType target type
 * @return {@code true} when both types are structs and all shared positions are compatible
 */
private boolean canCoerceForStruct(HiveType fromHiveType, HiveType toHiveType)
{
    boolean bothStructs = fromHiveType.getCategory() == Category.STRUCT && toHiveType.getCategory() == Category.STRUCT;
    if (!bothStructs) {
        return false;
    }

    List<String> sourceNames = ((StructTypeInfo) fromHiveType.getTypeInfo()).getAllStructFieldNames();
    List<String> targetNames = ((StructTypeInfo) toHiveType.getTypeInfo()).getAllStructFieldNames();
    List<HiveType> sourceTypes = extractStructFieldTypes(fromHiveType);
    List<HiveType> targetTypes = extractStructFieldTypes(toHiveType);

    // only the leading fields that exist on both sides are compared
    int sharedFieldCount = min(sourceTypes.size(), targetTypes.size());
    for (int position = 0; position < sharedFieldCount; position++) {
        boolean sameName = sourceNames.get(position).equals(targetNames.get(position));
        if (!sameName) {
            return false;
        }
        HiveType sourceType = sourceTypes.get(position);
        HiveType targetType = targetTypes.get(position);
        boolean compatible = sourceType.equals(targetType) || canCoerce(sourceType, targetType);
        if (!compatible) {
            return false;
        }
    }
    return true;
}

Source File: HiveWriteUtils.java From presto with Apache License 2.0

6 votes

/**
 * Checks recursively whether a Hive type is writable.
 *
 * @param typeInfo type to check
 * @return {@code true} for writable primitives and for maps, lists and structs made only of
 *         writable types; {@code false} for any other category
 */
private static boolean isWritableType(TypeInfo typeInfo)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return isWritablePrimitiveType(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
        case MAP:
            MapTypeInfo mapType = (MapTypeInfo) typeInfo;
            if (!isWritableType(mapType.getMapKeyTypeInfo())) {
                return false;
            }
            return isWritableType(mapType.getMapValueTypeInfo());
        case LIST:
            return isWritableType(((ListTypeInfo) typeInfo).getListElementTypeInfo());
        case STRUCT:
            // every member of the struct has to be writable
            for (TypeInfo memberType : ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()) {
                if (!isWritableType(memberType)) {
                    return false;
                }
            }
            return true;
        default:
            return false;
    }
}

Source File: HiveType.java From presto with Apache License 2.0

6 votes

/**
 * Resolves a chain of field indices into the matching struct field names.
 *
 * Starting from this type's {@code TypeInfo}, each index selects a field of the current struct;
 * the selected field's type becomes the struct inspected for the next index.
 *
 * @param dereferences field indices, outermost first
 * @return the field names selected by the indices, in the same order
 * @throws IllegalArgumentException if a type on the chain is not a struct, or an index is negative
 *         or not smaller than the number of fields of the struct it applies to
 */
public List<String> getHiveDereferenceNames(List<Integer> dereferences)
{
    ImmutableList.Builder<String> dereferenceNames = ImmutableList.builder();
    TypeInfo typeInfo = getTypeInfo();
    for (int fieldIndex : dereferences) {
        // the message needs a %s placeholder; without it the argument is only appended in square brackets
        checkArgument(typeInfo instanceof StructTypeInfo, "typeInfo should be struct type, but was %s", typeInfo);
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        List<String> fieldNames = structTypeInfo.getAllStructFieldNames();

        checkArgument(fieldIndex >= 0, "fieldIndex cannot be negative");
        checkArgument(fieldIndex < fieldNames.size(),
                "fieldIndex should be less than the number of fields in the struct");
        dereferenceNames.add(fieldNames.get(fieldIndex));
        // descend into the selected field for the next index
        typeInfo = structTypeInfo.getAllStructFieldTypeInfos().get(fieldIndex);
    }

    return dereferenceNames.build();
}

Source File: MDSSerde.java From multiple-dimension-spread with Apache License 2.0

6 votes

/**
 * Builds the root struct type covering every column and records each column's position in filedIndexMap.
 *
 * @param columnNameProperty comma separated column names; may be null or empty
 * @param columnTypeProperty Hive type string describing the column types
 * @return struct type made of the parsed names and types
 */
private StructTypeInfo getAllReadTypeInfo( final String columnNameProperty , final String columnTypeProperty ){
  ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  ArrayList<String> names = new ArrayList<String>();

  boolean hasColumnNames = columnNameProperty != null && !columnNameProperty.isEmpty();
  if ( hasColumnNames ) {
    int position = 0;
    for( String columnName : columnNameProperty.split(",") ){
      names.add( columnName );
      filedIndexMap.put( columnName , position );
      position++;
    }
  }

  StructTypeInfo allColumnsType = new StructTypeInfo();
  allColumnsType.setAllStructFieldNames( names );
  allColumnsType.setAllStructFieldTypeInfos( columnTypes );
  return allColumnsType;
}

Source File: NiFiRecordSerDe.java From nifi with Apache License 2.0

6 votes

/**
 * Converts a record into a row list with one slot per field of the struct schema.
 *
 * @param record record to read values from
 * @param schema struct type whose field count determines the size of the row
 * @return row values; slots start as {@code null} and are filled by {@code populateRecord}
 * @throws SerDeException if populating fails; other exceptions are logged and wrapped
 */
private List<Object> deserialize(Record record, StructTypeInfo schema) throws SerDeException {
    final int slotCount = schema.getAllStructFieldNames().size();
    // pre-fill with nulls so that every position of the row exists up front
    List<Object> row = new ArrayList<>(Collections.nCopies(slotCount, null));

    try {
        for (RecordField recordField : record.getSchema().getFields()) {
            populateRecord(row, record.getValue(recordField), recordField, schema);
        }
    } catch(SerDeException serdeFailure) {
        log.error("Error [{}] parsing Record [{}].", new Object[]{serdeFailure.toString(), record}, serdeFailure);
        throw serdeFailure;
    } catch (Exception failure) {
        log.error("Error [{}] parsing Record [{}].", new Object[]{failure.toString(), record}, failure);
        throw new SerDeException(failure);
    }

    return row;
}

Source File: MDSSerde.java From multiple-dimension-spread with Apache License 2.0

6 votes

/**
 * Builds the root struct type restricted to the projected columns.
 *
 * Every column, projected or not, gets its position recorded in filedIndexMap.
 *
 * @param columnNameProperty comma separated names of all columns
 * @param columnTypeProperty Hive type string describing all column types
 * @param projectionColumnNames comma separated names of the columns to keep
 * @return struct type holding only the projected columns, in table order
 */
private StructTypeInfo getColumnProjectionTypeInfo( final String columnNameProperty , final String columnTypeProperty , final String projectionColumnNames ){
  Set<String> projected = new HashSet<String>();
  for( String projectedName : projectionColumnNames.split(",") ){
    projected.add( projectedName );
  }

  ArrayList<TypeInfo> allTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  String[] allNames = columnNameProperty.split(",");

  ArrayList<String> keptNames = new ArrayList<String>();
  ArrayList<TypeInfo> keptTypes = new ArrayList<TypeInfo>();
  int columnCount = allTypes.size();
  for( int position = 0 ; position < columnCount ; position++ ){
    String columnName = allNames[position];
    if( projected.contains( columnName ) ){
      keptNames.add( columnName );
      keptTypes.add( allTypes.get(position) );
    }
    filedIndexMap.put( columnName , position );
  }

  StructTypeInfo projectedType = new StructTypeInfo();
  projectedType.setAllStructFieldNames( keptNames );
  projectedType.setAllStructFieldTypeInfos( keptTypes );
  return projectedType;
}

Source File: MDSSerde.java From multiple-dimension-spread with Apache License 2.0

6 votes

/**
 * Sets up the object inspector from the table's column properties.
 *
 * When the configuration names projected columns, only those columns make up the root type;
 * otherwise all columns are used.
 *
 * @param conf configuration that may carry the projected column names
 * @param table table properties holding the column names and types
 * @param part partition properties; only logged, may be null
 */
@Override
public void initialize( final Configuration conf, final Properties table , final Properties part ) throws SerDeException{
  LOG.info( table.toString() );
  if( part != null ){
    LOG.info( part.toString() );
  }

  String names = table.getProperty(serdeConstants.LIST_COLUMNS);
  String types = table.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  String projection = conf.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , "" );

  // an empty projection means that every column is read
  StructTypeInfo rootType = projection.isEmpty()
      ? getAllReadTypeInfo( names , types )
      : getColumnProjectionTypeInfo( names , types , projection );

  inspector = MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( rootType );
}

Source File: OrcStorage.java From spork with Apache License 2.0

6 votes

/**
 * Records which columns are required and stores the resulting flags in the UDF properties.
 *
 * @param requiredFieldList fields requested by the caller; may be null
 * @return {@code null} when {@code requiredFieldList} is null, otherwise a response created with {@code true}
 * @throws FrontendException declared by the overridden method
 * @throws RuntimeException if the required-column flags cannot be serialized; the original
 *         failure is attached as the cause
 */
@Override
public RequiredFieldResponse pushProjection(
        RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null) {
        return null;
    }
    if (requiredFieldList.getFields() != null) {
        int schemaSize = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos().size();
        mRequiredColumns = new boolean[schemaSize];
        for (RequiredField rf : requiredFieldList.getFields()) {
            // fields reporting an index of -1 are skipped
            if (rf.getIndex() != -1) {
                mRequiredColumns[rf.getIndex()] = true;
            }
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature + RequiredColumnsSuffix, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            // keep the underlying failure as the cause so that the root problem stays visible
            throw new RuntimeException("Cannot serialize mRequiredColumns", e);
        }
    }
    return new RequiredFieldResponse(true);
}

Source File: JSONSerDe.java From searchanalytics-bigdata with MIT License

6 votes

/**
 * Parses a JSON object into a struct row, using the Hive metadata to decide
 * how each member is parsed.
 *
 * @param field
 *            - The JSON object to parse; it is cast to a map
 * @param fieldTypeInfo
 *            - Metadata about the Hive struct column
 * @return - A list with one parsed value per struct field, in the order of
 *         the struct's field names
 */
@SuppressWarnings("unchecked")
private Object parseStruct(final Object field,
		final StructTypeInfo fieldTypeInfo) {
	final Map<Object, Object> jsonObject = (Map<Object, Object>) field;
	final ArrayList<TypeInfo> memberTypes = fieldTypeInfo
			.getAllStructFieldTypeInfos();
	final ArrayList<String> memberNames = fieldTypeInfo
			.getAllStructFieldNames();

	final List<Object> row = new ArrayList<Object>(memberTypes.size());
	int position = 0;
	for (final String memberName : memberNames) {
		// look the value up by field name, then parse it with the type at the same position
		final Object rawValue = jsonObject.get(memberName);
		row.add(parseField(rawValue, memberTypes.get(position)));
		position++;
	}
	return row;
}

Source File: EmoSerDe.java From emodb with Apache License 2.0

6 votes

/**
 * Deserializes a map into a struct value.
 *
 * @param type struct type describing the expected fields
 * @param data raw value; has to be a {@code Map}
 * @return list of deserialized values, in the same order as the struct's field names
 * @throws SerDeException if {@code data} is not a map
 */
private Object deserializeStruct(StructTypeInfo type, Object data)
        throws SerDeException {
    if (!(data instanceof Map)) {
        throw new SerDeException("Value not of type map");
    }
    //noinspection unchecked
    Map<String, Object> source = (Map<String, Object>) data;

    List<String> names = type.getAllStructFieldNames();
    List<TypeInfo> types = type.getAllStructFieldTypeInfos();

    // A struct value is a list whose entries line up with the field names.
    List<Object> structValues = Lists.newArrayListWithCapacity(names.size());
    int position = 0;
    for (String name : names) {
        Object raw = getRawValueOrNullIfAbsent(name, source);
        structValues.add(deserialize(types.get(position), raw));
        position++;
    }

    return structValues;
}

Source File: JSONSerDe.java From searchanalytics-bigdata with MIT License

6 votes

/**
 * An initialization function used to gather information about the table.
 * Reads the column names and column types from the table properties and
 * builds the row type info and row object inspector from them.
 *
 * @param conf
 *            - Configuration; not used by this method
 * @param tbl
 *            - Table properties holding the column names and column types
 * @throws SerDeException
 *             declared by the overridden method
 */
@Override
public void initialize(final Configuration conf, final Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	// Column names are lower-cased. Locale.ROOT keeps the result independent
	// of the JVM's default locale (for example, a Turkish default locale
	// would otherwise turn "I" into a dotless "i").
	colNames = Arrays.asList(colNamesStr.toLowerCase(java.util.Locale.ROOT)
			.split(","));
	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	final String colTypesStr = tbl
			.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	final List<TypeInfo> colTypes = TypeInfoUtils
			.getTypeInfosFromTypeString(colTypesStr);
	rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
			colNames, colTypes);
	rowOI = TypeInfoUtils
			.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}

Source File: BlurObjectInspectorGenerator.java From incubator-retired-blur with Apache License 2.0

6 votes

/**
 * Recursively creates the object inspector for a primitive, struct or list type.
 *
 * @param ti type to create an inspector for
 * @return inspector matching the type
 * @throws SerDeException if the type's category is not one of the three handled here
 */
private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeException {
  switch (ti.getCategory()) {
  case PRIMITIVE:
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) ti);
  case STRUCT: {
    StructTypeInfo structType = (StructTypeInfo) ti;
    List<TypeInfo> memberTypes = structType.getAllStructFieldTypeInfos();
    // one inspector per struct member, in member order
    List<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>(memberTypes.size());
    for (TypeInfo memberType : memberTypes) {
      memberInspectors.add(createObjectInspectorWorker(memberType));
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(
        structType.getAllStructFieldNames(), memberInspectors);
  }
  case LIST: {
    TypeInfo elementType = ((ListTypeInfo) ti).getListElementTypeInfo();
    ObjectInspector elementInspector = createObjectInspectorWorker(elementType);
    return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
  }
  default:
    throw new SerDeException("No Hive categories matched for [" + ti + "]");
  }
}

Source File: ArrayWritableObjectInspector.java From indexr with Apache License 2.0

6 votes

/**
 * Creates an inspector for a row described by the given struct type.
 *
 * Each struct field gets a {@code StructFieldImpl} that is stored both by position and by name.
 *
 * @param rowTypeInfo struct type describing the row
 */
public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) {
    typeInfo = rowTypeInfo;
    fieldNames = rowTypeInfo.getAllStructFieldNames();
    fieldInfos = rowTypeInfo.getAllStructFieldTypeInfos();

    final int fieldCount = fieldNames.size();
    fields = new ArrayList<StructField>(fieldCount);
    fieldsByName = new HashMap<String, StructFieldImpl>();

    int position = 0;
    for (final String fieldName : fieldNames) {
        final TypeInfo fieldType = fieldInfos.get(position);
        final StructFieldImpl structField =
            new StructFieldImpl(fieldName, getObjectInspector(fieldType), position);
        fields.add(structField);
        fieldsByName.put(fieldName, structField);
        position++;
    }
}

Source File: ArrayWritableObjectInspector.java From parquet-mr with Apache License 2.0

6 votes

/**
 * Creates an inspector for a row described by the given struct type.
 *
 * A {@code StructFieldImpl} is built for every struct field and registered both in the
 * positional field list and in the lookup map keyed by field name.
 *
 * @param rowTypeInfo struct type describing the row
 */
public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) {
    typeInfo = rowTypeInfo;
    fieldNames = rowTypeInfo.getAllStructFieldNames();
    fieldInfos = rowTypeInfo.getAllStructFieldTypeInfos();

    final int total = fieldNames.size();
    fields = new ArrayList<StructField>(total);
    fieldsByName = new HashMap<String, StructFieldImpl>();

    int index = 0;
    for (final String columnName : fieldNames) {
      final TypeInfo columnType = fieldInfos.get(index);
      final StructFieldImpl column =
          new StructFieldImpl(columnName, getObjectInspector(columnType), index);
      fields.add(column);
      fieldsByName.put(columnName, column);
      index++;
    }
}

Source File: CobolDeserializer.java From Cobol-to-Hive with Apache License 2.0

6 votes

/**
 * Dispatches deserialization of a column to the handler for its type category.
 *
 * @param columnName name of the column being deserialized
 * @param columnType Hive type of the column
 * @return the value produced by the category-specific deserializer
 * @throws RuntimeException for map columns and for categories not handled here
 */
private Object worker(String columnName, TypeInfo columnType){
	switch(columnType.getCategory()) {
		case PRIMITIVE:
			return deserializePrimitive(columnName, (PrimitiveTypeInfo) columnType);
		case LIST:
			return deserializeList(columnName, (ListTypeInfo) columnType);
		case STRUCT:
			return deserializeStruct(columnName, (StructTypeInfo) columnType);
		case UNION:
			return deserializeUnion(columnName,(UnionTypeInfo) columnType);
		case MAP:
			// maps are rejected outright for cobol layouts
			throw new RuntimeException("map type is not possible for cobol layout" + columnType.getCategory());
		default:
			throw new RuntimeException("Unknown TypeInfo: " + columnType.getCategory());
	}
}

Source File: JSONCDHSerDe.java From bigdata-tutorial with Apache License 2.0

6 votes

/**
 * Gathers the table's column information. The column names and column types
 * are read from the table properties and turned into the row type info and
 * the row object inspector used for serialization and deserialization.
 *
 * @param conf configuration; not used by this method
 * @param tbl  table properties holding the column names and column types
 */
@Override
public void initialize(Configuration conf, Properties tbl)
		throws SerDeException {
	// Column names arrive as one comma separated property.
	colNames = Arrays.asList(
			tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));

	// The parsed types line up with the column names.
	List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
			tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));

	rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
			colNames, columnTypes);
	rowOI = TypeInfoUtils
			.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}

Source File: JSONCDHSerDe.java From bigdata-tutorial with Apache License 2.0

6 votes

/**
 * Parses a JSON value according to the category of the Hive column's type.
 *
 * @param field         - The JSON value to parse
 * @param fieldTypeInfo - Metadata about the Hive column
 * @return - The parsed value; {@code null} for union columns and for
 * categories not handled here
 */
private Object parseField(Object field, TypeInfo fieldTypeInfo) {
	switch (fieldTypeInfo.getCategory()) {
		case PRIMITIVE:
			// Primitive values are passed through as-is, except that line
			// breaks inside strings are replaced by a backslash followed by 'n'.
			return (field instanceof String)
					? field.toString().replace("\n", "\\n")
					: field;
		case STRUCT:
			return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
		case MAP:
			return parseMap(field, (MapTypeInfo) fieldTypeInfo);
		case LIST:
			return parseList(field, (ListTypeInfo) fieldTypeInfo);
		case UNION:
			// Unsupported by JSON
		default:
			return null;
	}
}

Source File: HiveOrcSerDeManager.java From incubator-gobblin with Apache License 2.0

6 votes

/**
 * Sets the column name and column type SerDe properties on the registration unit, based on the
 * schema obtained via {@code getSchemaFromLatestFile}.
 *
 * Can be overridden when the schema should come from a source other than HDFS.
 *
 * To initialize an {@link org.apache.hadoop.hive.ql.io.orc.OrcSerde} object, these are required:
 * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMNS and
 * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMN_TYPES
 *
 * @throws IllegalStateException if the schema is not a struct
 */
protected void addSchemaPropertiesHelper(Path path, HiveRegistrationUnit hiveUnit) throws IOException {
  TypeInfo schema = getSchemaFromLatestFile(path, this.fs);
  if (!(schema instanceof StructTypeInfo)) {
    // Hive always uses a struct with a field for each of the top-level columns as the root object type.
    // So for here we assume to-be-registered ORC files follow this pattern.
    throw new IllegalStateException("A valid ORC schema should be an instance of struct");
  }

  StructTypeInfo rootStruct = (StructTypeInfo) schema;
  String columnNames = Joiner.on(",").join(rootStruct.getAllStructFieldNames());
  hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMNS, columnNames);

  String columnTypes = Joiner.on(",").join(
      rootStruct.getAllStructFieldTypeInfos().stream()
          .map(TypeInfo::getTypeName)
          .collect(Collectors.toList()));
  hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMN_TYPES, columnTypes);
}

Source File: OrcLazyRowObjectInspector.java From hive-dwrf with Apache License 2.0

5 votes

/**
 * Creates a row inspector with one lazy field inspector per field of the struct type.
 *
 * @param info struct type describing the row
 */
public OrcLazyRowObjectInspector(StructTypeInfo info) {
  super(info.getAllStructFieldNames().size());
  ArrayList<String> names = info.getAllStructFieldNames();
  ArrayList<TypeInfo> types = info.getAllStructFieldTypeInfos();

  int position = 0;
  for (String name : names) {
    fields.add(new Field(name,
        OrcLazyObjectInspectorUtils.createLazyObjectInspector(types.get(position)), position));
    position++;
  }
}

Source File: OrcUtils.java From spork with Apache License 2.0

5 votes

/**
 * Creates a struct inspector with one field per member of the struct type.
 *
 * @param info struct type to inspect
 */
PigStructInspector(StructTypeInfo info) {
    ArrayList<String> names = info.getAllStructFieldNames();
    ArrayList<TypeInfo> types = info.getAllStructFieldTypeInfos();
    fields = new ArrayList<StructField>(names.size());

    int position = 0;
    for (String name : names) {
        fields.add(new Field(name, createObjectInspector(types.get(position)), position));
        position++;
    }
}

Source File: OrcLazyStructObjectInspector.java From hive-dwrf with Apache License 2.0

5 votes

/**
 * Creates a struct inspector with one writable field inspector per member of the struct type.
 *
 * @param info struct type to inspect
 */
public OrcLazyStructObjectInspector(StructTypeInfo info) {
  ArrayList<String> names = info.getAllStructFieldNames();
  ArrayList<TypeInfo> types = info.getAllStructFieldTypeInfos();
  fields = new ArrayList<StructField>(names.size());

  int position = 0;
  for (String name : names) {
    fields.add(new Field(name,
        OrcLazyObjectInspectorUtils.createWritableObjectInspector(types.get(position)), position));
    position++;
  }
}

Source File: TestSchemaConversion.java From kite with Apache License 2.0

5 votes

/** Checks that a simple record converts to a struct with the expected field names and types. */
@Test
public void testSimpleRecord() {
  TypeInfo converted = HiveSchemaConverter.convert(SIMPLE_RECORD);

  Assert.assertTrue("Record should be converted to struct",
      converted instanceof StructTypeInfo);

  // the cast is safe once the assertion above has passed
  StructTypeInfo struct = (StructTypeInfo) converted;
  Assert.assertEquals("Field names should match",
      Lists.newArrayList("id", "name"),
      struct.getAllStructFieldNames());
  Assert.assertEquals("Field types should match",
      Lists.newArrayList(
          INT_TYPE_INFO,
          STRING_TYPE_INFO),
      struct.getAllStructFieldTypeInfos());
}

Source File: ArrayWritableObjectInspector.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Picks the object inspector for a Hive type.
 *
 * @param typeInfo type to find an inspector for
 * @return inspector for the primitive, struct, list or map type
 * @throws UnsupportedOperationException for the timestamp type
 * @throws IllegalArgumentException for any type not handled here
 */
private ObjectInspector getObjectInspector(final TypeInfo typeInfo) {
  if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetStringInspector;
  }

  // nested types are resolved recursively
  final Category category = typeInfo.getCategory();
  if (category.equals(Category.STRUCT)) {
    return new ArrayWritableObjectInspector((StructTypeInfo) typeInfo);
  }
  if (category.equals(Category.LIST)) {
    final TypeInfo elementType = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
    return new ParquetHiveArrayInspector(getObjectInspector(elementType));
  }
  if (category.equals(Category.MAP)) {
    final TypeInfo keyType = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
    final TypeInfo valueType = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
    final ObjectInspector keyInspector = getObjectInspector(keyType);
    final ObjectInspector valueInspector = getObjectInspector(valueType);
    // string, byte and short keys get the "deep" map inspector, all other keys the standard one
    final boolean deepKey = keyType.equals(TypeInfoFactory.stringTypeInfo)
        || keyType.equals(TypeInfoFactory.byteTypeInfo)
        || keyType.equals(TypeInfoFactory.shortTypeInfo);
    if (deepKey) {
      return new DeepParquetHiveMapInspector(keyInspector, valueInspector);
    }
    return new StandardParquetHiveMapInspector(keyInspector, valueInspector);
  }

  if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
    throw new UnsupportedOperationException("timestamp not implemented yet");
  }
  if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetByteInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetShortInspector;
  }
  throw new IllegalArgumentException("Unknown field info: " + typeInfo);
}

Source File: ParquetHiveSerDe.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Reads the column names and types from the table properties, builds the row object inspector
 * and resets the statistics fields.
 *
 * @param conf configuration; not used by this method
 * @param tbl table properties holding the column names and column types
 * @throws IllegalArgumentException if the number of column names and column types differ
 */
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
  final String namesProperty = tbl.getProperty(IOConstants.COLUMNS);
  final String typesProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

  // an empty property means that there are no columns
  final List<String> columnNames = namesProperty.isEmpty()
      ? new ArrayList<String>()
      : Arrays.asList(namesProperty.split(","));
  final List<TypeInfo> columnTypes = typesProperty.isEmpty()
      ? new ArrayList<TypeInfo>()
      : TypeInfoUtils.getTypeInfosFromTypeString(typesProperty);

  if (columnNames.size() != columnTypes.size()) {
    throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
      "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
      columnTypes);
  }

  // Create row related objects
  final TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

  // Stats part
  stats = new SerDeStats();
  serializedSize = 0;
  deserializedSize = 0;
  status = LAST_OPERATION.UNKNOWN;
}

Source File: HiveSchemaConverter.java From kite with Apache License 2.0

5 votes

/**
 * Produces the doc string for a converted type.
 *
 * @param type Hive type that was converted
 * @return {@code null} for struct types, otherwise a note naming the original type
 */
private static String doc(TypeInfo type) {
  // structs get no doc: struct<a:t1,b:t2> would repeat what the doc strings of fields a and b say
  return (type instanceof StructTypeInfo)
      ? null
      : "Converted from '" + type + "'";
}

Source File: HiveSchemaConverter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Converts a Hive type into the matching Parquet type.
 *
 * @param name name given to the resulting type
 * @param typeInfo Hive type to convert
 * @param repetition repetition applied to primitive types
 * @return the Parquet type for the primitive, list, struct or map
 * @throws UnsupportedOperationException for binary, timestamp, void, unknown and union types
 * @throws IllegalArgumentException for any other type
 */
private static Type convertType(final String name, final TypeInfo typeInfo, final Repetition repetition) {
  final Category category = typeInfo.getCategory();

  if (category.equals(Category.PRIMITIVE)) {
    if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.BINARY, name);
    }
    // int, short and byte all map to INT32
    final boolean fitsInt32 = typeInfo.equals(TypeInfoFactory.intTypeInfo)
        || typeInfo.equals(TypeInfoFactory.shortTypeInfo)
        || typeInfo.equals(TypeInfoFactory.byteTypeInfo);
    if (fitsInt32) {
      return new PrimitiveType(repetition, PrimitiveTypeName.INT32, name);
    }
    if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.INT64, name);
    }
    if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.DOUBLE, name);
    }
    if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.FLOAT, name);
    }
    if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, name);
    }
    if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
      // TODO : binaryTypeInfo is a byte array. Need to map it
      throw new UnsupportedOperationException("Binary type not implemented");
    }
    if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
      throw new UnsupportedOperationException("Timestamp type not implemented");
    }
    if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
      throw new UnsupportedOperationException("Void type not implemented");
    }
    if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
      throw new UnsupportedOperationException("Unknown type not implemented");
    }
    throw new IllegalArgumentException("Unknown type: " + typeInfo);
  }

  if (category.equals(Category.LIST)) {
    return convertArrayType(name, (ListTypeInfo) typeInfo);
  }
  if (category.equals(Category.STRUCT)) {
    return convertStructType(name, (StructTypeInfo) typeInfo);
  }
  if (category.equals(Category.MAP)) {
    return convertMapType(name, (MapTypeInfo) typeInfo);
  }
  if (category.equals(Category.UNION)) {
    throw new UnsupportedOperationException("Union type not implemented");
  }
  throw new IllegalArgumentException("Unknown type: " + typeInfo);
}

Source File: HiveType.java From presto with Apache License 2.0

5 votes

/**
 * Checks recursively whether a Hive type is supported for the given storage format.
 *
 * @param typeInfo type to check
 * @param storageFormat storage format of the table; only consulted for union types
 * @return {@code true} when the type and all types nested inside it are supported
 */
public static boolean isSupportedType(TypeInfo typeInfo, StorageFormat storageFormat)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return getPrimitiveType((PrimitiveTypeInfo) typeInfo) != null;
        case MAP:
            MapTypeInfo mapType = (MapTypeInfo) typeInfo;
            if (!isSupportedType(mapType.getMapKeyTypeInfo(), storageFormat)) {
                return false;
            }
            return isSupportedType(mapType.getMapValueTypeInfo(), storageFormat);
        case LIST:
            return isSupportedType(((ListTypeInfo) typeInfo).getListElementTypeInfo(), storageFormat);
        case STRUCT:
            for (TypeInfo memberType : ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()) {
                if (!isSupportedType(memberType, storageFormat)) {
                    return false;
                }
            }
            return true;
        case UNION:
            // Reading uniontypes as structs has only been verified against Avro and ORC tables:
            //   1. Avro tables are supported and verified.
            //   2. ORC tables are supported and verified.
            //   3. The Parquet format doesn't support uniontypes itself so there's no need to add support for it in Presto.
            //   4. TODO: RCFile tables are not supported yet.
            //   5. TODO: The support for Avro is done in SerDeUtils so it's possible that formats other than Avro are also supported. But verification is needed.
            boolean verifiedFormat = storageFormat.getSerDe().equalsIgnoreCase(AVRO.getSerDe())
                    || storageFormat.getSerDe().equalsIgnoreCase(ORC.getSerDe());
            if (!verifiedFormat) {
                return false;
            }
            for (TypeInfo alternativeType : ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos()) {
                if (!isSupportedType(alternativeType, storageFormat)) {
                    return false;
                }
            }
            return true;
        default:
            return false;
    }
}

org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo Java Examples