org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveCoercionRecordCursor.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a coercer that converts map values from one Hive map type to another.
 * <p>
 * A key (or value) coercer is created only when the source and target key (or value)
 * Hive types differ; the corresponding slot in {@code coercers} is left {@code null}
 * otherwise. The page builder is allocated only when at least one coercer exists.
 *
 * @param typeManager passed to {@code HiveType#getType} and to the nested coercers
 * @param fromHiveType source Hive type; its type info is cast to {@link MapTypeInfo}
 * @param toHiveType target Hive type; its type info is cast to {@link MapTypeInfo}
 * @param bridgingRecordCursor cursor kept by this coercer and handed to the nested coercers
 */
public MapCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, BridgingRecordCursor bridgingRecordCursor)
{
    // The message used to read "typeManage is null", which named a non-existent parameter.
    requireNonNull(typeManager, "typeManager is null");
    requireNonNull(fromHiveType, "fromHiveType is null");
    requireNonNull(toHiveType, "toHiveType is null");
    this.bridgingRecordCursor = requireNonNull(bridgingRecordCursor, "bridgingRecordCursor is null");

    MapTypeInfo fromMapTypeInfo = (MapTypeInfo) fromHiveType.getTypeInfo();
    MapTypeInfo toMapTypeInfo = (MapTypeInfo) toHiveType.getTypeInfo();
    HiveType fromKeyHiveType = HiveType.valueOf(fromMapTypeInfo.getMapKeyTypeInfo().getTypeName());
    HiveType fromValueHiveType = HiveType.valueOf(fromMapTypeInfo.getMapValueTypeInfo().getTypeName());
    HiveType toKeyHiveType = HiveType.valueOf(toMapTypeInfo.getMapKeyTypeInfo().getTypeName());
    HiveType toValueHiveType = HiveType.valueOf(toMapTypeInfo.getMapValueTypeInfo().getTypeName());

    this.fromKeyValueTypes = fromHiveType.getType(typeManager).getTypeParameters();
    this.toType = toHiveType.getType(typeManager);
    this.toKeyValueTypes = toType.getTypeParameters();

    // Index 0 holds the key coercer, index 1 the value coercer; null means "no coercion needed".
    this.coercers = new Coercer[2];
    coercers[0] = fromKeyHiveType.equals(toKeyHiveType) ? null : createCoercer(typeManager, fromKeyHiveType, toKeyHiveType, bridgingRecordCursor);
    coercers[1] = fromValueHiveType.equals(toValueHiveType) ? null : createCoercer(typeManager, fromValueHiveType, toValueHiveType, bridgingRecordCursor);
    this.pageBuilder = coercers[0] == null && coercers[1] == null ? null : new PageBuilder(ImmutableList.of(toType));
}
 
Example #2
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a parsed JSON value into the form used for a Hive column,
 * dispatching on the category of the column type.
 *
 * @param field         - The JSON value to convert
 * @param fieldTypeInfo - Metadata about the Hive column
 * @return - The converted value; null for union and unrecognized categories
 */
private Object parseField(Object field, TypeInfo fieldTypeInfo) {
	switch (fieldTypeInfo.getCategory()) {
		case LIST:
			return parseList(field, (ListTypeInfo) fieldTypeInfo);
		case MAP:
			return parseMap(field, (MapTypeInfo) fieldTypeInfo);
		case STRUCT:
			return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
		case PRIMITIVE:
			// Jackson already produced the right object; only strings need
			// their line breaks turned into a literal backslash-n sequence.
			if (!(field instanceof String)) {
				return field;
			}
			return field.toString().replaceAll("\n", "\\\\n");
		case UNION:
			// Unsupported by JSON
		default:
			return null;
	}
}
 
Example #3
Source File: EmoSerDe.java    From emodb with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a new hash map whose keys and values are the deserialized forms of the
 * entries of {@code data}, using the key and value type infos of {@code type}.
 *
 * @throws SerDeException if {@code data} is not a {@link Map}
 */
private Object deserializeMap(MapTypeInfo type, Object data)
        throws SerDeException {
    if (data instanceof Map) {
        Map<Object, Object> converted = Maps.newHashMap();
        for (Map.Entry<?, ?> rawEntry : ((Map<?, ?>) data).entrySet()) {
            // Arguments are evaluated left to right: key first, then value.
            converted.put(
                    deserialize(type.getMapKeyTypeInfo(), rawEntry.getKey()),
                    deserialize(type.getMapValueTypeInfo(), rawEntry.getValue()));
        }
        return converted;
    }
    throw new SerDeException("Value not of type map");
}
 
Example #4
Source File: MDSMapObjectInspector.java    From multiple-dimension-spread with Apache License 2.0 6 votes vote down vote up
public MDSMapObjectInspector( final MapTypeInfo typeInfo ){
  TypeInfo keyTypeInfo = typeInfo.getMapKeyTypeInfo();
  if( keyTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE && ( (PrimitiveTypeInfo)keyTypeInfo ).getPrimitiveCategory() == PrimitiveCategory.STRING ){
    keyObjectInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
  }
  else{
    throw new RuntimeException( "Map key type is string only." );
  }

  valueObjectInspector = MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( typeInfo.getMapValueTypeInfo() ); 

  if( valueObjectInspector.getCategory() == ObjectInspector.Category.PRIMITIVE ){
    getField = new PrimitiveGetField( (PrimitiveObjectInspector)valueObjectInspector );
  }
  else if( valueObjectInspector.getCategory() == ObjectInspector.Category.UNION ){
    getField = new UnionGetField( (UnionTypeInfo)( typeInfo.getMapValueTypeInfo() ) );
  }
  else{
    getField = new NestedGetField();
  }
}
 
Example #5
Source File: HiveWriteUtils.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Checks recursively whether the given Hive type is writable: primitives are delegated to
 * {@code isWritablePrimitiveType}, maps need a writable key and value type, lists a writable
 * element type and structs only writable field types. Any other category is not writable.
 */
private static boolean isWritableType(TypeInfo typeInfo)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return isWritablePrimitiveType(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
        case LIST:
            return isWritableType(((ListTypeInfo) typeInfo).getListElementTypeInfo());
        case MAP: {
            MapTypeInfo map = (MapTypeInfo) typeInfo;
            if (!isWritableType(map.getMapKeyTypeInfo())) {
                return false;
            }
            return isWritableType(map.getMapValueTypeInfo());
        }
        case STRUCT:
            // Stop at the first field that is not writable.
            for (TypeInfo fieldType : ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()) {
                if (!isWritableType(fieldType)) {
                    return false;
                }
            }
            return true;
        default:
            return false;
    }
}
 
Example #6
Source File: HiveBucketing.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Walks down list element types and map key types until a primitive type is reached and
 * reports whether that primitive is {@code TIMESTAMP}.
 *
 * @throws UnsupportedOperationException for any category other than primitive, list or map
 */
private static boolean containsTimestampBucketedV2(TypeInfo type)
{
    TypeInfo current = type;
    while (true) {
        switch (current.getCategory()) {
            case PRIMITIVE:
                return ((PrimitiveTypeInfo) current).getPrimitiveCategory() == TIMESTAMP;
            case LIST:
                current = ((ListTypeInfo) current).getListElementTypeInfo();
                break;
            case MAP:
                // Note: we do not check map value type because HiveBucketingV2#hashOfMap hashes map values with v1
                current = ((MapTypeInfo) current).getMapKeyTypeInfo();
                break;
            default:
                // TODO: support more types, e.g. ROW
                throw new UnsupportedOperationException("Computation of Hive bucket hashCode is not supported for Hive category: " + current.getCategory());
        }
    }
}
 
Example #7
Source File: OrcFileWriter.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Java object inspector for a Hive type. Maps and lists are resolved recursively
 * through their key/value and element types; primitives are looked up by type name.
 *
 * @throws PrestoException with {@code GENERIC_INTERNAL_ERROR} for any other category
 */
private static ObjectInspector getJavaObjectInspector(TypeInfo typeInfo)
{
    Category kind = typeInfo.getCategory();
    if (kind == MAP) {
        MapTypeInfo map = (MapTypeInfo) typeInfo;
        ObjectInspector keyInspector = getJavaObjectInspector(map.getMapKeyTypeInfo());
        ObjectInspector valueInspector = getJavaObjectInspector(map.getMapValueTypeInfo());
        return getStandardMapObjectInspector(keyInspector, valueInspector);
    }
    if (kind == LIST) {
        ObjectInspector elementInspector = getJavaObjectInspector(((ListTypeInfo) typeInfo).getListElementTypeInfo());
        return getStandardListObjectInspector(elementInspector);
    }
    if (kind == PRIMITIVE) {
        return getPrimitiveJavaObjectInspector(getPrimitiveTypeInfo(typeInfo.getTypeName()));
    }
    throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unhandled storage type: " + kind);
}
 
Example #8
Source File: HiveSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive type into a Parquet type with the given name and repetition.
 * Primitive Hive types map to Parquet primitive types; lists, structs and maps are
 * delegated to their dedicated converters.
 *
 * @throws UnsupportedOperationException for binary, timestamp, void, unknown and union types
 * @throws IllegalArgumentException for any type that is not recognized
 */
private static Type convertType(final String name, final TypeInfo typeInfo, final Repetition repetition) {
  final Category category = typeInfo.getCategory();

  if (category.equals(Category.PRIMITIVE)) {
    final PrimitiveTypeName primitiveName;
    if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
      primitiveName = PrimitiveTypeName.BINARY;
    } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
        typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
        typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
      // int, short and byte all map to INT32
      primitiveName = PrimitiveTypeName.INT32;
    } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
      primitiveName = PrimitiveTypeName.INT64;
    } else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
      primitiveName = PrimitiveTypeName.DOUBLE;
    } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
      primitiveName = PrimitiveTypeName.FLOAT;
    } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
      primitiveName = PrimitiveTypeName.BOOLEAN;
    } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
      // TODO : binaryTypeInfo is a byte array. Need to map it
      throw new UnsupportedOperationException("Binary type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
      throw new UnsupportedOperationException("Timestamp type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
      throw new UnsupportedOperationException("Void type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
      throw new UnsupportedOperationException("Unknown type not implemented");
    } else {
      throw new IllegalArgumentException("Unknown type: " + typeInfo);
    }
    return new PrimitiveType(repetition, primitiveName, name);
  }

  if (category.equals(Category.LIST)) {
    return convertArrayType(name, (ListTypeInfo) typeInfo);
  }
  if (category.equals(Category.STRUCT)) {
    return convertStructType(name, (StructTypeInfo) typeInfo);
  }
  if (category.equals(Category.MAP)) {
    return convertMapType(name, (MapTypeInfo) typeInfo);
  }
  if (category.equals(Category.UNION)) {
    throw new UnsupportedOperationException("Union type not implemented");
  }
  throw new IllegalArgumentException("Unknown type: " + typeInfo);
}
 
Example #9
Source File: HiveTypeUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a Hive type into the matching Flink data type, recursing into the
 * element, key/value and field types of lists, maps and structs.
 *
 * @param hiveType the Hive type to translate, must not be null
 * @return the Flink data type that corresponds to {@code hiveType}
 * @throws UnsupportedOperationException if the Hive type category is not handled
 */
public static DataType toFlinkType(TypeInfo hiveType) {
	checkNotNull(hiveType, "hiveType cannot be null");

	switch (hiveType.getCategory()) {
		case PRIMITIVE:
			return toFlinkPrimitiveType((PrimitiveTypeInfo) hiveType);
		case LIST:
			return DataTypes.ARRAY(toFlinkType(((ListTypeInfo) hiveType).getListElementTypeInfo()));
		case MAP: {
			MapTypeInfo map = (MapTypeInfo) hiveType;
			DataType keyType = toFlinkType(map.getMapKeyTypeInfo());
			DataType valueType = toFlinkType(map.getMapValueTypeInfo());
			return DataTypes.MAP(keyType, valueType);
		}
		case STRUCT: {
			StructTypeInfo struct = (StructTypeInfo) hiveType;
			List<String> fieldNames = struct.getAllStructFieldNames();
			List<TypeInfo> fieldTypes = struct.getAllStructFieldTypeInfos();

			// One row field per struct field name, paired with the type at the same index.
			DataTypes.Field[] rowFields = new DataTypes.Field[fieldNames.size()];
			int index = 0;
			while (index < rowFields.length) {
				rowFields[index] = DataTypes.FIELD(fieldNames.get(index), toFlinkType(fieldTypes.get(index)));
				index++;
			}
			return DataTypes.ROW(rowFields);
		}
		default:
			throw new UnsupportedOperationException(
				String.format("Flink doesn't support Hive data type %s yet.", hiveType));
	}
}
 
Example #10
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0 5 votes vote down vote up
/**
 * Parse a JSON object as a map. This uses the Hive metadata for the map
 * values to determine how to parse the values. The map is assumed to have
 * a string for a key; keys are left untouched. Values are replaced in
 * place, so the returned map is the same instance that was passed in.
 *
 * @param field         - The JSON object to parse, may be null
 * @param fieldTypeInfo - Metadata about the Hive column
 * @return - The given map with every value parsed, or null if field is null
 */
private Object parseMap(Object field, MapTypeInfo fieldTypeInfo) {
	@SuppressWarnings("unchecked")
	Map<Object, Object> map = (Map<Object, Object>) field;
	TypeInfo valueTypeInfo = fieldTypeInfo.getMapValueTypeInfo();
	if (map == null) {
		return null;
	}
	for (Map.Entry<Object, Object> entry : map.entrySet()) {
		// Write through the entry rather than calling map.put() on the map
		// whose entry set is being iterated.
		entry.setValue(parseField(entry.getValue(), valueTypeInfo));
	}
	return map;
}
 
Example #11
Source File: HiveDynamoDBMapType.java    From emr-dynamodb-connector with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the given Hive type is supported: a map with string keys whose value type
 * is accepted by {@code HiveDynamoDBTypeFactory}, or a struct whose field types are all
 * accepted by it. An {@link IllegalArgumentException} raised during the lookup is treated
 * as "not supported".
 */
@Override
public boolean supportsHiveType(TypeInfo typeInfo) {
  try {
    switch (typeInfo.getCategory()) {
      case STRUCT:
        // The lookup result is unused; the call is made only for the exception it may throw.
        for (TypeInfo memberType : ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()) {
          HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(memberType);
        }
        return true;

      case MAP:
        MapTypeInfo map = (MapTypeInfo) typeInfo;
        if (map.getMapKeyTypeInfo().equals(TypeInfoFactory.stringTypeInfo)) {
          HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(map.getMapValueTypeInfo());
          return true;
        }
        return false;

      default:
        return false;
    }
  } catch (IllegalArgumentException e) {
    return false;
  }
}
 
Example #12
Source File: HiveType.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Checks recursively whether a Hive type is supported for the given storage format.
 * Union types are accepted only when the storage format's SerDe matches the Avro or ORC SerDe
 * (ignoring case) and every member type is supported.
 */
public static boolean isSupportedType(TypeInfo typeInfo, StorageFormat storageFormat)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return getPrimitiveType((PrimitiveTypeInfo) typeInfo) != null;
        case LIST:
            return isSupportedType(((ListTypeInfo) typeInfo).getListElementTypeInfo(), storageFormat);
        case MAP: {
            MapTypeInfo map = (MapTypeInfo) typeInfo;
            if (!isSupportedType(map.getMapKeyTypeInfo(), storageFormat)) {
                return false;
            }
            return isSupportedType(map.getMapValueTypeInfo(), storageFormat);
        }
        case STRUCT:
            for (TypeInfo memberType : ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()) {
                if (!isSupportedType(memberType, storageFormat)) {
                    return false;
                }
            }
            return true;
        case UNION: {
            // This feature (reading uniontypes as structs) has only been verified against Avro and ORC tables. Here's a discussion:
            //   1. Avro tables are supported and verified.
            //   2. ORC tables are supported and verified.
            //   3. The Parquet format doesn't support uniontypes itself so there's no need to add support for it in Presto.
            //   4. TODO: RCFile tables are not supported yet.
            //   5. TODO: The support for Avro is done in SerDeUtils so it's possible that formats other than Avro are also supported. But verification is needed.
            String serDe = storageFormat.getSerDe();
            boolean verifiedFormat = serDe.equalsIgnoreCase(AVRO.getSerDe()) || serDe.equalsIgnoreCase(ORC.getSerDe());
            if (!verifiedFormat) {
                return false;
            }
            for (TypeInfo memberType : ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos()) {
                if (!isSupportedType(memberType, storageFormat)) {
                    return false;
                }
            }
            return true;
        }
        default:
            return false;
    }
}
 
Example #13
Source File: EmoSerDe.java    From emodb with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a raw value into the representation for the given type by dispatching on the
 * type category. A null raw value, or a category without a handler, yields null.
 */
private Object deserialize(TypeInfo type, Object rawValue)
        throws SerDeException {
    if (rawValue == null) {
        return null;
    }

    switch (type.getCategory()) {
        case PRIMITIVE:
            return deserializePrimitive((PrimitiveTypeInfo) type, rawValue);
        case STRUCT:
            return deserializeStruct((StructTypeInfo) type, rawValue);
        case MAP:
            return deserializeMap((MapTypeInfo) type, rawValue);
        case LIST:
            return deserializeList((ListTypeInfo) type, rawValue);
        case UNION:
            return deserializeUnion((UnionTypeInfo) type, rawValue);
        default:
            return null;
    }
}
 
Example #14
Source File: HivePageSource.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a coercer that converts map values from one Hive map type to another.
 * <p>
 * The key (or value) coercer is created only when the source and target key (or value)
 * Hive types differ; it is left {@code null} otherwise.
 *
 * @param typeManager passed to {@code HiveType#getType} and to the nested coercers
 * @param fromHiveType source Hive type; its type info is cast to {@link MapTypeInfo}
 * @param toHiveType target Hive type; its type info is cast to {@link MapTypeInfo}
 */
public MapCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType)
{
    // The message used to read "typeManage is null", which named a non-existent parameter.
    requireNonNull(typeManager, "typeManager is null");
    requireNonNull(fromHiveType, "fromHiveType is null");
    this.toType = requireNonNull(toHiveType, "toHiveType is null").getType(typeManager);

    MapTypeInfo fromMapTypeInfo = (MapTypeInfo) fromHiveType.getTypeInfo();
    MapTypeInfo toMapTypeInfo = (MapTypeInfo) toHiveType.getTypeInfo();
    HiveType fromKeyHiveType = HiveType.valueOf(fromMapTypeInfo.getMapKeyTypeInfo().getTypeName());
    HiveType fromValueHiveType = HiveType.valueOf(fromMapTypeInfo.getMapValueTypeInfo().getTypeName());
    HiveType toKeyHiveType = HiveType.valueOf(toMapTypeInfo.getMapKeyTypeInfo().getTypeName());
    HiveType toValueHiveType = HiveType.valueOf(toMapTypeInfo.getMapValueTypeInfo().getTypeName());

    // null means the respective side needs no coercion.
    this.keyCoercer = fromKeyHiveType.equals(toKeyHiveType) ? null : createCoercer(typeManager, fromKeyHiveType, toKeyHiveType);
    this.valueCoercer = fromValueHiveType.equals(toValueHiveType) ? null : createCoercer(typeManager, fromValueHiveType, toValueHiveType);
}
 
Example #15
Source File: HiveInspectors.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the object inspector for a Hive type, recursing into list element, map key/value
 * and struct field types.
 *
 * @throws CatalogException if the type category is not primitive, list, map or struct
 */
private static ObjectInspector getObjectInspector(TypeInfo type) {
	switch (type.getCategory()) {
		case PRIMITIVE:
			return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) type);
		case LIST: {
			ObjectInspector elementInspector = getObjectInspector(((ListTypeInfo) type).getListElementTypeInfo());
			return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
		}
		case MAP: {
			MapTypeInfo map = (MapTypeInfo) type;
			ObjectInspector keyInspector = getObjectInspector(map.getMapKeyTypeInfo());
			ObjectInspector valueInspector = getObjectInspector(map.getMapValueTypeInfo());
			return ObjectInspectorFactory.getStandardMapObjectInspector(keyInspector, valueInspector);
		}
		case STRUCT: {
			StructTypeInfo struct = (StructTypeInfo) type;

			// One inspector per struct field, in field order.
			List<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>();
			for (TypeInfo memberType : struct.getAllStructFieldTypeInfos()) {
				memberInspectors.add(getObjectInspector(memberType));
			}
			return ObjectInspectorFactory.getStandardStructObjectInspector(
					struct.getAllStructFieldNames(), memberInspectors);
		}
		default:
			throw new CatalogException("Unsupported Hive type category " + type.getCategory());
	}
}
 
Example #16
Source File: HiveInspectors.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the object inspector for a Hive type, recursing into list element, map key/value
 * and struct field types.
 *
 * @throws CatalogException if the type category is not primitive, list, map or struct
 */
private static ObjectInspector getObjectInspector(TypeInfo type) {
	switch (type.getCategory()) {
		case PRIMITIVE:
			return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) type);
		case LIST: {
			ObjectInspector elementInspector = getObjectInspector(((ListTypeInfo) type).getListElementTypeInfo());
			return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
		}
		case MAP: {
			MapTypeInfo map = (MapTypeInfo) type;
			ObjectInspector keyInspector = getObjectInspector(map.getMapKeyTypeInfo());
			ObjectInspector valueInspector = getObjectInspector(map.getMapValueTypeInfo());
			return ObjectInspectorFactory.getStandardMapObjectInspector(keyInspector, valueInspector);
		}
		case STRUCT: {
			StructTypeInfo struct = (StructTypeInfo) type;

			// One inspector per struct field, in field order.
			List<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>();
			for (TypeInfo memberType : struct.getAllStructFieldTypeInfos()) {
				memberInspectors.add(getObjectInspector(memberType));
			}
			return ObjectInspectorFactory.getStandardStructObjectInspector(
					struct.getAllStructFieldNames(), memberInspectors);
		}
		default:
			throw new CatalogException("Unsupported Hive type category " + type.getCategory());
	}
}
 
Example #17
Source File: HiveTypeUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a Hive type into the matching Flink data type, recursing into the
 * element, key/value and field types of lists, maps and structs.
 *
 * @param hiveType the Hive type to translate, must not be null
 * @return the Flink data type that corresponds to {@code hiveType}
 * @throws UnsupportedOperationException if the Hive type category is not handled
 */
public static DataType toFlinkType(TypeInfo hiveType) {
	checkNotNull(hiveType, "hiveType cannot be null");

	switch (hiveType.getCategory()) {
		case PRIMITIVE:
			return toFlinkPrimitiveType((PrimitiveTypeInfo) hiveType);
		case LIST:
			return DataTypes.ARRAY(toFlinkType(((ListTypeInfo) hiveType).getListElementTypeInfo()));
		case MAP: {
			MapTypeInfo map = (MapTypeInfo) hiveType;
			DataType keyType = toFlinkType(map.getMapKeyTypeInfo());
			DataType valueType = toFlinkType(map.getMapValueTypeInfo());
			return DataTypes.MAP(keyType, valueType);
		}
		case STRUCT: {
			StructTypeInfo struct = (StructTypeInfo) hiveType;
			List<String> fieldNames = struct.getAllStructFieldNames();
			List<TypeInfo> fieldTypes = struct.getAllStructFieldTypeInfos();

			// One row field per struct field name, paired with the type at the same index.
			DataTypes.Field[] rowFields = new DataTypes.Field[fieldNames.size()];
			int index = 0;
			while (index < rowFields.length) {
				rowFields[index] = DataTypes.FIELD(fieldNames.get(index), toFlinkType(fieldTypes.get(index)));
				index++;
			}
			return DataTypes.ROW(rowFields);
		}
		default:
			throw new UnsupportedOperationException(
				String.format("Flink doesn't support Hive data type %s yet.", hiveType));
	}
}
 
Example #18
Source File: MapKeyValuesSchemaConverter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into an optional group named {@code name}, with a required
 * key type and a value type converted from the map's key and value type infos.
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo)
{
    // Arguments are evaluated left to right, so the key is still converted before the value.
    return mapType(
            Repetition.OPTIONAL,
            name,
            "map",
            convertType(ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED),
            convertType(ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo()));
}
 
Example #19
Source File: SingleLevelArraySchemaConverter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into a map group named {@code name} with the given repetition.
 * The key is converted as a required type; the value uses the two-argument conversion.
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition)
{
    String keyFieldName = ParquetHiveSerDe.MAP_KEY.toString();
    Type convertedKey = convertType(keyFieldName, typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);

    String valueFieldName = ParquetHiveSerDe.MAP_VALUE.toString();
    Type convertedValue = convertType(valueFieldName, typeInfo.getMapValueTypeInfo());

    return ConversionPatterns.mapType(repetition, name, convertedKey, convertedValue);
}
 
Example #20
Source File: HiveSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into an optional map group named {@code name}. The key is
 * converted as a required type; the value uses the two-argument conversion.
 */
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) {
  final String keyFieldName = ParquetHiveSerDe.MAP_KEY.toString();
  final Type convertedKey = convertType(keyFieldName, typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);

  final String valueFieldName = ParquetHiveSerDe.MAP_VALUE.toString();
  final Type convertedValue = convertType(valueFieldName, typeInfo.getMapValueTypeInfo());

  return ConversionPatterns.mapType(Repetition.OPTIONAL, name, convertedKey, convertedValue);
}
 
Example #21
Source File: SingleLevelArrayMapKeyValuesSchemaConverter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive map type into a group named {@code name} with the given repetition,
 * with a required key type and a value type converted from the map's type infos.
 */
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition)
{
    // Arguments are evaluated left to right, so the key is still converted before the value.
    return mapType(
            repetition,
            name,
            "map",
            convertType(ParquetHiveSerDe.MAP_KEY.toString(), typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED),
            convertType(ParquetHiveSerDe.MAP_VALUE.toString(), typeInfo.getMapValueTypeInfo()));
}
 
Example #22
Source File: ArrayWritableObjectInspector.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the object inspector for a Hive type. Primitive types map to fixed inspectors,
 * structs and lists to nested inspectors, and maps to a deep or standard map inspector
 * depending on the key type.
 *
 * @throws UnsupportedOperationException for the timestamp type
 * @throws IllegalArgumentException for any type that is not recognized
 */
private ObjectInspector getObjectInspector(final TypeInfo typeInfo) {
  if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetStringInspector;
  }
  if (typeInfo.getCategory().equals(Category.STRUCT)) {
    return new ArrayWritableObjectInspector((StructTypeInfo) typeInfo);
  }
  if (typeInfo.getCategory().equals(Category.LIST)) {
    final TypeInfo elementTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
    return new ParquetHiveArrayInspector(getObjectInspector(elementTypeInfo));
  }
  if (typeInfo.getCategory().equals(Category.MAP)) {
    final TypeInfo mapKeyType = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
    final TypeInfo mapValueType = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
    // String, byte and short keys get the deep map inspector; all others the standard one.
    final boolean deepKey = mapKeyType.equals(TypeInfoFactory.stringTypeInfo)
        || mapKeyType.equals(TypeInfoFactory.byteTypeInfo)
        || mapKeyType.equals(TypeInfoFactory.shortTypeInfo);
    if (deepKey) {
      return new DeepParquetHiveMapInspector(getObjectInspector(mapKeyType), getObjectInspector(mapValueType));
    }
    return new StandardParquetHiveMapInspector(getObjectInspector(mapKeyType), getObjectInspector(mapValueType));
  }
  if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
    throw new UnsupportedOperationException("timestamp not implemented yet");
  }
  if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetByteInspector;
  }
  if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetShortInspector;
  }
  throw new IllegalArgumentException("Unknown field info: " + typeInfo);
}
 
Example #23
Source File: HiveCoercionPolicy.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether one Hive map type can be coerced to another: both must be maps, and
 * the key types as well as the value types must each be equal or coercible.
 */
private boolean canCoerceForMap(HiveType fromHiveType, HiveType toHiveType)
{
    boolean bothMaps = fromHiveType.getCategory() == Category.MAP && toHiveType.getCategory() == Category.MAP;
    if (!bothMaps) {
        return false;
    }

    MapTypeInfo sourceMap = (MapTypeInfo) fromHiveType.getTypeInfo();
    MapTypeInfo targetMap = (MapTypeInfo) toHiveType.getTypeInfo();
    HiveType sourceKey = HiveType.valueOf(sourceMap.getMapKeyTypeInfo().getTypeName());
    HiveType sourceValue = HiveType.valueOf(sourceMap.getMapValueTypeInfo().getTypeName());
    HiveType targetKey = HiveType.valueOf(targetMap.getMapKeyTypeInfo().getTypeName());
    HiveType targetValue = HiveType.valueOf(targetMap.getMapValueTypeInfo().getTypeName());

    // Keys are checked first; values are only looked at when the keys are compatible.
    if (!sourceKey.equals(targetKey) && !canCoerce(sourceKey, targetKey)) {
        return false;
    }
    return sourceValue.equals(targetValue) || canCoerce(sourceValue, targetValue);
}
 
Example #24
Source File: HiveBucketingV1.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the hash of a single map block by summing, over every pair of adjacent
 * positions, the XOR of the hash at the even position (key) and the next one (value).
 */
private static int hashOfMap(MapTypeInfo type, Block singleMapBlock)
{
    TypeInfo keyType = type.getMapKeyTypeInfo();
    TypeInfo valueType = type.getMapValueTypeInfo();

    int hashSum = 0;
    int keyPosition = 0;
    while (keyPosition < singleMapBlock.getPositionCount()) {
        int keyHash = hash(keyType, singleMapBlock, keyPosition);
        int valueHash = hash(valueType, singleMapBlock, keyPosition + 1);
        hashSum += keyHash ^ valueHash;
        keyPosition += 2;
    }
    return hashSum;
}
 
Example #25
Source File: JSONSerDe.java    From searchanalytics-bigdata with MIT License 5 votes vote down vote up
/**
 * Parses a JSON object according to the Hive column's type.
 * <p>
 * For primitive columns, strings have their line breaks replaced by a
 * literal backslash-n sequence, and non-null values of BIGINT columns are
 * converted to {@link Long} through their string form. A null value is
 * returned unchanged.
 *
 * @param field
 *            - The JSON object to parse
 * @param fieldTypeInfo
 *            - Metadata about the Hive column
 * @return - The parsed value of the field; null for union and
 *         unrecognized categories
 * @throws NumberFormatException
 *             if a non-null BIGINT value cannot be parsed as a long
 */
private Object parseField(Object field, final TypeInfo fieldTypeInfo) {
	switch (fieldTypeInfo.getCategory()) {
	case PRIMITIVE:
		// Jackson will return the right thing in this case, so just return
		// the object
		// Get type-safe JSON values
		if (field instanceof String) {
			field = field.toString().replaceAll("\n", "\\\\n");
		}
		// Skip null values: String.valueOf((Object) null) yields "null",
		// which would fail to parse and abort the whole record.
		if (field != null
				&& fieldTypeInfo.getTypeName().equalsIgnoreCase(
						serdeConstants.BIGINT_TYPE_NAME)) {
			field = Long.valueOf(String.valueOf(field));
		}
		return field;
	case LIST:
		return parseList(field, (ListTypeInfo) fieldTypeInfo);
	case MAP:
		return parseMap(field, (MapTypeInfo) fieldTypeInfo);
	case STRUCT:
		return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
	case UNION:
		// Unsupported by JSON
	default:
		return null;
	}
}
 
Example #26
Source File: XmlObjectInspectorFactory.java    From Hive-XML-SerDe with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the object inspector for the given type info. Lists, maps and
 * structs get XML-aware inspectors that are built recursively from their
 * element, key/value and field types.
 * 
 * @param typeInfo
 *            the type info
 * @param xmlProcessor
 *            the XML processor handed to the XML-aware inspectors
 * @return the object inspector for the given type info
 * @throws IllegalStateException
 *             if the type category is not primitive, list, map or struct
 */
public static ObjectInspector getStandardJavaObjectInspectorFromTypeInfo(TypeInfo typeInfo, XmlProcessor xmlProcessor) {
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
        case LIST:
            return new XmlListObjectInspector(
                getStandardJavaObjectInspectorFromTypeInfo(((ListTypeInfo) typeInfo).getListElementTypeInfo(), xmlProcessor),
                xmlProcessor);
        case MAP:
            MapTypeInfo mapInfo = (MapTypeInfo) typeInfo;
            ObjectInspector keyInspector = getStandardJavaObjectInspectorFromTypeInfo(mapInfo.getMapKeyTypeInfo(), xmlProcessor);
            ObjectInspector valueInspector = getStandardJavaObjectInspectorFromTypeInfo(mapInfo.getMapValueTypeInfo(), xmlProcessor);
            return new XmlMapObjectInspector(keyInspector, valueInspector, xmlProcessor);
        case STRUCT:
            StructTypeInfo structInfo = (StructTypeInfo) typeInfo;
            List<String> memberNames = structInfo.getAllStructFieldNames();
            List<TypeInfo> memberTypes = structInfo.getAllStructFieldTypeInfos();
            // One inspector per struct field, in field order.
            List<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>(memberTypes.size());
            for (TypeInfo memberType : memberTypes) {
                memberInspectors.add(getStandardJavaObjectInspectorFromTypeInfo(memberType, xmlProcessor));
            }
            return getStandardStructObjectInspector(memberNames, memberInspectors, xmlProcessor);
        default:
            throw new IllegalStateException();
    }
}
 
Example #27
Source File: HiveBucketingV2.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the hash of a single map block by summing, over every pair of adjacent
 * positions, the XOR of the key hash (this class's {@code hash}) and the value hash
 * ({@code HiveBucketingV1.hash}).
 */
private static int hashOfMap(MapTypeInfo type, Block singleMapBlock)
{
    TypeInfo keyType = type.getMapKeyTypeInfo();
    TypeInfo valueType = type.getMapValueTypeInfo();

    int hashSum = 0;
    int keyPosition = 0;
    while (keyPosition < singleMapBlock.getPositionCount()) {
        // Sic! we're hashing map keys with v2 but map values with v1 just as in
        // https://github.com/apache/hive/blob/7dc47faddba9f079bbe2698aaa4d8712e7654f87/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java#L903-L904
        int keyHash = hash(keyType, singleMapBlock, keyPosition);
        int valueHash = HiveBucketingV1.hash(valueType, singleMapBlock, keyPosition + 1);
        hashSum += keyHash ^ valueHash;
        keyPosition += 2;
    }
    return hashSum;
}
 
Example #28
Source File: OrcUtils.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@link ObjectInspector} matching the given Hive type.
 *
 * <p>FLOAT, DOUBLE, BOOLEAN, INT, LONG and STRING map to the shared Java inspectors
 * of {@code PrimitiveObjectInspectorFactory}; TIMESTAMP, DECIMAL and BINARY get
 * Pig-specific inspectors. STRUCT, MAP and LIST are wrapped in the corresponding
 * Pig inspector built from the type info.
 *
 * @param info the Hive type to create an inspector for
 * @return an inspector for {@code info}
 * @throws IllegalArgumentException if {@code info} is a DATE, VARCHAR, BYTE or SHORT
 *         primitive, any other unlisted primitive, or an unlisted category
 */
public static ObjectInspector createObjectInspector(TypeInfo info) {
    switch (info.getCategory()) {
    case PRIMITIVE:
      // Cast once; the primitive category is needed for dispatch and for messages.
      PrimitiveTypeInfo primitiveInfo = (PrimitiveTypeInfo) info;
      switch (primitiveInfo.getPrimitiveCategory()) {
        case FLOAT:
          return PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
        case DOUBLE:
          return PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
        case BOOLEAN:
          return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
        case INT:
          return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
        case LONG:
          return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
        case STRING:
          return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
        case TIMESTAMP:
          return new PigJodaTimeStampObjectInspector();
        case DECIMAL:
          return new PigDecimalObjectInspector();
        case BINARY:
          return new PigDataByteArrayObjectInspector();
        case DATE:
        case VARCHAR:
        case BYTE:
        case SHORT:
            // Leading space before "is" keeps the category name from running
            // into the rest of the message (e.g. "DATEis not valid ...").
            throw new IllegalArgumentException("Should never happen, " +
                    primitiveInfo.getPrimitiveCategory() +
                    " is not valid Pig primitive data type");
        default:
            throw new IllegalArgumentException("Unknown primitive type " +
                    primitiveInfo.getPrimitiveCategory());
      }
    case STRUCT:
      return new PigStructInspector((StructTypeInfo) info);
    case MAP:
      return new PigMapObjectInspector((MapTypeInfo) info);
    case LIST:
      return new PigListObjectInspector((ListTypeInfo) info);
    default:
      throw new IllegalArgumentException("Unknown type " +
        info.getCategory());
  }
}
 
Example #29
Source File: HiveSchemaConverter.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a Hive {@link TypeInfo} into a {@link Schema}, recursing into nested types.
 *
 * <ul>
 *   <li>PRIMITIVE: char/varchar classes become a STRING schema; any other primitive is
 *       looked up by type name in {@code TYPEINFO_TO_TYPE}.</li>
 *   <li>LIST: an array schema whose element schema is wrapped with {@code optional}.</li>
 *   <li>MAP: a map schema whose value schema is wrapped with {@code optional}; the key
 *       type must print as {@code "string"}.</li>
 *   <li>STRUCT: delegated to the {@code StructTypeInfo} overload.</li>
 *   <li>UNION: a union schema starting with {@code NULL}, followed by one schema per
 *       member, each converted under the name {@code name + "_" + index}.</li>
 * </ul>
 *
 * @param path passed unchanged to every recursive conversion
 * @param name name used for this type; union members get an index suffix
 * @param type the Hive type to convert
 * @param required passed to recursive conversions, except for union members, which
 *        are converted with {@code NO_REQUIRED_FIELDS}
 * @return the schema for {@code type}
 * @throws IllegalArgumentException if a primitive type name is not in
 *         {@code TYPEINFO_TO_TYPE}, a map key is not a string, or the category is unknown
 */
@VisibleForTesting
static Schema convert(LinkedList<String> path, String name,
                      TypeInfo type, Collection<String[]> required) {
  switch (type.getCategory()) {
    case PRIMITIVE: {
      // char/varchar type names embed their length, so they cannot be looked up by name
      boolean lengthQualified =
          type.getClass() == charClass || type.getClass() == varcharClass;
      if (lengthQualified) {
        return Schema.create(Schema.Type.STRING);
      }

      final String primitiveName = type.getTypeName();
      Preconditions.checkArgument(TYPEINFO_TO_TYPE.containsKey(primitiveName),
          "Cannot convert unsupported type: %s", primitiveName);
      return Schema.create(TYPEINFO_TO_TYPE.get(primitiveName));
    }

    case LIST: {
      ListTypeInfo listType = (ListTypeInfo) type;
      Schema elementSchema = convert(
          path, name, listType.getListElementTypeInfo(), required);
      return Schema.createArray(optional(elementSchema));
    }

    case MAP: {
      MapTypeInfo mapType = (MapTypeInfo) type;
      TypeInfo keyType = mapType.getMapKeyTypeInfo();
      Preconditions.checkArgument(
          "string".equals(keyType.toString()),
          "Non-String map key type: %s", keyType);

      Schema valueSchema = convert(
          path, name, mapType.getMapValueTypeInfo(), required);
      return Schema.createMap(optional(valueSchema));
    }

    case STRUCT:
      return convert(path, name, (StructTypeInfo) type, required);

    case UNION: {
      List<TypeInfo> memberTypes = ((UnionTypeInfo) type)
          .getAllUnionObjectTypeInfos();

      // start with NULL so every union is optional
      List<Schema> branches = Lists.newArrayList(NULL);
      int memberIndex = 0;
      for (TypeInfo memberType : memberTypes) {
        // members of a union are never treated as required
        branches.add(convert(
            path, name + "_" + memberIndex, memberType, NO_REQUIRED_FIELDS));
        memberIndex++;
      }

      return Schema.createUnion(branches);
    }

    default:
      throw new IllegalArgumentException(
          "Unknown TypeInfo category: " + type.getCategory());
  }
}
 
Example #30
Source File: CassandraColumnSerDe.java    From Hive-Cassandra with Apache License 2.0 4 votes vote down vote up
/**
 * Initialize the cassandra serialization and deserialization parameters from table properties and configuration.
 *
 * <p>Reads the column family, column mapping and validator type from the table
 * properties, builds the byte form of every mapped column name, locates the key
 * column, and initializes the underlying {@code LazySimpleSerDe} parameters. It then
 * validates that the Hive column count matches the Cassandra mapping and that every
 * mapping entry ending in {@code ":"} is backed by a Hive {@code map<string,?>} column.
 *
 * @param job configuration passed to {@code LazySimpleSerDe.initSerdeParams}
 * @param tbl table properties the Cassandra settings and serde parameters are read from
 * @param serdeName serde name, used for serde parameter initialization and as the
 *        prefix of error messages
 * @throws SerDeException if the number of Hive columns differs from the number of
 *         mapped Cassandra columns, or a mapping entry ending in {@code ":"} is not
 *         mapped to a map with a string key
 */
@Override
protected void initCassandraSerDeParameters(Configuration job, Properties tbl, String serdeName)
    throws SerDeException {
  cassandraColumnFamily = getCassandraColumnFamily(tbl);
  cassandraColumnNames = parseOrCreateColumnMapping(tbl);

  cassandraColumnNamesBytes = new ArrayList<BytesWritable>();
  for (String columnName : cassandraColumnNames) {
    // NOTE(review): getBytes() without a charset uses the platform default encoding;
    // confirm whether an explicit charset is expected by readers of these bytes.
    cassandraColumnNamesBytes.add(new BytesWritable(columnName.getBytes()));
  }

  iKey = cassandraColumnNames.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);

  serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);

  validatorType = parseOrCreateValidatorType(tbl);

  setTableMapping();

  if (cassandraColumnNames.size() != serdeParams.getColumnNames().size()) {
    throw new SerDeException(serdeName + ": columns has " +
        serdeParams.getColumnNames().size() +
        " elements while cassandra.columns.mapping has " +
        cassandraColumnNames.size() + " elements" +
        " (counting the key if implicit)");
  }

  // we just can make sure that "StandardColumn:" is mapped to MAP<String,?>
  for (int i = 0; i < cassandraColumnNames.size(); i++) {
    String cassandraColName = cassandraColumnNames.get(i);
    if (cassandraColName.endsWith(":")) {
      TypeInfo typeInfo = serdeParams.getColumnTypes().get(i);
      // Compare the key type name by value: reference comparison (!=) would reject a
      // valid "string" key whenever the name is an equal but distinct String instance.
      if ((typeInfo.getCategory() != Category.MAP) ||
          !Constants.STRING_TYPE_NAME.equals(
              ((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getTypeName())) {

        throw new SerDeException(
            serdeName + ": Cassandra column family '"
                + cassandraColName
                + "' should be mapped to map<string,?> but is mapped to "
                + typeInfo.getTypeName());
      }
    }
  }
}