Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils#getTypeInfosFromTypeString()
The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils#getTypeInfosFromTypeString(). They are drawn from several open-source projects; each example lists its source file, project, and license.
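Before the project examples, here is a minimal, self-contained sketch of the call itself (the column names and type string are illustrative, not taken from any project below): it parses a comma-separated Hive type string into a list of TypeInfo objects that line up positionally with the column names and can back a struct row type.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoParseSketch {
    public static void main(String[] args) {
        // Parse a Hive type string; nested types such as array<> and map<> are supported.
        List<TypeInfo> types =
                TypeInfoUtils.getTypeInfosFromTypeString("int,string,array<double>,map<string,int>");

        // The parsed list lines up positionally with the column names.
        List<String> names = Arrays.asList("id", "name", "scores", "counts");
        StructTypeInfo rowType =
                (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);

        // Prints: struct<id:int,name:string,scores:array<double>,counts:map<string,int>>
        System.out.println(rowType.getTypeName());
    }
}

As several examples below show, type strings may also be colon-separated (e.g. when building an all-string default schema), and the resulting TypeInfo list is typically fed to TypeInfoFactory and TypeInfoUtils to build an ObjectInspector.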
Example 1
Source File: HiveUtils.java From elasticsearch-hadoop with Apache License 2.0
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants as they were renamed in 0.9 breaking compatibility
    // the column names are saved as the given inspector to #serialize doesn't preserve them (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
        inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
Example 2
Source File: MDSSerde.java From multiple-dimension-spread with Apache License 2.0
private StructTypeInfo getColumnProjectionTypeInfo(final String columnNameProperty, final String columnTypeProperty, final String projectionColumnNames) {
    Set<String> columnNameSet = new HashSet<String>();
    for (String columnName : projectionColumnNames.split(",")) {
        columnNameSet.add(columnName);
    }

    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    String[] splitNames = columnNameProperty.split(",");

    ArrayList<String> projectionColumnNameList = new ArrayList<String>();
    ArrayList<TypeInfo> projectionFieldTypeList = new ArrayList<TypeInfo>();
    for (int i = 0; i < fieldTypes.size(); i++) {
        if (columnNameSet.contains(splitNames[i])) {
            projectionColumnNameList.add(splitNames[i]);
            projectionFieldTypeList.add(fieldTypes.get(i));
        }
        filedIndexMap.put(splitNames[i], i);
    }

    StructTypeInfo rootType = new StructTypeInfo();
    rootType.setAllStructFieldNames(projectionColumnNameList);
    rootType.setAllStructFieldTypeInfos(projectionFieldTypeList);

    return rootType;
}
Example 3
Source File: JSONSerDe.java From searchanalytics-bigdata with MIT License
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help
 * perform actual serialization and deserialization of data.
 */
@Override
public void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    // Get a list of the table's column names.
    final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    // Jai...change column names to lower case.
    colNames = Arrays.asList(colNamesStr.toLowerCase().split(","));

    // Get a list of TypeInfos for the columns. This list lines up with
    // the list of column names.
    final String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
Example 4
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
@Test
public void testMapValues() throws SerDeException {
    List<String> columnNames = Arrays.asList("a,b".split(","));
    List<TypeInfo> columnTypes =
            TypeInfoUtils.getTypeInfosFromTypeString("array<string>,map<string,int>");

    Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
    Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
    Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");

    List<Object> expected1 = Arrays.<Object>asList(Arrays.<String>asList("aaa"),
            createHashMapStringInteger("bbb", 1));
    List<Object> expected2 = Arrays.<Object>asList(Arrays.<String>asList("yyy"),
            createHashMapStringInteger("zzz", 123));
    List<Object> expected3 = Arrays.<Object>asList(Arrays.<String>asList("a"),
            createHashMapStringInteger("x", 11, "y", 22, "z", null));

    List<Object> result1 = JsonSerdeUtils.deserialize(text1, columnNames, columnTypes);
    List<Object> result2 = JsonSerdeUtils.deserialize(text2, columnNames, columnTypes);
    List<Object> result3 = JsonSerdeUtils.deserialize(text3, columnNames, columnTypes);

    Assert.assertEquals(expected1, result1);
    Assert.assertEquals(expected2, result2);
    Assert.assertEquals(expected3, result3);
}
Example 5
Source File: JSONCDHSerDe.java From bigdata-tutorial with Apache License 2.0
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // Get a list of the table's column names.
    String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    colNames = Arrays.asList(colNamesStr.split(","));

    // Get a list of TypeInfos for the columns. This list lines up with
    // the list of column names.
    String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
Example 6
Source File: OrcSerde.java From hive-dwrf with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties table) {
    // Read the configuration parameters
    String columnNameProperty = table.getProperty("columns");
    // NOTE: if "columns.types" is missing, all columns will be of String type
    String columnTypeProperty = table.getProperty("columns.types");

    // Parse the configuration parameters
    ArrayList<String> columnNames = EMPTY_STRING_ARRAYLIST;
    if (columnNameProperty != null && columnNameProperty.length() > 0) {
        String[] splits = columnNameProperty.split(",");
        columnNames = new ArrayList<String>(splits.length);
        for (String name : splits) {
            columnNames.add(name);
        }
    }
    if (columnTypeProperty == null) {
        // Default type: all string
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < columnNames.size(); i++) {
            if (i > 0) {
                sb.append(":");
            }
            sb.append("string");
        }
        columnTypeProperty = sb.toString();
    }

    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    StructTypeInfo rootType = new StructTypeInfo();
    rootType.setAllStructFieldNames(columnNames);
    rootType.setAllStructFieldTypeInfos(fieldTypes);
    inspector = new OrcLazyRowObjectInspector(rootType);
}
Example 7
Source File: TestHiveSchemaConverter.java From parquet-mr with Apache License 2.0
private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
    List<TypeInfo> columnTypes;
    if (columnsTypeStr.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
    }
    return columnTypes;
}
Example 8
Source File: ParquetHiveSerDe.java From parquet-mr with Apache License 2.0
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;

    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
            "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }

    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
}
Example 9
Source File: MapredParquetOutputFormat.java From parquet-mr with Apache License 2.0
/**
 * Create the parquet schema from the hive schema, and return the RecordWriterWrapper which
 * contains the real output format
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
        final JobConf jobConf,
        final Path finalOutPath,
        final Class<? extends Writable> valueClass,
        final boolean isCompressed,
        final Properties tableProperties,
        final Progressable progress) throws IOException {

    LOG.info("creating new record writer...{}", this);

    final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
    List<String> columnNames;
    List<TypeInfo> columnTypes;

    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf);

    return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress);
}
Example 10
Source File: AvroSchemaGenerator.java From HiveKa with Apache License 2.0
public Schema getSchema(String columnNamesStr, String columnTypesStr, String columnCommentsStr,
                        String namespace, String name, String doc) {
    List<String> columnNames = Arrays.asList(columnNamesStr.split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesStr);
    List<String> columnComments;
    if (columnCommentsStr.isEmpty()) {
        columnComments = new ArrayList<String>();
    } else {
        columnComments = Arrays.asList(columnCommentsStr.split(","));
    }
    return typeInfoToSchema.convert(columnNames, columnTypes, columnComments, namespace, name, doc);
}
Example 11
Source File: LWSerDe.java From hive-solr with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties tblProperties) throws SerDeException {
    colNames = Arrays.asList(tblProperties.getProperty(serdeConstants.LIST_COLUMNS).split(","));
    colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tblProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    typeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    row = new ArrayList<>();
    enableFieldMapping = Boolean.valueOf(tblProperties.getProperty(ENABLE_FIELD_MAPPING, "false"));
}
Example 12
Source File: IndexRSerde.java From indexr with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    if (Strings.isEmpty(columnNameProperty)) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (Strings.isEmpty(columnTypeProperty)) {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(StringUtils.repeat("string", ":", columnNames.size()));
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("IndexRHiveSerde initialization failed. Number of column " +
            "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    stats = new SerDeStats();
    serdeSize = 0;
}
Example 13
Source File: FromJsonUDF.java From incubator-hivemall with Apache License 2.0
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 2 && argOIs.length != 3) {
        throw new UDFArgumentException(
            "from_json takes two or three arguments: " + argOIs.length);
    }

    this.jsonOI = HiveUtils.asStringOI(argOIs[0]);

    String typeString = HiveUtils.getConstString(argOIs[1]);
    this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(typeString);

    if (argOIs.length == 3) {
        final ObjectInspector argOI2 = argOIs[2];
        if (HiveUtils.isConstString(argOI2)) {
            String names = HiveUtils.getConstString(argOI2);
            this.columnNames = ArrayUtils.asKryoSerializableList(names.split(","));
        } else if (HiveUtils.isConstStringListOI(argOI2)) {
            this.columnNames =
                    ArrayUtils.asKryoSerializableList(HiveUtils.getConstStringArray(argOI2));
        } else {
            throw new UDFArgumentException("Expected `const array<string>` or `const string`"
                    + " but got an unexpected OI type for the third argument: " + argOI2);
        }
    }

    return getObjectInspector(columnTypes, columnNames);
}
Example 14
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
@Test
public void testRWNull() throws Exception {
    List<Object> nlist = new ArrayList<Object>(13);
    {
        nlist.add(null); // tinyint
        nlist.add(null); // smallint
        nlist.add(null); // int
        nlist.add(null); // bigint
        nlist.add(null); // double
        nlist.add(null); // float
        nlist.add(null); // string
        nlist.add(null); // string
        nlist.add(null); // struct
        nlist.add(null); // array
        nlist.add(null); // map
        nlist.add(null); // bool
        nlist.add(null); // complex
        nlist.add(null); // decimal(5,2)
        nlist.add(null); // char(10)
        nlist.add(null); // varchar(20)
        nlist.add(null); // date
        nlist.add(null); // timestamp
        nlist.add(null); // binary
    }
    DefaultHCatRecord r = new DefaultHCatRecord(nlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized = JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(nlist, deserialized);
}
Example 15
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
@Test
public void testRW() throws Exception {
    List<Object> rlist = new ArrayList<Object>(13);
    {
        rlist.add(new Byte("123"));
        rlist.add(new Short("456"));
        rlist.add(new Integer(789));
        rlist.add(new Long(1000L));
        rlist.add(new Double(5.3D));
        rlist.add(new Float(2.39F));
        rlist.add(new String("hcat\nand\nhadoop"));
        rlist.add(null);

        List<Object> innerStruct = new ArrayList<Object>(2);
        innerStruct.add(new String("abc"));
        innerStruct.add(new String("def"));
        rlist.add(innerStruct);

        List<Integer> innerList = new ArrayList<Integer>();
        innerList.add(314);
        innerList.add(007);
        rlist.add(innerList);

        Map<Short, String> map = new HashMap<Short, String>(3);
        map.put(new Short("2"), "hcat is cool");
        map.put(new Short("3"), "is it?");
        map.put(new Short("4"), "or is it not?");
        rlist.add(map);

        rlist.add(new Boolean(true));

        List<Object> c1 = new ArrayList<Object>();
        List<Object> c1_1 = new ArrayList<Object>();
        c1_1.add(new Integer(12));
        List<Object> i2 = new ArrayList<Object>();
        List<Integer> ii1 = new ArrayList<Integer>();
        ii1.add(new Integer(13));
        ii1.add(new Integer(14));
        i2.add(ii1);
        Map<String, List<?>> ii2 = new HashMap<String, List<?>>();
        List<Integer> iii1 = new ArrayList<Integer>();
        iii1.add(new Integer(15));
        ii2.put("phew", iii1);
        i2.add(ii2);
        c1_1.add(i2);
        c1.add(c1_1);
        rlist.add(c1);

        rlist.add(HiveDecimal.create(new BigDecimal("123.45"))); // prec 5, scale 2
        rlist.add(new HiveChar("hive\nchar", 10));
        rlist.add(new HiveVarchar("hive\nvarchar", 20));
        rlist.add(Date.valueOf("2014-01-07"));
        rlist.add(new Timestamp(System.currentTimeMillis()));
        rlist.add("hive\nbinary".getBytes("UTF-8"));
    }
    DefaultHCatRecord r = new DefaultHCatRecord(rlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized = JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(rlist, deserialized);
}
Example 16
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
/**
 * This test verifies that our json deserialization is not too strict, as per HIVE-6166.
 *
 * i.e., if our schema is "s:struct<a:int,b:string>,k:int", and we pass in data that looks like:
 *
 * <pre>
 * {
 *     "x" : "abc" ,
 *     "t" : {
 *         "a" : "1",
 *         "b" : "2",
 *         "c" : [
 *             { "x" : 2 , "y" : 3 } ,
 *             { "x" : 3 , "y" : 2 }
 *         ]
 *     } ,
 *     "s" : {
 *         "a" : 2 ,
 *         "b" : "blah",
 *         "c": "woo"
 *     }
 * }
 * </pre>
 *
 * Then it should still work, and ignore the "x" and "t" field and "c" subfield of "s", and it
 * should read k as null.
 */
@Test
public void testLooseJsonReadability() throws Exception {
    List<String> columnNames = Arrays.asList("s,k".split(","));
    List<TypeInfo> columnTypes =
            TypeInfoUtils.getTypeInfosFromTypeString("struct<a:int,b:string>,int");

    Text jsonText1 = new Text("{ \"x\" : \"abc\" , "
            + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
            + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }");
    Text jsonText2 = new Text("{ \"x\" : \"abc\" , "
            + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
            + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } , "
            + "\"k\" : 113 "
            + "}");

    List<Object> expected1 = Arrays.<Object>asList(Arrays.asList(2, "blah"), null);
    List<Object> expected2 = Arrays.<Object>asList(Arrays.asList(2, "blah"), 113);

    List<Object> result1 = JsonSerdeUtils.deserialize(jsonText1, columnNames, columnTypes);
    List<Object> result2 = JsonSerdeUtils.deserialize(jsonText2, columnNames, columnTypes);

    Assert.assertEquals(expected1, result1);
    Assert.assertEquals(expected2, result2);
}
Example 17
Source File: EmoSerDe.java From emodb with Apache License 2.0
@Override
public void initialize(Configuration config, Properties properties) throws SerDeException {
    // Get the column names and types from the configuration properties
    String columnNamesProperty = properties.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypesProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    List<String> columnNames;
    List<TypeInfo> columnTypes;
    List<ObjectInspector> columnInspectors;

    if (columnNamesProperty.isEmpty()) {
        columnNames = ImmutableList.of();
    } else {
        columnNames = Arrays.asList(columnNamesProperty.split(","));
    }

    if (columnTypesProperty.isEmpty()) {
        columnTypes = ImmutableList.of();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty);
    }

    int numColumns = columnNames.size();
    checkArgument(columnTypes.size() == numColumns);

    _columns = Lists.newArrayListWithCapacity(numColumns);
    _values = Lists.newArrayListWithCapacity(numColumns);
    columnInspectors = Lists.newArrayListWithCapacity(numColumns);

    // Initialize the types and inspectors for each column
    for (int i = 0; i < numColumns; i++) {
        TypeInfo type = columnTypes.get(i);
        ObjectInspector columnInspector = getObjectInspectorForType(type);
        _columns.add(Maps.immutableEntry(columnNames.get(i), type));
        _values.add(null);
        columnInspectors.add(columnInspector);
    }

    _inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnInspectors);
}
Example 18
Source File: ExcelSerde.java From hadoopoffice with Apache License 2.0
/**
 * Initializes the SerDe.
 * You can define in the table properties (additionally to the standard Hive properties) the following options:
 * office.hive.write.defaultSheetName: The sheetname to which data should be written (note: as an input any sheets can be read or selected sheets according to HadoopOffice configuration values)
 * Any of the HadoopOffice options (hadoopoffice.*), such as encryption, signing, low footprint mode, linked workbooks, can be defined in the table properties @see <a href="https://github.com/ZuInnoTe/hadoopoffice/wiki/Hadoop-File-Format">HadoopOffice configuration</a>
 *
 * @param conf Hadoop Configuration
 * @param prop table properties.
 * @param partitionProperties ignored. Partitions are not supported.
 */
@Override
public void initialize(Configuration conf, Properties prop, Properties partitionProperties) throws SerDeException {
    LOG.debug("Initializing Excel Hive Serde");
    LOG.debug("Configuring Hive-only options");
    // configure hadoopoffice specific hive options
    String defaultSheetNameStr = prop.getProperty(ExcelSerde.CONF_DEFAULTSHEETNAME);
    if (defaultSheetNameStr != null) {
        this.defaultSheetName = defaultSheetNameStr;
    }
    // copy hadoopoffice options
    LOG.debug("Configuring HadoopOffice Format");
    Set<Entry<Object, Object>> entries = prop.entrySet();
    for (Entry<Object, Object> entry : entries) {
        if ((entry.getKey() instanceof String) && ((String) entry.getKey()).startsWith(ExcelSerde.HOSUFFIX)) {
            if (("TRUE".equalsIgnoreCase((String) entry.getValue())) || ("FALSE".equalsIgnoreCase(((String) entry.getValue())))) {
                conf.setBoolean((String) entry.getKey(), Boolean.valueOf((String) entry.getValue()));
            } else {
                conf.set((String) entry.getKey(), (String) entry.getValue());
            }
        }
    }
    // create object inspector (always a struct = row)
    LOG.debug("Creating object inspector");
    this.columnNames = Arrays.asList(prop.getProperty(serdeConstants.LIST_COLUMNS).split(","));
    this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(prop.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (TypeInfo currentColumnType : columnTypes) {
        columnOIs.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(currentColumnType));
    }
    this.oi = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    // create converter
    LOG.debug("Creating converter");
    HadoopOfficeReadConfiguration hocr = new HadoopOfficeReadConfiguration(conf);
    this.readConverter = new ExcelConverterSimpleSpreadSheetCellDAO(hocr.getSimpleDateFormat(), hocr.getSimpleDecimalFormat(), hocr.getSimpleDateTimeFormat());
    HadoopOfficeWriteConfiguration howc = new HadoopOfficeWriteConfiguration(conf, "");
    this.writeConverter = new ExcelConverterSimpleSpreadSheetCellDAO(howc.getSimpleDateFormat(), howc.getSimpleDecimalFormat(), howc.getSimpleDateTimeFormat());
    // configure writing of header
    this.writeHeader = howc.getWriteHeader();
    GenericDataType[] columnsGD = new GenericDataType[columnNames.size()];
    for (int i = 0; i < columnOIs.size(); i++) {
        ObjectInspector currentOI = columnOIs.get(i);
        if (currentOI instanceof BooleanObjectInspector) {
            columnsGD[i] = new GenericBooleanDataType();
        } else if (currentOI instanceof DateObjectInspector) {
            columnsGD[i] = new GenericDateDataType();
        } else if (currentOI instanceof TimestampObjectInspector) {
            columnsGD[i] = new GenericTimestampDataType();
        } else if (currentOI instanceof ByteObjectInspector) {
            columnsGD[i] = new GenericByteDataType();
        } else if (currentOI instanceof ShortObjectInspector) {
            columnsGD[i] = new GenericShortDataType();
        } else if (currentOI instanceof IntObjectInspector) {
            columnsGD[i] = new GenericIntegerDataType();
        } else if (currentOI instanceof LongObjectInspector) {
            columnsGD[i] = new GenericLongDataType();
        } else if (currentOI instanceof DoubleObjectInspector) {
            columnsGD[i] = new GenericDoubleDataType();
        } else if (currentOI instanceof FloatObjectInspector) {
            columnsGD[i] = new GenericFloatDataType();
        } else if (currentOI instanceof HiveDecimalObjectInspector) {
            HiveDecimalObjectInspector currentOIHiveDecimalOI = (HiveDecimalObjectInspector) currentOI;
            columnsGD[i] = new GenericBigDecimalDataType(currentOIHiveDecimalOI.precision(), currentOIHiveDecimalOI.scale());
        } else if (currentOI instanceof StringObjectInspector) {
            columnsGD[i] = new GenericStringDataType();
        } else {
            LOG.warn("Could not detect desired datatype for column " + i + ". Type " + currentOI.getTypeName() + ". Using String");
            columnsGD[i] = new GenericStringDataType();
        }
    }
    this.readConverter.setSchemaRow(columnsGD);
    this.writeConverter.setSchemaRow(columnsGD);
    // create nullrow
    this.nullRow = new Object[this.columnNames.size()];
    // set writerow
    this.currentWriteRow = 0;
    // set outputrow
    this.outputRow = new Object[this.columnNames.size()];
    LOG.debug("Finished Initialization");
}
Example 19
Source File: SolrSerde.java From hive-solr with MIT License
@Override
public void initialize(@Nullable Configuration configuration, Properties tbl) throws SerDeException {
    row = new ArrayList<Object>();

    // Read column names
    String columnNameProp = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    if (columnNameProp != null && columnNameProp.length() > 0) {
        columnNames = Arrays.asList(columnNameProp.split(","));
    } else {
        columnNames = new ArrayList<String>();
    }

    // Read column types
    String columnTypeProp = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    // default all string
    if (columnTypeProp == null) {
        String[] types = new String[columnNames.size()];
        Arrays.fill(types, 0, types.length, serdeConstants.STRING_TYPE_NAME);
        columnTypeProp = StringUtils.join(types, ":");
    }
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProp);

    // Check that column names and types line up
    if (columnTypes.size() != columnNames.size()) {
        throw new SerDeException("len(columnNames) != len(columnTypes)");
    }

    // Create ObjectInspectors from the type information for each column
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>();
    ObjectInspector oi;
    for (int c = 0; c < columnNames.size(); c++) {
        oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
        columnOIs.add(oi);
    }
    objectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
}
Example 20
Source File: SMSerDe.java From spliceengine with GNU Affero General Public License v3.0
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help
 * perform actual serialization and deserialization of data.
 */
//@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    if (Log.isDebugEnabled())
        SpliceLogUtils.debug(Log, "initialize with conf=%s, tbl=%s", conf, tbl);
    // Get a list of the table's column names.
    tableName = tbl.getProperty(MRConstants.SPLICE_TABLE_NAME);
    String hbaseDir = null;
    if (conf != null) {
        hbaseDir = conf.get(HConstants.HBASE_DIR);
    }
    if (hbaseDir == null)
        hbaseDir = System.getProperty(HConstants.HBASE_DIR);
    if (hbaseDir == null)
        throw new SerDeException("hbase root directory not set, please include hbase.rootdir in config or via -D system property ...");
    if (conf != null) {
        conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
        conf.set(MRConstants.SPLICE_JDBC_STR, tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
        conf.set(HConstants.HBASE_DIR, hbaseDir);
        if (conf.get(HiveConf.ConfVars.POSTEXECHOOKS.varname) == null) {
            conf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "com.splicemachine.mrio.api.hive.PostExecHook");
        }
        if (conf.get(HiveConf.ConfVars.ONFAILUREHOOKS.varname) == null) {
            conf.set(HiveConf.ConfVars.ONFAILUREHOOKS.varname, "com.splicemachine.mrio.api.hive.FailureExecHook");
        }
    }
    if (sqlUtil == null)
        sqlUtil = SMSQLUtil.getInstance(tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
    String colNamesStr = tbl.getProperty(Constants.LIST_COLUMNS);
    colNames.clear();
    for (String split : colNamesStr.split(","))
        colNames.add(split.toUpperCase());
    String colTypesStr = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
    objectCache = new ArrayList<Object>(colTypes.size());
    if (tableName != null) {
        tableName = tableName.trim().toUpperCase();
        try {
            if (!sqlUtil.checkTableExists(tableName))
                throw new SerDeException(String.format("table %s does not exist...", tableName));
            if (conf != null) {
                ScanSetBuilder tableScannerBuilder = sqlUtil.getTableScannerBuilder(tableName, colNames);
                conf.set(MRConstants.SPLICE_SCAN_INFO, tableScannerBuilder.base64Encode());
                // TableContext tableContext = sqlUtil.createTableContext(tableName, tableScannerBuilder);
                // conf.set(MRConstants.SPLICE_TBLE_CONTEXT, tableContext.getTableContextBase64String());
            }
        } catch (Exception e) {
            throw new SerDeException(e);
        }
    }
    if (Log.isDebugEnabled())
        SpliceLogUtils.debug(Log, "generating hive info colNames=%s, colTypes=%s", colNames, colTypes);
    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
    //serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName());
    Log.info("--------Finished initialize");
}