Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils#getTypeInfosFromTypeString()
The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils#getTypeInfosFromTypeString(). They are drawn from several open-source projects; each example lists its source file, project, and license.
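Before the project examples, here is a minimal, self-contained sketch of the call itself (the column names and type string are illustrative, not taken from any project below): it parses a comma-separated Hive type string into a list of TypeInfo objects that line up positionally with the column names and can back a struct row type.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoParseSketch {
    public static void main(String[] args) {
        // Parse a Hive type string; nested types such as array<> and map<> are supported.
        List<TypeInfo> types =
                TypeInfoUtils.getTypeInfosFromTypeString("int,string,array<double>,map<string,int>");

        // The parsed list lines up positionally with the column names.
        List<String> names = Arrays.asList("id", "name", "scores", "counts");
        StructTypeInfo rowType =
                (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);

        // Prints: struct<id:int,name:string,scores:array<double>,counts:map<string,int>>
        System.out.println(rowType.getTypeName());
    }
}

As several examples below show, type strings may also be colon-separated (e.g. when building an all-string default schema), and the resulting TypeInfo list is typically fed to TypeInfoFactory and TypeInfoUtils to build an ObjectInspector.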
Example 1
Source File: HiveUtils.java From elasticsearch-hadoop with Apache License 2.0
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants as they were renamed in 0.9 breaking compatibility
    // the column names are saved as the given inspector to #serialize doesn't preserve them (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
        inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
Example 2
Source File: MDSSerde.java From multiple-dimension-spread with Apache License 2.0
private StructTypeInfo getColumnProjectionTypeInfo(final String columnNameProperty, final String columnTypeProperty, final String projectionColumnNames) {
    Set<String> columnNameSet = new HashSet<String>();
    for (String columnName : projectionColumnNames.split(",")) {
        columnNameSet.add(columnName);
    }

    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    String[] splitNames = columnNameProperty.split(",");

    ArrayList<String> projectionColumnNameList = new ArrayList<String>();
    ArrayList<TypeInfo> projectionFieldTypeList = new ArrayList<TypeInfo>();
    for (int i = 0; i < fieldTypes.size(); i++) {
        if (columnNameSet.contains(splitNames[i])) {
            projectionColumnNameList.add(splitNames[i]);
            projectionFieldTypeList.add(fieldTypes.get(i));
        }
        filedIndexMap.put(splitNames[i], i);
    }

    StructTypeInfo rootType = new StructTypeInfo();
    rootType.setAllStructFieldNames(projectionColumnNameList);
    rootType.setAllStructFieldTypeInfos(projectionFieldTypeList);

    return rootType;
}
Example 3
Source File: JSONSerDe.java From searchanalytics-bigdata with MIT License
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help
 * perform actual serialization and deserialization of data.
 */
@Override
public void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    // Get a list of the table's column names.
    final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    // Jai...change column names to lower case.
    colNames = Arrays.asList(colNamesStr.toLowerCase().split(","));

    // Get a list of TypeInfos for the columns. This list lines up with
    // the list of column names.
    final String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
Example 4
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
@Test
public void testMapValues() throws SerDeException {
    List<String> columnNames = Arrays.asList("a,b".split(","));
    List<TypeInfo> columnTypes =
            TypeInfoUtils.getTypeInfosFromTypeString("array<string>,map<string,int>");

    Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
    Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
    Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");

    List<Object> expected1 = Arrays.<Object>asList(Arrays.<String>asList("aaa"),
            createHashMapStringInteger("bbb", 1));
    List<Object> expected2 = Arrays.<Object>asList(Arrays.<String>asList("yyy"),
            createHashMapStringInteger("zzz", 123));
    List<Object> expected3 = Arrays.<Object>asList(Arrays.<String>asList("a"),
            createHashMapStringInteger("x", 11, "y", 22, "z", null));

    List<Object> result1 = JsonSerdeUtils.deserialize(text1, columnNames, columnTypes);
    List<Object> result2 = JsonSerdeUtils.deserialize(text2, columnNames, columnTypes);
    List<Object> result3 = JsonSerdeUtils.deserialize(text3, columnNames, columnTypes);

    Assert.assertEquals(expected1, result1);
    Assert.assertEquals(expected2, result2);
    Assert.assertEquals(expected3, result3);
}
Example 5
Source File: JSONCDHSerDe.java From bigdata-tutorial with Apache License 2.0
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // Get a list of the table's column names.
    String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    colNames = Arrays.asList(colNamesStr.split(","));

    // Get a list of TypeInfos for the columns. This list lines up with
    // the list of column names.
    String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
Example 6
Source File: OrcSerde.java From hive-dwrf with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties table) {
    // Read the configuration parameters
    String columnNameProperty = table.getProperty("columns");
    // NOTE: if "columns.types" is missing, all columns will be of String type
    String columnTypeProperty = table.getProperty("columns.types");

    // Parse the configuration parameters
    ArrayList<String> columnNames = EMPTY_STRING_ARRAYLIST;
    if (columnNameProperty != null && columnNameProperty.length() > 0) {
        String[] splits = columnNameProperty.split(",");
        columnNames = new ArrayList<String>(splits.length);
        for (String name : splits) {
            columnNames.add(name);
        }
    }
    if (columnTypeProperty == null) {
        // Default type: all string
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < columnNames.size(); i++) {
            if (i > 0) {
                sb.append(":");
            }
            sb.append("string");
        }
        columnTypeProperty = sb.toString();
    }

    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    StructTypeInfo rootType = new StructTypeInfo();
    rootType.setAllStructFieldNames(columnNames);
    rootType.setAllStructFieldTypeInfos(fieldTypes);
    inspector = new OrcLazyRowObjectInspector(rootType);
}
Example 7
Source File: TestHiveSchemaConverter.java From parquet-mr with Apache License 2.0
private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
    List<TypeInfo> columnTypes;
    if (columnsTypeStr.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
    }
    return columnTypes;
}
Example 8
Source File: ParquetHiveSerDe.java From parquet-mr with Apache License 2.0
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;

    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
            "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }

    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
}
Example 9
Source File: MapredParquetOutputFormat.java From parquet-mr with Apache License 2.0
/**
 * Create the parquet schema from the hive schema, and return the RecordWriterWrapper which
 * contains the real output format
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
        final JobConf jobConf,
        final Path finalOutPath,
        final Class<? extends Writable> valueClass,
        final boolean isCompressed,
        final Properties tableProperties,
        final Progressable progress) throws IOException {

    LOG.info("creating new record writer...{}", this);

    final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
    List<String> columnNames;
    List<TypeInfo> columnTypes;

    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf);

    return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress);
}
Example 10
Source File: AvroSchemaGenerator.java From HiveKa with Apache License 2.0
public Schema getSchema(String columnNamesStr, String columnTypesStr, String columnCommentsStr,
                        String namespace, String name, String doc) {
    List<String> columnNames = Arrays.asList(columnNamesStr.split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesStr);
    List<String> columnComments;
    if (columnCommentsStr.isEmpty()) {
        columnComments = new ArrayList<String>();
    } else {
        columnComments = Arrays.asList(columnCommentsStr.split(","));
    }
    return typeInfoToSchema.convert(columnNames, columnTypes, columnComments, namespace, name, doc);
}
Example 11
Source File: LWSerDe.java From hive-solr with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties tblProperties) throws SerDeException {
    colNames = Arrays.asList(tblProperties.getProperty(serdeConstants.LIST_COLUMNS).split(","));
    colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tblProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    typeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    row = new ArrayList<>();
    enableFieldMapping = Boolean.valueOf(tblProperties.getProperty(ENABLE_FIELD_MAPPING, "false"));
}
Example 12
Source File: IndexRSerde.java From indexr with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    if (Strings.isEmpty(columnNameProperty)) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (Strings.isEmpty(columnTypeProperty)) {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(StringUtils.repeat("string", ":", columnNames.size()));
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("IndexRHiveSerde initialization failed. Number of column " +
            "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    stats = new SerDeStats();
    serdeSize = 0;
}
Example 13
Source File: FromJsonUDF.java From incubator-hivemall with Apache License 2.0
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 2 && argOIs.length != 3) {
        throw new UDFArgumentException(
            "from_json takes two or three arguments: " + argOIs.length);
    }

    this.jsonOI = HiveUtils.asStringOI(argOIs[0]);

    String typeString = HiveUtils.getConstString(argOIs[1]);
    this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(typeString);

    if (argOIs.length == 3) {
        final ObjectInspector argOI2 = argOIs[2];
        if (HiveUtils.isConstString(argOI2)) {
            String names = HiveUtils.getConstString(argOI2);
            this.columnNames = ArrayUtils.asKryoSerializableList(names.split(","));
        } else if (HiveUtils.isConstStringListOI(argOI2)) {
            this.columnNames =
                    ArrayUtils.asKryoSerializableList(HiveUtils.getConstStringArray(argOI2));
        } else {
            throw new UDFArgumentException("Expected `const array<string>` or `const string`"
                    + " but got an unexpected OI type for the third argument: " + argOI2);
        }
    }

    return getObjectInspector(columnTypes, columnNames);
}
Example 14
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
@Test
public void testRWNull() throws Exception {
    List<Object> nlist = new ArrayList<Object>(13);
    {
        nlist.add(null); // tinyint
        nlist.add(null); // smallint
        nlist.add(null); // int
        nlist.add(null); // bigint
        nlist.add(null); // double
        nlist.add(null); // float
        nlist.add(null); // string
        nlist.add(null); // string
        nlist.add(null); // struct
        nlist.add(null); // array
        nlist.add(null); // map
        nlist.add(null); // bool
        nlist.add(null); // complex
        nlist.add(null); // decimal(5,2)
        nlist.add(null); // char(10)
        nlist.add(null); // varchar(20)
        nlist.add(null); // date
        nlist.add(null); // timestamp
        nlist.add(null); // binary
    }
    DefaultHCatRecord r = new DefaultHCatRecord(nlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized = JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(nlist, deserialized);
}
Example 15
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
@Test
public void testRW() throws Exception {
    List<Object> rlist = new ArrayList<Object>(13);
    {
        rlist.add(new Byte("123"));
        rlist.add(new Short("456"));
        rlist.add(new Integer(789));
        rlist.add(new Long(1000L));
        rlist.add(new Double(5.3D));
        rlist.add(new Float(2.39F));
        rlist.add(new String("hcat\nand\nhadoop"));
        rlist.add(null);

        List<Object> innerStruct = new ArrayList<Object>(2);
        innerStruct.add(new String("abc"));
        innerStruct.add(new String("def"));
        rlist.add(innerStruct);

        List<Integer> innerList = new ArrayList<Integer>();
        innerList.add(314);
        innerList.add(007);
        rlist.add(innerList);

        Map<Short, String> map = new HashMap<Short, String>(3);
        map.put(new Short("2"), "hcat is cool");
        map.put(new Short("3"), "is it?");
        map.put(new Short("4"), "or is it not?");
        rlist.add(map);

        rlist.add(new Boolean(true));

        List<Object> c1 = new ArrayList<Object>();
        List<Object> c1_1 = new ArrayList<Object>();
        c1_1.add(new Integer(12));
        List<Object> i2 = new ArrayList<Object>();
        List<Integer> ii1 = new ArrayList<Integer>();
        ii1.add(new Integer(13));
        ii1.add(new Integer(14));
        i2.add(ii1);
        Map<String, List<?>> ii2 = new HashMap<String, List<?>>();
        List<Integer> iii1 = new ArrayList<Integer>();
        iii1.add(new Integer(15));
        ii2.put("phew", iii1);
        i2.add(ii2);
        c1_1.add(i2);
        c1.add(c1_1);
        rlist.add(c1);

        rlist.add(HiveDecimal.create(new BigDecimal("123.45"))); // prec 5, scale 2
        rlist.add(new HiveChar("hive\nchar", 10));
        rlist.add(new HiveVarchar("hive\nvarchar", 20));
        rlist.add(Date.valueOf("2014-01-07"));
        rlist.add(new Timestamp(System.currentTimeMillis()));
        rlist.add("hive\nbinary".getBytes("UTF-8"));
    }
    DefaultHCatRecord r = new DefaultHCatRecord(rlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized = JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(rlist, deserialized);
}
Example 16
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
/**
 * This test verifies that our json deserialization is not too strict, as per HIVE-6166.
 *
 * i.e., if our schema is "s:struct<a:int,b:string>,k:int", and we pass in data that looks like:
 *
 * <pre>
 * {
 *     "x" : "abc" ,
 *     "t" : {
 *         "a" : "1",
 *         "b" : "2",
 *         "c" : [
 *             { "x" : 2 , "y" : 3 } ,
 *             { "x" : 3 , "y" : 2 }
 *         ]
 *     } ,
 *     "s" : {
 *         "a" : 2 ,
 *         "b" : "blah",
 *         "c": "woo"
 *     }
 * }
 * </pre>
 *
 * Then it should still work, and ignore the "x" and "t" field and "c" subfield of "s", and it
 * should read k as null.
 */
@Test
public void testLooseJsonReadability() throws Exception {
    List<String> columnNames = Arrays.asList("s,k".split(","));
    List<TypeInfo> columnTypes =
            TypeInfoUtils.getTypeInfosFromTypeString("struct<a:int,b:string>,int");

    Text jsonText1 = new Text("{ \"x\" : \"abc\" , "
            + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
            + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }");
    Text jsonText2 = new Text("{ \"x\" : \"abc\" , "
            + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ,"
            + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } , "
            + "\"k\" : 113 "
            + "}");

    List<Object> expected1 = Arrays.<Object>asList(Arrays.asList(2, "blah"), null);
    List<Object> expected2 = Arrays.<Object>asList(Arrays.asList(2, "blah"), 113);

    List<Object> result1 = JsonSerdeUtils.deserialize(jsonText1, columnNames, columnTypes);
    List<Object> result2 = JsonSerdeUtils.deserialize(jsonText2, columnNames, columnTypes);

    Assert.assertEquals(expected1, result1);
    Assert.assertEquals(expected2, result2);
}
Example 17
Source File: EmoSerDe.java From emodb with Apache License 2.0
@Override
public void initialize(Configuration config, Properties properties) throws SerDeException {
    // Get the column names and types from the configuration properties
    String columnNamesProperty = properties.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypesProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    List<String> columnNames;
    List<TypeInfo> columnTypes;
    List<ObjectInspector> columnInspectors;

    if (columnNamesProperty.isEmpty()) {
        columnNames = ImmutableList.of();
    } else {
        columnNames = Arrays.asList(columnNamesProperty.split(","));
    }

    if (columnTypesProperty.isEmpty()) {
        columnTypes = ImmutableList.of();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty);
    }

    int numColumns = columnNames.size();
    checkArgument(columnTypes.size() == numColumns);

    _columns = Lists.newArrayListWithCapacity(numColumns);
    _values = Lists.newArrayListWithCapacity(numColumns);
    columnInspectors = Lists.newArrayListWithCapacity(numColumns);

    // Initialize the types and inspectors for each column
    for (int i = 0; i < numColumns; i++) {
        TypeInfo type = columnTypes.get(i);
        ObjectInspector columnInspector = getObjectInspectorForType(type);
        _columns.add(Maps.immutableEntry(columnNames.get(i), type));
        _values.add(null);
        columnInspectors.add(columnInspector);
    }

    _inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnInspectors);
}
Example 18
Source File: ExcelSerde.java From hadoopoffice with Apache License 2.0
/**
 * Initializes the SerDe.
 * You can define in the table properties (additionally to the standard Hive properties) the following options:
 * office.hive.write.defaultSheetName: The sheetname to which data should be written (note: as an input any sheets can be read or selected sheets according to HadoopOffice configuration values)
 * Any of the HadoopOffice options (hadoopoffice.*), such as encryption, signing, low footprint mode, linked workbooks, can be defined in the table properties @see <a href="https://github.com/ZuInnoTe/hadoopoffice/wiki/Hadoop-File-Format">HadoopOffice configuration</a>
 *
 * @param conf Hadoop Configuration
 * @param prop table properties.
 * @param partitionProperties ignored. Partitions are not supported.
 */
@Override
public void initialize(Configuration conf, Properties prop, Properties partitionProperties) throws SerDeException {
    LOG.debug("Initializing Excel Hive Serde");
    LOG.debug("Configuring Hive-only options");
    // configure hadoopoffice specific hive options
    String defaultSheetNameStr = prop.getProperty(ExcelSerde.CONF_DEFAULTSHEETNAME);
    if (defaultSheetNameStr != null) {
        this.defaultSheetName = defaultSheetNameStr;
    }
    // copy hadoopoffice options
    LOG.debug("Configuring HadoopOffice Format");
    Set<Entry<Object, Object>> entries = prop.entrySet();
    for (Entry<Object, Object> entry : entries) {
        if ((entry.getKey() instanceof String) && ((String) entry.getKey()).startsWith(ExcelSerde.HOSUFFIX)) {
            if (("TRUE".equalsIgnoreCase((String) entry.getValue())) || ("FALSE".equalsIgnoreCase(((String) entry.getValue())))) {
                conf.setBoolean((String) entry.getKey(), Boolean.valueOf((String) entry.getValue()));
            } else {
                conf.set((String) entry.getKey(), (String) entry.getValue());
            }
        }
    }
    // create object inspector (always a struct = row)
    LOG.debug("Creating object inspector");
    this.columnNames = Arrays.asList(prop.getProperty(serdeConstants.LIST_COLUMNS).split(","));
    this.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(prop.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (TypeInfo currentColumnType : columnTypes) {
        columnOIs.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(currentColumnType));
    }
    this.oi = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    // create converter
    LOG.debug("Creating converter");
    HadoopOfficeReadConfiguration hocr = new HadoopOfficeReadConfiguration(conf);
    this.readConverter = new ExcelConverterSimpleSpreadSheetCellDAO(hocr.getSimpleDateFormat(), hocr.getSimpleDecimalFormat(), hocr.getSimpleDateTimeFormat());
    HadoopOfficeWriteConfiguration howc = new HadoopOfficeWriteConfiguration(conf, "");
    this.writeConverter = new ExcelConverterSimpleSpreadSheetCellDAO(howc.getSimpleDateFormat(), howc.getSimpleDecimalFormat(), howc.getSimpleDateTimeFormat());
    // configure writing of header
    this.writeHeader = howc.getWriteHeader();
    GenericDataType[] columnsGD = new GenericDataType[columnNames.size()];
    for (int i = 0; i < columnOIs.size(); i++) {
        ObjectInspector currentOI = columnOIs.get(i);
        if (currentOI instanceof BooleanObjectInspector) {
            columnsGD[i] = new GenericBooleanDataType();
        } else if (currentOI instanceof DateObjectInspector) {
            columnsGD[i] = new GenericDateDataType();
        } else if (currentOI instanceof TimestampObjectInspector) {
            columnsGD[i] = new GenericTimestampDataType();
        } else if (currentOI instanceof ByteObjectInspector) {
            columnsGD[i] = new GenericByteDataType();
        } else if (currentOI instanceof ShortObjectInspector) {
            columnsGD[i] = new GenericShortDataType();
        } else if (currentOI instanceof IntObjectInspector) {
            columnsGD[i] = new GenericIntegerDataType();
        } else if (currentOI instanceof LongObjectInspector) {
            columnsGD[i] = new GenericLongDataType();
        } else if (currentOI instanceof DoubleObjectInspector) {
            columnsGD[i] = new GenericDoubleDataType();
        } else if (currentOI instanceof FloatObjectInspector) {
            columnsGD[i] = new GenericFloatDataType();
        } else if (currentOI instanceof HiveDecimalObjectInspector) {
            HiveDecimalObjectInspector currentOIHiveDecimalOI = (HiveDecimalObjectInspector) currentOI;
            columnsGD[i] = new GenericBigDecimalDataType(currentOIHiveDecimalOI.precision(), currentOIHiveDecimalOI.scale());
        } else if (currentOI instanceof StringObjectInspector) {
            columnsGD[i] = new GenericStringDataType();
        } else {
            LOG.warn("Could not detect desired datatype for column " + i + ". Type " + currentOI.getTypeName() + ". Using String");
            columnsGD[i] = new GenericStringDataType();
        }
    }
    this.readConverter.setSchemaRow(columnsGD);
    this.writeConverter.setSchemaRow(columnsGD);
    // create nullrow
    this.nullRow = new Object[this.columnNames.size()];
    // set writerow
    this.currentWriteRow = 0;
    // set outputrow
    this.outputRow = new Object[this.columnNames.size()];
    LOG.debug("Finished Initialization");
}
Example 19
Source File: SolrSerde.java From hive-solr with MIT License
@Override
public void initialize(@Nullable Configuration configuration, Properties tbl) throws SerDeException {
    row = new ArrayList<Object>();

    // Read column names
    String columnNameProp = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    if (columnNameProp != null && columnNameProp.length() > 0) {
        columnNames = Arrays.asList(columnNameProp.split(","));
    } else {
        columnNames = new ArrayList<String>();
    }

    // Read column types
    String columnTypeProp = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    // default all string
    if (columnTypeProp == null) {
        String[] types = new String[columnNames.size()];
        Arrays.fill(types, 0, types.length, serdeConstants.STRING_TYPE_NAME);
        columnTypeProp = StringUtils.join(types, ":");
    }
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProp);

    // Check that column names and types line up
    if (columnTypes.size() != columnNames.size()) {
        throw new SerDeException("len(columnNames) != len(columnTypes)");
    }

    // Create ObjectInspectors from the type information for each column
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>();
    ObjectInspector oi;
    for (int c = 0; c < columnNames.size(); c++) {
        oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
        columnOIs.add(oi);
    }
    objectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
}
Example 20
Source File: SMSerDe.java From spliceengine with GNU Affero General Public License v3.0
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help
 * perform actual serialization and deserialization of data.
 */
//@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    if (Log.isDebugEnabled())
        SpliceLogUtils.debug(Log, "initialize with conf=%s, tbl=%s", conf, tbl);
    // Get a list of the table's column names.
    tableName = tbl.getProperty(MRConstants.SPLICE_TABLE_NAME);
    String hbaseDir = null;
    if (conf != null) {
        hbaseDir = conf.get(HConstants.HBASE_DIR);
    }
    if (hbaseDir == null)
        hbaseDir = System.getProperty(HConstants.HBASE_DIR);
    if (hbaseDir == null)
        throw new SerDeException("hbase root directory not set, please include hbase.rootdir in config or via -D system property ...");
    if (conf != null) {
        conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName);
        conf.set(MRConstants.SPLICE_JDBC_STR, tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
        conf.set(HConstants.HBASE_DIR, hbaseDir);
        if (conf.get(HiveConf.ConfVars.POSTEXECHOOKS.varname) == null) {
            conf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "com.splicemachine.mrio.api.hive.PostExecHook");
        }
        if (conf.get(HiveConf.ConfVars.ONFAILUREHOOKS.varname) == null) {
            conf.set(HiveConf.ConfVars.ONFAILUREHOOKS.varname, "com.splicemachine.mrio.api.hive.FailureExecHook");
        }
    }
    if (sqlUtil == null)
        sqlUtil = SMSQLUtil.getInstance(tbl.getProperty(MRConstants.SPLICE_JDBC_STR));
    String colNamesStr = tbl.getProperty(Constants.LIST_COLUMNS);
    colNames.clear();
    for (String split : colNamesStr.split(","))
        colNames.add(split.toUpperCase());
    String colTypesStr = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
    colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
    objectCache = new ArrayList<Object>(colTypes.size());
    if (tableName != null) {
        tableName = tableName.trim().toUpperCase();
        try {
            if (!sqlUtil.checkTableExists(tableName))
                throw new SerDeException(String.format("table %s does not exist...", tableName));
            if (conf != null) {
                ScanSetBuilder tableScannerBuilder = sqlUtil.getTableScannerBuilder(tableName, colNames);
                conf.set(MRConstants.SPLICE_SCAN_INFO, tableScannerBuilder.base64Encode());
                // TableContext tableContext = sqlUtil.createTableContext(tableName, tableScannerBuilder);
                // conf.set(MRConstants.SPLICE_TBLE_CONTEXT, tableContext.getTableContextBase64String());
            }
        } catch (Exception e) {
            throw new SerDeException(e);
        }
    }
    if (Log.isDebugEnabled())
        SpliceLogUtils.debug(Log, "generating hive info colNames=%s, colTypes=%s", colNames, colTypes);
    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
    //serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName());
    Log.info("--------Finished initialize");
}