Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory#getStructTypeInfo()
The following examples show how to use
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory#getStructTypeInfo().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JSONSerDe.java From searchanalytics-bigdata with MIT License | 6 votes |
/** * An initialization function used to gather information about the table. * Typically, a SerDe implementation will be interested in the list of * column names and their types. That information will be used to help * perform actual serialization and deserialization of data. */ @Override public void initialize(final Configuration conf, final Properties tbl) throws SerDeException { // Get a list of the table's column names. final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS); // Jai...change column names to lower case. colNames = Arrays.asList(colNamesStr.toLowerCase().split(",")); // Get a list of TypeInfos for the columns. This list lines up with // the list of column names. final String colTypesStr = tbl .getProperty(serdeConstants.LIST_COLUMN_TYPES); final List<TypeInfo> colTypes = TypeInfoUtils .getTypeInfosFromTypeString(colTypesStr); rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo( colNames, colTypes); rowOI = TypeInfoUtils .getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo); }
Example 2
Source File: BlurObjectInspectorGenerator.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
private TypeInfo getTypeInfo(String fieldType) { if (fieldType.equals(TEXT) || fieldType.equals(STRING) || fieldType.equals(STORED)) { return TypeInfoFactory.stringTypeInfo; } else if (fieldType.equals(LONG)) { return TypeInfoFactory.longTypeInfo; } else if (fieldType.equals(INT)) { return TypeInfoFactory.intTypeInfo; } else if (fieldType.equals(FLOAT)) { return TypeInfoFactory.floatTypeInfo; } else if (fieldType.equals(DOUBLE)) { return TypeInfoFactory.doubleTypeInfo; } else if (fieldType.equals(DATE)) { return TypeInfoFactory.dateTypeInfo; } else if (fieldType.equals(GEO_POINTVECTOR) || fieldType.equals(GEO_RECURSIVEPREFIX) || fieldType.equals(GEO_TERMPREFIX)) { List<TypeInfo> typeInfos = Arrays.asList((TypeInfo) TypeInfoFactory.floatTypeInfo, (TypeInfo) TypeInfoFactory.floatTypeInfo); return TypeInfoFactory.getStructTypeInfo(Arrays.asList(LATITUDE, LONGITUDE), typeInfos); } // Return string for anything that is not a built in type. return TypeInfoFactory.stringTypeInfo; }
Example 3
Source File: JSONCDHSerDe.java From bigdata-tutorial with Apache License 2.0 | 6 votes |
/** * An initialization function used to gather information about the table. * Typically, a SerDe implementation will be interested in the list of * column names and their types. That information will be used to help perform * actual serialization and deserialization of data. */ @Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { // Get a list of the table's column names. String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS); colNames = Arrays.asList(colNamesStr.split(",")); // Get a list of TypeInfos for the columns. This list lines up with // the list of column names. String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr); rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes); rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo); }
Example 4
Source File: TestNiFiOrcUtils.java From localization_nifi with Apache License 2.0 | 5 votes |
/**
 * Builds a struct type with seven fields, one per primitive kind produced by
 * {@code TypeInfoCreator}: int, long, boolean, float, double, binary and string.
 * The field holding the binary type is named {@code "bytes"}.
 *
 * @return the struct type info describing the seven fields
 */
public static TypeInfo buildPrimitiveOrcSchema() {
    final List<String> fieldNames =
        Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string");
    final List<TypeInfo> fieldTypes = Arrays.asList(
        TypeInfoCreator.createInt(),
        TypeInfoCreator.createLong(),
        TypeInfoCreator.createBoolean(),
        TypeInfoCreator.createFloat(),
        TypeInfoCreator.createDouble(),
        TypeInfoCreator.createBinary(),
        TypeInfoCreator.createString());
    return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypes);
}
Example 5
Source File: TestNiFiOrcUtils.java From nifi with Apache License 2.0 | 5 votes |
/**
 * Creates the struct type used as a primitive-only schema: seven fields named
 * int, long, boolean, float, double, bytes and string, each paired with the type
 * returned by the corresponding {@code TypeInfoCreator} factory method.
 *
 * @return the struct type info for the seven primitive fields
 */
public static TypeInfo buildPrimitiveOrcSchema() {
    final List<String> names =
        Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string");
    final List<TypeInfo> types = Arrays.asList(
        TypeInfoCreator.createInt(),
        TypeInfoCreator.createLong(),
        TypeInfoCreator.createBoolean(),
        TypeInfoCreator.createFloat(),
        TypeInfoCreator.createDouble(),
        TypeInfoCreator.createBinary(),
        TypeInfoCreator.createString());
    return TypeInfoFactory.getStructTypeInfo(names, types);
}
Example 6
Source File: TestNiFiOrcUtils.java From nifi with Apache License 2.0 | 5 votes |
/**
 * Returns a struct type whose fields cover the primitive types available from
 * {@code TypeInfoCreator}. Field names and types are listed in the same order:
 * int, long, boolean, float, double, bytes (binary) and string.
 *
 * @return the struct type info for the primitive-only schema
 */
public static TypeInfo buildPrimitiveOrcSchema() {
    final List<String> columnNames =
        Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string");
    final List<TypeInfo> columnTypes = Arrays.asList(
        TypeInfoCreator.createInt(),
        TypeInfoCreator.createLong(),
        TypeInfoCreator.createBoolean(),
        TypeInfoCreator.createFloat(),
        TypeInfoCreator.createDouble(),
        TypeInfoCreator.createBinary(),
        TypeInfoCreator.createString());
    return TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
}
Example 7
Source File: NiFiOrcUtils.java From nifi with Apache License 2.0 | 5 votes |
/**
 * Converts a record schema into a struct type with one field per record field.
 *
 * @param recordSchema   the schema whose fields are converted
 * @param hiveFieldNames when {@code true}, field names are lower-cased; the flag is also
 *                       passed through to {@code getOrcField} for each field's data type
 * @return the struct type info, or {@code null} when the schema reports a {@code null} field list
 * @throws IllegalArgumentException declared in the signature; presumably raised by
 *                                  {@code getOrcField} for unsupported types (not visible here)
 */
public static TypeInfo getOrcSchema(RecordSchema recordSchema, boolean hiveFieldNames) throws IllegalArgumentException {
    final List<RecordField> recordFields = recordSchema.getFields();
    if (recordFields == null) {
        // Kept for backward compatibility: callers receive null when there is no field list.
        return null;
    }

    final List<String> orcFieldNames = new ArrayList<>(recordFields.size());
    final List<TypeInfo> orcFields = new ArrayList<>(recordFields.size());
    for (RecordField recordField : recordFields) {
        final String rawName = recordField.getFieldName();
        // Locale.ROOT makes the lower-casing independent of the JVM's default locale
        // (with a Turkish default locale, "I" would otherwise become a dotless "ı").
        orcFieldNames.add(hiveFieldNames ? rawName.toLowerCase(java.util.Locale.ROOT) : rawName);
        orcFields.add(getOrcField(recordField.getDataType(), hiveFieldNames));
    }
    return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields);
}
Example 8
Source File: ParquetHiveSerDe.java From parquet-mr with Apache License 2.0 | 5 votes |
@Override public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException { final TypeInfo rowTypeInfo; final List<String> columnNames; final List<TypeInfo> columnTypes; // Get column names and sort order final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS); final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES); if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(",")); } if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes); } // Create row related objects rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo); // Stats part stats = new SerDeStats(); serializedSize = 0; deserializedSize = 0; status = LAST_OPERATION.UNKNOWN; }
Example 9
Source File: HiveTypeUtil.java From flink with Apache License 2.0 | 5 votes |
/**
 * Converts a row type into a struct type by visiting each field's type with this visitor.
 *
 * <p>If any field converts to {@code null}, the conversion is abandoned and the result of
 * {@code defaultMethod(rowType)} is returned instead.
 *
 * @param rowType the row type to convert
 * @return the struct type info for the row, or the {@code defaultMethod} result
 */
@Override
public TypeInfo visit(RowType rowType) {
    final List<String> fieldNames = rowType.getFieldNames();
    final List<TypeInfo> fieldTypeInfos = new ArrayList<>(fieldNames.size());
    for (String fieldName : fieldNames) {
        final int position = rowType.getFieldIndex(fieldName);
        final TypeInfo converted = rowType.getTypeAt(position).accept(this);
        if (converted == null) {
            // One unconvertible field makes the whole row unconvertible.
            return defaultMethod(rowType);
        }
        fieldTypeInfos.add(converted);
    }
    return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
}
Example 10
Source File: LWSerDe.java From hive-solr with Apache License 2.0 | 5 votes |
/**
 * Initializes this SerDe from the table properties.
 *
 * <p>Reads the column names ({@code serdeConstants.LIST_COLUMNS}) and column types
 * ({@code serdeConstants.LIST_COLUMN_TYPES}), builds the row struct type and its standard
 * Java object inspector, resets the reusable row list, and reads the
 * {@code ENABLE_FIELD_MAPPING} flag (default {@code "false"}).
 *
 * @param conf          the job configuration; not read by this method
 * @param tblProperties table properties holding the column metadata and the mapping flag
 * @throws SerDeException declared in the signature; nothing in this method throws it directly
 */
@Override
public void initialize(Configuration conf, Properties tblProperties) throws SerDeException {
    final String columnList = tblProperties.getProperty(serdeConstants.LIST_COLUMNS);
    colNames = Arrays.asList(columnList.split(","));

    final String typeList = tblProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    colTypes = TypeInfoUtils.getTypeInfosFromTypeString(typeList);

    typeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    row = new ArrayList<>();

    final String mappingFlag = tblProperties.getProperty(ENABLE_FIELD_MAPPING, "false");
    enableFieldMapping = Boolean.valueOf(mappingFlag);
}
Example 11
Source File: IndexRSerde.java From indexr with Apache License 2.0 | 5 votes |
/**
 * Initializes this SerDe from the table properties.
 *
 * <p>Column names come from {@code IOConstants.COLUMNS}; an empty property yields an empty
 * list. Column types come from {@code IOConstants.COLUMNS_TYPES}; when that property is empty,
 * every column is treated as {@code string}. The two lists must have the same size.
 *
 * @param conf the job configuration; not read by this method
 * @param tbl  table properties holding the column names and the column types
 * @throws SerDeException           declared in the signature; nothing here throws it directly
 * @throws IllegalArgumentException if the number of column names and column types differ
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    final String nameList = tbl.getProperty(IOConstants.COLUMNS);
    final String typeList = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    columnNames = Strings.isEmpty(nameList)
        ? new ArrayList<String>()
        : Arrays.asList(nameList.split(","));

    // With no declared types, fall back to one "string" per column, joined by ':'.
    final String effectiveTypes = Strings.isEmpty(typeList)
        ? StringUtils.repeat("string", ":", columnNames.size())
        : typeList;
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(effectiveTypes);

    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException(
            "IndexRHiveSerde initialization failed. Number of column name and column type differs. columnNames = "
                + columnNames + ", columnTypes = " + columnTypes);
    }

    final StructTypeInfo rowStructType =
        (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector(rowStructType);

    stats = new SerDeStats();
    serdeSize = 0;
}
Example 12
Source File: JsonSerdeUtils.java From incubator-hivemall with Apache License 2.0 | 5 votes |
@Nonnull private static Object parseObject(@Nonnull final JsonParser p, @CheckForNull final List<String> columnNames, @CheckForNull final List<TypeInfo> columnTypes) throws JsonParseException, IOException, SerDeException { Preconditions.checkNotNull(columnNames, "columnNames MUST NOT be null in parseObject", SerDeException.class); Preconditions.checkNotNull(columnTypes, "columnTypes MUST NOT be null in parseObject", SerDeException.class); if (columnNames.size() != columnTypes.size()) { throw new SerDeException( "Size of columnNames and columnTypes does not match. #columnNames=" + columnNames.size() + ", #columnTypes=" + columnTypes.size()); } TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); final HCatSchema schema; try { schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema(); } catch (HCatException e) { throw new SerDeException(e); } final List<Object> r = new ArrayList<Object>(Collections.nCopies(columnNames.size(), null)); JsonToken token; while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) { // iterate through each token, and create appropriate object here. populateRecord(r, token, p, schema); } if (columnTypes.size() == 1) { return r.get(0); } return r; }
Example 13
Source File: HiveSchemaConverter.java From streamx with Apache License 2.0 | 5 votes |
/**
 * Converts a struct schema into a struct type: each field contributes its name and the
 * result of {@code convert} applied to the field's own schema.
 *
 * @param schema the schema whose fields are converted
 * @return the struct type info with one entry per field, in field order
 */
public static TypeInfo convertStruct(Schema schema) {
    final List<Field> structFields = schema.fields();
    final int fieldCount = structFields.size();
    final List<String> fieldNames = new ArrayList<>(fieldCount);
    final List<TypeInfo> fieldTypes = new ArrayList<>(fieldCount);
    for (Field structField : structFields) {
        final String fieldName = structField.name();
        fieldNames.add(fieldName);
        final TypeInfo fieldType = convert(structField.schema());
        fieldTypes.add(fieldType);
    }
    return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypes);
}
Example 14
Source File: HiveTypeUtil.java From flink with Apache License 2.0 | 5 votes |
/**
 * Converts a row type into a struct type. Each field's type is visited with a new
 * {@code TypeInfoLogicalTypeVisitor} constructed from {@code dataType}.
 *
 * <p>If any field converts to {@code null}, the conversion is abandoned and the result of
 * {@code defaultMethod(rowType)} is returned instead.
 *
 * @param rowType the row type to convert
 * @return the struct type info for the row, or the {@code defaultMethod} result
 */
@Override
public TypeInfo visit(RowType rowType) {
    final List<String> fieldNames = rowType.getFieldNames();
    final List<TypeInfo> fieldTypeInfos = new ArrayList<>(fieldNames.size());
    for (String fieldName : fieldNames) {
        final int position = rowType.getFieldIndex(fieldName);
        final TypeInfo converted =
            rowType.getTypeAt(position).accept(new TypeInfoLogicalTypeVisitor(dataType));
        if (converted == null) {
            // One unconvertible field makes the whole row unconvertible.
            return defaultMethod(rowType);
        }
        fieldTypeInfos.add(converted);
    }
    return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
}
Example 15
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0 | 4 votes |
@Test public void testRWNull() throws Exception { List<Object> nlist = new ArrayList<Object>(13); { nlist.add(null); // tinyint nlist.add(null); // smallint nlist.add(null); // int nlist.add(null); // bigint nlist.add(null); // double nlist.add(null); // float nlist.add(null); // string nlist.add(null); // string nlist.add(null); // struct nlist.add(null); // array nlist.add(null); // map nlist.add(null); // bool nlist.add(null); // complex nlist.add(null); //decimal(5,2) nlist.add(null); //char(10) nlist.add(null); //varchar(20) nlist.add(null); //date nlist.add(null); //timestamp nlist.add(null); //binary } DefaultHCatRecord r = new DefaultHCatRecord(nlist); List<String> columnNames = Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(",")); List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString( "tinyint,smallint,int,bigint,double,float,string,string," + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean," + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>," + "decimal(5,2),char(10),varchar(20),date,timestamp,binary"); StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); HCatRecordObjectInspector objInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames); List<Object> deserialized = JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes); assertRecordEquals(nlist, deserialized); }
Example 16
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0 | 4 votes |
@Test public void testRW() throws Exception { List<Object> rlist = new ArrayList<Object>(13); { rlist.add(new Byte("123")); rlist.add(new Short("456")); rlist.add(new Integer(789)); rlist.add(new Long(1000L)); rlist.add(new Double(5.3D)); rlist.add(new Float(2.39F)); rlist.add(new String("hcat\nand\nhadoop")); rlist.add(null); List<Object> innerStruct = new ArrayList<Object>(2); innerStruct.add(new String("abc")); innerStruct.add(new String("def")); rlist.add(innerStruct); List<Integer> innerList = new ArrayList<Integer>(); innerList.add(314); innerList.add(007); rlist.add(innerList); Map<Short, String> map = new HashMap<Short, String>(3); map.put(new Short("2"), "hcat is cool"); map.put(new Short("3"), "is it?"); map.put(new Short("4"), "or is it not?"); rlist.add(map); rlist.add(new Boolean(true)); List<Object> c1 = new ArrayList<Object>(); List<Object> c1_1 = new ArrayList<Object>(); c1_1.add(new Integer(12)); List<Object> i2 = new ArrayList<Object>(); List<Integer> ii1 = new ArrayList<Integer>(); ii1.add(new Integer(13)); ii1.add(new Integer(14)); i2.add(ii1); Map<String, List<?>> ii2 = new HashMap<String, List<?>>(); List<Integer> iii1 = new ArrayList<Integer>(); iii1.add(new Integer(15)); ii2.put("phew", iii1); i2.add(ii2); c1_1.add(i2); c1.add(c1_1); rlist.add(c1); rlist.add(HiveDecimal.create(new BigDecimal("123.45")));//prec 5, scale 2 rlist.add(new HiveChar("hive\nchar", 10)); rlist.add(new HiveVarchar("hive\nvarchar", 20)); rlist.add(Date.valueOf("2014-01-07")); rlist.add(new Timestamp(System.currentTimeMillis())); rlist.add("hive\nbinary".getBytes("UTF-8")); } DefaultHCatRecord r = new DefaultHCatRecord(rlist); List<String> columnNames = Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(",")); List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString( "tinyint,smallint,int,bigint,double,float,string,string," + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean," + 
"array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>," + "decimal(5,2),char(10),varchar(20),date,timestamp,binary"); StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); HCatRecordObjectInspector objInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames); List<Object> deserialized = JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes); assertRecordEquals(rlist, deserialized); }
Example 17
Source File: SMSerDe.java From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
/** * An initialization function used to gather information about the table. * Typically, a SerDe implementation will be interested in the list of * column names and their types. That information will be used to help * perform actual serialization and deserialization of data. */ //@Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { if (Log.isDebugEnabled()) SpliceLogUtils.debug(Log, "initialize with conf=%s, tbl=%s",conf,tbl); // Get a list of the table's column names. tableName = tbl.getProperty(MRConstants.SPLICE_TABLE_NAME); String hbaseDir = null; if (conf != null) { hbaseDir = conf.get(HConstants.HBASE_DIR); } if (hbaseDir == null) hbaseDir = System.getProperty(HConstants.HBASE_DIR); if (hbaseDir == null) throw new SerDeException("hbase root directory not set, please include hbase.rootdir in config or via -D system property ..."); if (conf != null) { conf.set(MRConstants.SPLICE_INPUT_TABLE_NAME, tableName); conf.set(MRConstants.SPLICE_JDBC_STR, tbl.getProperty(MRConstants.SPLICE_JDBC_STR)); conf.set(HConstants.HBASE_DIR, hbaseDir); if (conf.get(HiveConf.ConfVars.POSTEXECHOOKS.varname) == null) { conf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "com.splicemachine.mrio.api.hive.PostExecHook"); } if (conf.get(HiveConf.ConfVars.ONFAILUREHOOKS.varname) == null) { conf.set(HiveConf.ConfVars.ONFAILUREHOOKS.varname, "com.splicemachine.mrio.api.hive.FailureExecHook"); } } if (sqlUtil == null) sqlUtil = SMSQLUtil.getInstance(tbl.getProperty(MRConstants.SPLICE_JDBC_STR)); String colNamesStr = tbl.getProperty(Constants.LIST_COLUMNS); colNames.clear(); for (String split: colNamesStr.split(",")) colNames.add(split.toUpperCase()); String colTypesStr = tbl.getProperty(Constants.LIST_COLUMN_TYPES); colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr); objectCache = new ArrayList<Object>(colTypes.size()); if (tableName != null) { tableName = tableName.trim().toUpperCase(); try { if (!sqlUtil.checkTableExists(tableName)) 
throw new SerDeException(String.format("table %s does not exist...",tableName)); if (conf != null) { ScanSetBuilder tableScannerBuilder = sqlUtil.getTableScannerBuilder(tableName, colNames); conf.set(MRConstants.SPLICE_SCAN_INFO, tableScannerBuilder.base64Encode()); // TableContext tableContext = sqlUtil.createTableContext(tableName, tableScannerBuilder); // conf.set(MRConstants.SPLICE_TBLE_CONTEXT, tableContext.getTableContextBase64String()); } } catch (Exception e) { throw new SerDeException(e); } } if (Log.isDebugEnabled()) SpliceLogUtils.debug(Log, "generating hive info colNames=%s, colTypes=%s",colNames,colTypes); rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes); rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo); //serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName()); Log.info("--------Finished initialize"); }