org.apache.hadoop.hive.serde2.SerDe Java Examples
The following examples show how to use org.apache.hadoop.hive.serde2.SerDe.
The source file, originating project, and license are noted above each example.
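As a quick orientation before the examples: below is a minimal sketch of the typical SerDe read-path lifecycle (instantiate by class name, initialize with a configuration and table properties, obtain an ObjectInspector, then deserialize raw records). The helper class and method are illustrative, not part of any project shown here; the SerDe calls themselves are the standard Hive API used throughout the examples.

import java.util.Properties;

import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;

public class SerDeLifecycleSketch {
  // Illustrative helper: instantiates, initializes, and uses a SerDe to read one record.
  public static Object readOneRow(JobConf jobConf, String serDeClassName,
      Properties tableProperties, Writable rawRecord) throws Exception {
    // 1. Instantiate the SerDe by class name, as the reflection-based examples below do.
    SerDe serDe = Class.forName(serDeClassName).asSubclass(SerDe.class)
        .getConstructor().newInstance();
    // 2. Initialize with the job configuration and table properties
    //    (typically "columns" and "columns.types", as in Example #5).
    serDe.initialize(jobConf, tableProperties);
    // 3. The ObjectInspector describes the row layout the SerDe produces.
    ObjectInspector rowInspector = serDe.getObjectInspector();
    // 4. Turn a raw Writable from the InputFormat into a Hive row object.
    Object row = serDe.deserialize(rawRecord);
    return row;
  }
}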
Example #1
Source File: HiveAbstractReader.java From dremio-oss with Apache License 2.0
public HiveAbstractReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
                          final List<SchemaPath> projectedColumns, final OperatorContext context,
                          final JobConf jobConf, final SerDe tableSerDe, final StructObjectInspector tableOI,
                          final SerDe partitionSerDe, final StructObjectInspector partitionOI,
                          final ScanFilter filter, final Collection<List<String>> referencedTables,
                          final UserGroupInformation readerUgi) {
  super(context, projectedColumns);
  this.tableAttr = tableAttr;
  this.split = split;
  this.jobConf = jobConf;
  this.tableSerDe = tableSerDe;
  this.tableOI = tableOI;
  // Fall back to the table-level SerDe and ObjectInspector when the split
  // carries no partition-specific ones.
  this.partitionSerDe = partitionSerDe == null ? tableSerDe : partitionSerDe;
  this.partitionOI = partitionOI == null ? tableOI : partitionOI;
  this.filter = filter;
  this.referencedTables = referencedTables;
  this.readerUgi = readerUgi;
}
Example #2
Source File: HiveUtilities.java From dremio-oss with Apache License 2.0
public static StructObjectInspector getStructOI(final SerDe serDe) throws Exception {
  ObjectInspector oi = serDe.getObjectInspector();
  if (oi.getCategory() != Category.STRUCT) {
    throw new UnsupportedOperationException(String.format("%s category not supported", oi.getCategory()));
  }
  return (StructObjectInspector) oi;
}
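ScanWithHiveReader (Example #17) uses this helper together with createSerDe (Example #20) to derive the table-level and partition-level row inspectors that the Hive readers consume.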
Example #3
Source File: HiveORCVectorizedReader.java From dremio-oss with Apache License 2.0
public HiveORCVectorizedReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
                               final List<SchemaPath> projectedColumns, final OperatorContext context,
                               final JobConf jobConf, final SerDe tableSerDe, final StructObjectInspector tableOI,
                               final SerDe partitionSerDe, final StructObjectInspector partitionOI,
                               final ScanFilter filter, final Collection<List<String>> referencedTables,
                               final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI,
      partitionSerDe, partitionOI, filter, referencedTables, readerUgi);
}
Example #4
Source File: HiveTextReader.java From dremio-oss with Apache License 2.0
public HiveTextReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
                      final List<SchemaPath> projectedColumns, final OperatorContext context,
                      final JobConf jobConf, final SerDe tableSerDe, final StructObjectInspector tableOI,
                      final SerDe partitionSerDe, final StructObjectInspector partitionOI,
                      final ScanFilter filter, final Collection<List<String>> referencedTables,
                      final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI,
      partitionSerDe, partitionOI, filter, referencedTables, readerUgi);
}
Example #5
Source File: TestInputOutputFormat.java From hive-dwrf with Apache License 2.0
@Test
public void testEmptyFile() throws Exception {
  JobConf job = new JobConf(conf);
  Properties properties = new Properties();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, properties, Reporter.NULL);
  writer.close(true);
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");
  SerDe serde = new OrcSerde();
  serde.initialize(conf, properties);
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);

  // read the whole file
  conf.set("hive.io.file.readcolumn.ids", "0,1");
  org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Object value = reader.createValue();
  assertEquals(0.0, reader.getProgress(), 0.00001);
  assertEquals(0, reader.getPos());
  assertEquals(false, reader.next(key, value));
  reader.close();
  assertEquals(null, serde.getSerDeStats());
}
Example #6
Source File: HiveRecordReaders.java From dremio-oss with Apache License 2.0
Reader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
       final List<SchemaPath> projectedColumns, final OperatorContext context,
       final JobConf jobConf, final SerDe tableSerDe, final StructObjectInspector tableOI,
       final SerDe partitionSerDe, final StructObjectInspector partitionOI,
       final ScanFilter filter, final Collection<List<String>> referencedTables,
       final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI,
      partitionSerDe, partitionOI, filter, referencedTables, readerUgi);
}
Example #7
Source File: HiveSerDeWrapper.java From incubator-gobblin with Apache License 2.0
/**
 * Get the {@link SerDe} instance associated with this {@link HiveSerDeWrapper}.
 * This method performs lazy initialization.
 */
public SerDe getSerDe() throws IOException {
  if (!this.serDe.isPresent()) {
    try {
      this.serDe = Optional.of(SerDe.class.cast(Class.forName(this.serDeClassName).newInstance()));
    } catch (Throwable t) {
      throw new IOException("Failed to instantiate SerDe " + this.serDeClassName, t);
    }
  }
  return this.serDe.get();
}
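Note the deliberately broad catch of Throwable: class loading and reflective instantiation can fail with Errors (for example, NoClassDefFoundError) as well as Exceptions, and the wrapper surfaces both as a checked IOException that names the offending SerDe class.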
Example #8
Source File: KuduStorageHandler.java From HiveKudu-Handler with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return HiveKuduSerDe.class;
}
Example #9
Source File: SMStorageHandler.java From spliceengine with GNU Affero General Public License v3.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return SMSerDe.class;
}
Example #10
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return JdbcSerDe.class;
}
Example #11
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return EsSerDe.class;
}
Example #12
Source File: AccumuloStorageHandler.java From accumulo-hive-storage-manager with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return AccumuloSerde.class;
}
Example #13
Source File: CassandraStorageHandler.java From Hive-Cassandra with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return CassandraColumnSerDe.class;
}
Example #14
Source File: BlurHiveStorageHandler.java From incubator-retired-blur with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return BlurSerDe.class;
}
Example #15
Source File: KafkaStorageHandler.java From HiveKa with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return AvroSerDe.class;
}
Example #16
Source File: SolrStorageHandler.java From hive-solr with MIT License
@Override
public Class<? extends SerDe> getSerDeClass() {
  return SolrSerde.class;
}
Example #17
Source File: ScanWithHiveReader.java From dremio-oss with Apache License 2.0
private static RecordReader getRecordReader(HiveSplitXattr splitXattr, HiveTableXattr tableXattr,
    OperatorContext context, HiveConf hiveConf, SplitAndPartitionInfo split,
    CompositeReaderConfig compositeReader, HiveProxyingSubScan config,
    UserGroupInformation readerUgi) throws Exception {

  final JobConf baseJobConf = new JobConf(hiveConf);
  final Properties tableProperties = new Properties();
  addProperties(baseJobConf, tableProperties, HiveReaderProtoUtil.getTableProperties(tableXattr));

  final boolean isTransactional = AcidUtils.isTablePropertyTransactional(baseJobConf);
  final boolean isPartitioned = config.getPartitionColumns() != null && config.getPartitionColumns().size() > 0;
  final Optional<String> tableInputFormat = HiveReaderProtoUtil.getTableInputFormat(tableXattr);
  final JobConf jobConf = new JobConf(baseJobConf);

  final SerDe tableSerDe = createSerDe(jobConf, HiveReaderProtoUtil.getTableSerializationLib(tableXattr).get(), tableProperties);
  final StructObjectInspector tableOI = getStructOI(tableSerDe);
  final SerDe partitionSerDe;
  final StructObjectInspector partitionOI;

  boolean hasDeltas = false;
  if (isTransactional) {
    InputSplit inputSplit = HiveUtilities.deserializeInputSplit(splitXattr.getInputSplit());
    if (inputSplit instanceof OrcSplit) {
      hasDeltas = hasDeltas((OrcSplit) inputSplit);
    }
  }

  final Class<? extends HiveAbstractReader> tableReaderClass =
      getNativeReaderClass(tableInputFormat, context.getOptions(), hiveConf, false, isTransactional && hasDeltas);
  final Constructor<? extends HiveAbstractReader> tableReaderCtor = getNativeReaderCtor(tableReaderClass);
  Constructor<? extends HiveAbstractReader> readerCtor = tableReaderCtor;

  // It is possible for a partition to have a different input format than the table input format.
  if (isPartitioned) {
    final List<Prop> partitionPropertiesList;
    final Properties partitionProperties = new Properties();
    final Optional<String> partitionInputFormat;
    final Optional<String> partitionStorageHandlerName;

    // First add table properties and then add partition properties. Partition properties override table properties.
    addProperties(jobConf, partitionProperties, HiveReaderProtoUtil.getTableProperties(tableXattr));

    // If partition properties are stored in DatasetMetadata (pre 3.2.0)
    if (HiveReaderProtoUtil.isPreDremioVersion3dot2dot0LegacyFormat(tableXattr)) {
      logger.debug("Reading partition properties from DatasetMetadata");
      partitionPropertiesList = HiveReaderProtoUtil.getPartitionProperties(tableXattr, splitXattr.getPartitionId());
      addProperties(jobConf, partitionProperties, partitionPropertiesList);
      partitionSerDe = createSerDe(jobConf,
          HiveReaderProtoUtil.getPartitionSerializationLib(tableXattr, splitXattr.getPartitionId()).get(),
          partitionProperties);
      partitionInputFormat = HiveReaderProtoUtil.getPartitionInputFormat(tableXattr, splitXattr.getPartitionId());
      partitionStorageHandlerName = HiveReaderProtoUtil.getPartitionStorageHandler(tableXattr, splitXattr.getPartitionId());
    } else {
      logger.debug("Reading partition properties from PartitionChunk");
      final PartitionXattr partitionXattr = HiveReaderProtoUtil.getPartitionXattr(split);
      partitionPropertiesList = HiveReaderProtoUtil.getPartitionProperties(tableXattr, partitionXattr);
      addProperties(jobConf, partitionProperties, partitionPropertiesList);
      partitionSerDe = createSerDe(jobConf,
          HiveReaderProtoUtil.getPartitionSerializationLib(tableXattr, partitionXattr),
          partitionProperties);
      partitionInputFormat = HiveReaderProtoUtil.getPartitionInputFormat(tableXattr, partitionXattr);
      partitionStorageHandlerName = HiveReaderProtoUtil.getPartitionStorageHandler(tableXattr, partitionXattr);
    }

    jobConf.setInputFormat(getInputFormatClass(jobConf, partitionInputFormat, partitionStorageHandlerName));
    partitionOI = getStructOI(partitionSerDe);

    final boolean mixedSchema = !tableOI.equals(partitionOI);
    if (!partitionInputFormat.equals(tableInputFormat) || mixedSchema || isTransactional && hasDeltas) {
      final Class<? extends HiveAbstractReader> partitionReaderClass = getNativeReaderClass(
          partitionInputFormat, context.getOptions(), jobConf, mixedSchema, isTransactional);
      readerCtor = getNativeReaderCtor(partitionReaderClass);
    }
  } else {
    partitionSerDe = null;
    partitionOI = null;
    jobConf.setInputFormat(getInputFormatClass(jobConf, tableInputFormat, HiveReaderProtoUtil.getTableStorageHandler(tableXattr)));
  }

  return readerCtor.newInstance(tableXattr, split, compositeReader.getInnerColumns(), context, jobConf,
      tableSerDe, tableOI, partitionSerDe, partitionOI, config.getFilter(), config.getReferencedTables(), readerUgi);
}
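In summary: the table-level SerDe and ObjectInspector are always built; for partitioned splits, partition properties are layered over the table properties to build a partition-level SerDe/ObjectInspector pair, and a different native reader constructor is selected when the partition's input format, schema, or ACID delta state differs from the table's.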
Example #18
Source File: ScanWithHiveReader.java From dremio-oss with Apache License 2.0
private static Constructor<? extends HiveAbstractReader> getNativeReaderCtor(
    Class<? extends HiveAbstractReader> clazz) throws NoSuchMethodException {
  return clazz.getConstructor(HiveTableXattr.class, SplitAndPartitionInfo.class, List.class,
      OperatorContext.class, JobConf.class, SerDe.class, StructObjectInspector.class,
      SerDe.class, StructObjectInspector.class, ScanFilter.class, Collection.class,
      UserGroupInformation.class);
}
Example #19
Source File: HiveTextReader.java From dremio-oss with Apache License 2.0
@Override
public int populateData() throws IOException, SerDeException {
  // Cache hot fields in locals for the per-record loop.
  final SkipRecordsInspector skipRecordsInspector = this.skipRecordsInspector;
  final RecordReader<Object, Object> reader = this.reader;
  final Converter partTblObjectInspectorConverter = this.partTblObjectInspectorConverter;
  final Object key = this.key;
  final int numRowsPerBatch = (int) this.numRowsPerBatch;
  final StructField[] selectedStructFieldRefs = this.selectedStructFieldRefs;
  final SerDe partitionSerDe = this.partitionSerDe;
  final StructObjectInspector finalOI = this.finalOI;
  final ObjectInspector[] selectedColumnObjInspectors = this.selectedColumnObjInspectors;
  final HiveFieldConverter[] selectedColumnFieldConverters = this.selectedColumnFieldConverters;
  final ValueVector[] vectors = this.vectors;

  skipRecordsInspector.reset();
  Object value;

  int recordCount = 0;
  while (recordCount < numRowsPerBatch) {
    try (OperatorStats.WaitRecorder recorder = OperatorStats.getWaitRecorder(this.context.getStats())) {
      boolean hasNext = reader.next(key, value = skipRecordsInspector.getNextValue());
      if (!hasNext) {
        break;
      }
    } catch (FSError e) {
      throw HadoopFileSystemWrapper.propagateFSError(e);
    }
    // Skip header lines, if the table defines any.
    if (skipRecordsInspector.doSkipHeader(recordCount++)) {
      continue;
    }
    // bufferAdd buffers records so trailing footer lines can be dropped;
    // it returns a record only once it is safe to emit.
    Object bufferedValue = skipRecordsInspector.bufferAdd(value);
    if (bufferedValue != null) {
      // Deserialize with the partition SerDe and, if needed, convert to the table's object inspector.
      Object deSerializedValue = partitionSerDe.deserialize((Writable) bufferedValue);
      if (partTblObjectInspectorConverter != null) {
        deSerializedValue = partTblObjectInspectorConverter.convert(deSerializedValue);
      }
      for (int i = 0; i < selectedStructFieldRefs.length; i++) {
        Object hiveValue = finalOI.getStructFieldData(deSerializedValue, selectedStructFieldRefs[i]);
        if (hiveValue != null) {
          selectedColumnFieldConverters[i].setSafeValue(selectedColumnObjInspectors[i], hiveValue,
              vectors[i], skipRecordsInspector.getActualCount());
        }
      }
      skipRecordsInspector.incrementActualCount();
    }
    skipRecordsInspector.incrementTempCount();
  }

  // Finalize vector value counts for this batch.
  for (int i = 0; i < selectedStructFieldRefs.length; i++) {
    vectors[i].setValueCount(skipRecordsInspector.getActualCount());
  }
  skipRecordsInspector.updateContinuance();
  return skipRecordsInspector.getActualCount();
}
Example #20
Source File: HiveUtilities.java From dremio-oss with Apache License 2.0
/**
 * Utility method which creates a SerDe object for a given SerDe class name and properties.
 *
 * @param jobConf Configuration to use when creating the SerDe class
 * @param sLib {@link SerDe} class name
 * @param properties SerDe properties
 * @return the initialized SerDe instance
 * @throws Exception
 */
public static final SerDe createSerDe(final JobConf jobConf, final String sLib, final Properties properties) throws Exception {
  final Class<? extends SerDe> c = Class.forName(sLib).asSubclass(SerDe.class);
  final SerDe serde = c.getConstructor().newInstance();
  serde.initialize(jobConf, properties);
  return serde;
}
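A hedged usage sketch of this helper, pairing it with getStructOI from Example #2. The column properties and the LazySimpleSerDe class name are illustrative; any class implementing the SerDe interface would work, and jobConf is assumed to be in scope.

// Hypothetical call site for createSerDe; property values are illustrative.
final Properties props = new Properties();
props.setProperty("columns", "id,name");
props.setProperty("columns.types", "int:string");
final SerDe serde = createSerDe(jobConf, "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", props);
final StructObjectInspector rowOI = getStructOI(serde); // see Example #2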