org.apache.hadoop.hive.serde2.SerDe Java Examples
The following examples show how to use org.apache.hadoop.hive.serde2.SerDe.
The source file, originating project, and license are noted above each example.
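As a quick orientation before the examples: below is a minimal sketch of the typical SerDe read-path lifecycle (instantiate by class name, initialize with a configuration and table properties, obtain an ObjectInspector, then deserialize raw records). The helper class and method are illustrative, not part of any project shown here; the SerDe calls themselves are the standard Hive API used throughout the examples.

import java.util.Properties;

import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;

public class SerDeLifecycleSketch {
  // Illustrative helper: instantiates, initializes, and uses a SerDe to read one record.
  public static Object readOneRow(JobConf jobConf, String serDeClassName,
      Properties tableProperties, Writable rawRecord) throws Exception {
    // 1. Instantiate the SerDe by class name, as the reflection-based examples below do.
    SerDe serDe = Class.forName(serDeClassName).asSubclass(SerDe.class)
        .getConstructor().newInstance();
    // 2. Initialize with the job configuration and table properties
    //    (typically "columns" and "columns.types", as in Example #5).
    serDe.initialize(jobConf, tableProperties);
    // 3. The ObjectInspector describes the row layout the SerDe produces.
    ObjectInspector rowInspector = serDe.getObjectInspector();
    // 4. Turn a raw Writable from the InputFormat into a Hive row object.
    Object row = serDe.deserialize(rawRecord);
    return row;
  }
}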
Example #1
Source File: HiveAbstractReader.java From dremio-oss with Apache License 2.0
public HiveAbstractReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
                          final List<SchemaPath> projectedColumns, final OperatorContext context,
                          final JobConf jobConf, final SerDe tableSerDe, final StructObjectInspector tableOI,
                          final SerDe partitionSerDe, final StructObjectInspector partitionOI,
                          final ScanFilter filter, final Collection<List<String>> referencedTables,
                          final UserGroupInformation readerUgi) {
  super(context, projectedColumns);
  this.tableAttr = tableAttr;
  this.split = split;
  this.jobConf = jobConf;
  this.tableSerDe = tableSerDe;
  this.tableOI = tableOI;
  // Fall back to the table-level SerDe and ObjectInspector when the split
  // carries no partition-specific ones.
  this.partitionSerDe = partitionSerDe == null ? tableSerDe : partitionSerDe;
  this.partitionOI = partitionOI == null ? tableOI : partitionOI;
  this.filter = filter;
  this.referencedTables = referencedTables;
  this.readerUgi = readerUgi;
}
Example #2
Source File: HiveUtilities.java From dremio-oss with Apache License 2.0
public static StructObjectInspector getStructOI(final SerDe serDe) throws Exception {
  ObjectInspector oi = serDe.getObjectInspector();
  if (oi.getCategory() != Category.STRUCT) {
    throw new UnsupportedOperationException(String.format("%s category not supported", oi.getCategory()));
  }
  return (StructObjectInspector) oi;
}
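ScanWithHiveReader (Example #17) uses this helper together with createSerDe (Example #20) to derive the table-level and partition-level row inspectors that the Hive readers consume.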
Example #3
Source File: HiveORCVectorizedReader.java From dremio-oss with Apache License 2.0
public HiveORCVectorizedReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
                               final List<SchemaPath> projectedColumns, final OperatorContext context,
                               final JobConf jobConf, final SerDe tableSerDe, final StructObjectInspector tableOI,
                               final SerDe partitionSerDe, final StructObjectInspector partitionOI,
                               final ScanFilter filter, final Collection<List<String>> referencedTables,
                               final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI,
      partitionSerDe, partitionOI, filter, referencedTables, readerUgi);
}
Example #4
Source File: HiveTextReader.java From dremio-oss with Apache License 2.0
public HiveTextReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
                      final List<SchemaPath> projectedColumns, final OperatorContext context,
                      final JobConf jobConf, final SerDe tableSerDe, final StructObjectInspector tableOI,
                      final SerDe partitionSerDe, final StructObjectInspector partitionOI,
                      final ScanFilter filter, final Collection<List<String>> referencedTables,
                      final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI,
      partitionSerDe, partitionOI, filter, referencedTables, readerUgi);
}
Example #5
Source File: TestInputOutputFormat.java From hive-dwrf with Apache License 2.0
@Test
public void testEmptyFile() throws Exception {
  JobConf job = new JobConf(conf);
  Properties properties = new Properties();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, properties, Reporter.NULL);
  writer.close(true);
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");
  SerDe serde = new OrcSerde();
  serde.initialize(conf, properties);
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);

  // read the whole file
  conf.set("hive.io.file.readcolumn.ids", "0,1");
  org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Object value = reader.createValue();
  assertEquals(0.0, reader.getProgress(), 0.00001);
  assertEquals(0, reader.getPos());
  assertEquals(false, reader.next(key, value));
  reader.close();
  assertEquals(null, serde.getSerDeStats());
}
Example #6
Source File: HiveRecordReaders.java From dremio-oss with Apache License 2.0
Reader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
       final List<SchemaPath> projectedColumns, final OperatorContext context,
       final JobConf jobConf, final SerDe tableSerDe, final StructObjectInspector tableOI,
       final SerDe partitionSerDe, final StructObjectInspector partitionOI,
       final ScanFilter filter, final Collection<List<String>> referencedTables,
       final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI,
      partitionSerDe, partitionOI, filter, referencedTables, readerUgi);
}
Example #7
Source File: HiveSerDeWrapper.java From incubator-gobblin with Apache License 2.0
/**
 * Get the {@link SerDe} instance associated with this {@link HiveSerDeWrapper}.
 * This method performs lazy initialization.
 */
public SerDe getSerDe() throws IOException {
  if (!this.serDe.isPresent()) {
    try {
      this.serDe = Optional.of(SerDe.class.cast(Class.forName(this.serDeClassName).newInstance()));
    } catch (Throwable t) {
      throw new IOException("Failed to instantiate SerDe " + this.serDeClassName, t);
    }
  }
  return this.serDe.get();
}
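Note the deliberately broad catch of Throwable: class loading and reflective instantiation can fail with Errors (for example, NoClassDefFoundError) as well as Exceptions, and the wrapper surfaces both as a checked IOException that names the offending SerDe class.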
Example #8
Source File: KuduStorageHandler.java From HiveKudu-Handler with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return HiveKuduSerDe.class;
}
Example #9
Source File: SMStorageHandler.java From spliceengine with GNU Affero General Public License v3.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return SMSerDe.class;
}
Example #10
Source File: JdbcStorageHandler.java From HiveJdbcStorageHandler with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return JdbcSerDe.class;
}
Example #11
Source File: EsStorageHandler.java From elasticsearch-hadoop with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return EsSerDe.class;
}
Example #12
Source File: AccumuloStorageHandler.java From accumulo-hive-storage-manager with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return AccumuloSerde.class;
}
Example #13
Source File: CassandraStorageHandler.java From Hive-Cassandra with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return CassandraColumnSerDe.class;
}
Example #14
Source File: BlurHiveStorageHandler.java From incubator-retired-blur with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return BlurSerDe.class;
}
Example #15
Source File: KafkaStorageHandler.java From HiveKa with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return AvroSerDe.class;
}
Example #16
Source File: SolrStorageHandler.java From hive-solr with MIT License
@Override
public Class<? extends SerDe> getSerDeClass() {
  return SolrSerde.class;
}
Example #17
Source File: ScanWithHiveReader.java From dremio-oss with Apache License 2.0
private static RecordReader getRecordReader(HiveSplitXattr splitXattr, HiveTableXattr tableXattr,
    OperatorContext context, HiveConf hiveConf, SplitAndPartitionInfo split,
    CompositeReaderConfig compositeReader, HiveProxyingSubScan config,
    UserGroupInformation readerUgi) throws Exception {

  final JobConf baseJobConf = new JobConf(hiveConf);
  final Properties tableProperties = new Properties();
  addProperties(baseJobConf, tableProperties, HiveReaderProtoUtil.getTableProperties(tableXattr));

  final boolean isTransactional = AcidUtils.isTablePropertyTransactional(baseJobConf);
  final boolean isPartitioned = config.getPartitionColumns() != null && config.getPartitionColumns().size() > 0;
  final Optional<String> tableInputFormat = HiveReaderProtoUtil.getTableInputFormat(tableXattr);
  final JobConf jobConf = new JobConf(baseJobConf);

  final SerDe tableSerDe = createSerDe(jobConf, HiveReaderProtoUtil.getTableSerializationLib(tableXattr).get(), tableProperties);
  final StructObjectInspector tableOI = getStructOI(tableSerDe);
  final SerDe partitionSerDe;
  final StructObjectInspector partitionOI;

  boolean hasDeltas = false;
  if (isTransactional) {
    InputSplit inputSplit = HiveUtilities.deserializeInputSplit(splitXattr.getInputSplit());
    if (inputSplit instanceof OrcSplit) {
      hasDeltas = hasDeltas((OrcSplit) inputSplit);
    }
  }

  final Class<? extends HiveAbstractReader> tableReaderClass =
      getNativeReaderClass(tableInputFormat, context.getOptions(), hiveConf, false, isTransactional && hasDeltas);
  final Constructor<? extends HiveAbstractReader> tableReaderCtor = getNativeReaderCtor(tableReaderClass);
  Constructor<? extends HiveAbstractReader> readerCtor = tableReaderCtor;

  // It is possible for a partition to have a different input format than the table input format.
  if (isPartitioned) {
    final List<Prop> partitionPropertiesList;
    final Properties partitionProperties = new Properties();
    final Optional<String> partitionInputFormat;
    final Optional<String> partitionStorageHandlerName;

    // First add table properties and then add partition properties. Partition properties override table properties.
    addProperties(jobConf, partitionProperties, HiveReaderProtoUtil.getTableProperties(tableXattr));

    // If partition properties are stored in DatasetMetadata (pre 3.2.0)
    if (HiveReaderProtoUtil.isPreDremioVersion3dot2dot0LegacyFormat(tableXattr)) {
      logger.debug("Reading partition properties from DatasetMetadata");
      partitionPropertiesList = HiveReaderProtoUtil.getPartitionProperties(tableXattr, splitXattr.getPartitionId());
      addProperties(jobConf, partitionProperties, partitionPropertiesList);
      partitionSerDe = createSerDe(jobConf,
          HiveReaderProtoUtil.getPartitionSerializationLib(tableXattr, splitXattr.getPartitionId()).get(),
          partitionProperties);
      partitionInputFormat = HiveReaderProtoUtil.getPartitionInputFormat(tableXattr, splitXattr.getPartitionId());
      partitionStorageHandlerName = HiveReaderProtoUtil.getPartitionStorageHandler(tableXattr, splitXattr.getPartitionId());
    } else {
      logger.debug("Reading partition properties from PartitionChunk");
      final PartitionXattr partitionXattr = HiveReaderProtoUtil.getPartitionXattr(split);
      partitionPropertiesList = HiveReaderProtoUtil.getPartitionProperties(tableXattr, partitionXattr);
      addProperties(jobConf, partitionProperties, partitionPropertiesList);
      partitionSerDe = createSerDe(jobConf,
          HiveReaderProtoUtil.getPartitionSerializationLib(tableXattr, partitionXattr),
          partitionProperties);
      partitionInputFormat = HiveReaderProtoUtil.getPartitionInputFormat(tableXattr, partitionXattr);
      partitionStorageHandlerName = HiveReaderProtoUtil.getPartitionStorageHandler(tableXattr, partitionXattr);
    }

    jobConf.setInputFormat(getInputFormatClass(jobConf, partitionInputFormat, partitionStorageHandlerName));
    partitionOI = getStructOI(partitionSerDe);

    final boolean mixedSchema = !tableOI.equals(partitionOI);
    if (!partitionInputFormat.equals(tableInputFormat) || mixedSchema || isTransactional && hasDeltas) {
      final Class<? extends HiveAbstractReader> partitionReaderClass = getNativeReaderClass(
          partitionInputFormat, context.getOptions(), jobConf, mixedSchema, isTransactional);
      readerCtor = getNativeReaderCtor(partitionReaderClass);
    }
  } else {
    partitionSerDe = null;
    partitionOI = null;
    jobConf.setInputFormat(getInputFormatClass(jobConf, tableInputFormat, HiveReaderProtoUtil.getTableStorageHandler(tableXattr)));
  }

  return readerCtor.newInstance(tableXattr, split, compositeReader.getInnerColumns(), context, jobConf,
      tableSerDe, tableOI, partitionSerDe, partitionOI, config.getFilter(), config.getReferencedTables(), readerUgi);
}
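In summary: the table-level SerDe and ObjectInspector are always built; for partitioned splits, partition properties are layered over the table properties to build a partition-level SerDe/ObjectInspector pair, and a different native reader constructor is selected when the partition's input format, schema, or ACID delta state differs from the table's.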
Example #18
Source File: ScanWithHiveReader.java From dremio-oss with Apache License 2.0
private static Constructor<? extends HiveAbstractReader> getNativeReaderCtor(
    Class<? extends HiveAbstractReader> clazz) throws NoSuchMethodException {
  return clazz.getConstructor(HiveTableXattr.class, SplitAndPartitionInfo.class, List.class,
      OperatorContext.class, JobConf.class, SerDe.class, StructObjectInspector.class,
      SerDe.class, StructObjectInspector.class, ScanFilter.class, Collection.class,
      UserGroupInformation.class);
}
Example #19
Source File: HiveTextReader.java From dremio-oss with Apache License 2.0
@Override
public int populateData() throws IOException, SerDeException {
  // Cache hot fields in locals for the per-record loop.
  final SkipRecordsInspector skipRecordsInspector = this.skipRecordsInspector;
  final RecordReader<Object, Object> reader = this.reader;
  final Converter partTblObjectInspectorConverter = this.partTblObjectInspectorConverter;
  final Object key = this.key;
  final int numRowsPerBatch = (int) this.numRowsPerBatch;
  final StructField[] selectedStructFieldRefs = this.selectedStructFieldRefs;
  final SerDe partitionSerDe = this.partitionSerDe;
  final StructObjectInspector finalOI = this.finalOI;
  final ObjectInspector[] selectedColumnObjInspectors = this.selectedColumnObjInspectors;
  final HiveFieldConverter[] selectedColumnFieldConverters = this.selectedColumnFieldConverters;
  final ValueVector[] vectors = this.vectors;

  skipRecordsInspector.reset();
  Object value;

  int recordCount = 0;
  while (recordCount < numRowsPerBatch) {
    try (OperatorStats.WaitRecorder recorder = OperatorStats.getWaitRecorder(this.context.getStats())) {
      boolean hasNext = reader.next(key, value = skipRecordsInspector.getNextValue());
      if (!hasNext) {
        break;
      }
    } catch (FSError e) {
      throw HadoopFileSystemWrapper.propagateFSError(e);
    }
    // Skip header lines, if the table defines any.
    if (skipRecordsInspector.doSkipHeader(recordCount++)) {
      continue;
    }
    // bufferAdd buffers records so trailing footer lines can be dropped;
    // it returns a record only once it is safe to emit.
    Object bufferedValue = skipRecordsInspector.bufferAdd(value);
    if (bufferedValue != null) {
      // Deserialize with the partition SerDe and, if needed, convert to the table's object inspector.
      Object deSerializedValue = partitionSerDe.deserialize((Writable) bufferedValue);
      if (partTblObjectInspectorConverter != null) {
        deSerializedValue = partTblObjectInspectorConverter.convert(deSerializedValue);
      }
      for (int i = 0; i < selectedStructFieldRefs.length; i++) {
        Object hiveValue = finalOI.getStructFieldData(deSerializedValue, selectedStructFieldRefs[i]);
        if (hiveValue != null) {
          selectedColumnFieldConverters[i].setSafeValue(selectedColumnObjInspectors[i], hiveValue,
              vectors[i], skipRecordsInspector.getActualCount());
        }
      }
      skipRecordsInspector.incrementActualCount();
    }
    skipRecordsInspector.incrementTempCount();
  }

  // Finalize vector value counts for this batch.
  for (int i = 0; i < selectedStructFieldRefs.length; i++) {
    vectors[i].setValueCount(skipRecordsInspector.getActualCount());
  }
  skipRecordsInspector.updateContinuance();
  return skipRecordsInspector.getActualCount();
}
Example #20
Source File: HiveUtilities.java From dremio-oss with Apache License 2.0
/**
 * Utility method which creates a SerDe object for a given SerDe class name and properties.
 *
 * @param jobConf Configuration to use when creating the SerDe class
 * @param sLib {@link SerDe} class name
 * @param properties SerDe properties
 * @return the initialized SerDe instance
 * @throws Exception
 */
public static final SerDe createSerDe(final JobConf jobConf, final String sLib, final Properties properties) throws Exception {
  final Class<? extends SerDe> c = Class.forName(sLib).asSubclass(SerDe.class);
  final SerDe serde = c.getConstructor().newInstance();
  serde.initialize(jobConf, properties);
  return serde;
}
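A hedged usage sketch of this helper, pairing it with getStructOI from Example #2. The column properties and the LazySimpleSerDe class name are illustrative; any class implementing the SerDe interface would work, and jobConf is assumed to be in scope.

// Hypothetical call site for createSerDe; property values are illustrative.
final Properties props = new Properties();
props.setProperty("columns", "id,name");
props.setProperty("columns.types", "int:string");
final SerDe serde = createSerDe(jobConf, "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", props);
final StructObjectInspector rowOI = getStructOI(serde); // see Example #2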