Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector#getStructFieldsDataAsList()

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector#getStructFieldsDataAsList(). Each example is taken from an open-source project; its source file and license are noted above the code.
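Before the project-specific examples, here is a minimal, self-contained sketch of the typical call pattern: getAllStructFieldRefs() and getStructFieldsDataAsList() return their elements in the same field order, so the two lists can be walked in parallel by index. The class and method names here (StructFieldDump, dumpRow) are illustrative placeholders, not taken from any project below.

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class StructFieldDump {

    /**
     * Prints every field of a struct row. The caller supplies the row object and the
     * StructObjectInspector that describes it (e.g. obtained from a SerDe or an ORC reader).
     */
    static void dumpRow(Object row, StructObjectInspector rowInspector) {
        if (row == null) {
            return;
        }
        // Field metadata and field data are returned in the same order,
        // so both lists can be walked in parallel by index.
        List<? extends StructField> fieldRefs = rowInspector.getAllStructFieldRefs();
        List<Object> fieldData = rowInspector.getStructFieldsDataAsList(row);
        for (int i = 0; i < fieldRefs.size(); i++) {
            StructField fieldRef = fieldRefs.get(i);
            ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
            System.out.println(fieldRef.getFieldName() + " (" + fieldOI.getTypeName() + ") = "
                    + fieldData.get(i));
        }
    }
}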
Example 1
Source File: TestDataWritableWriter.java    From presto with Apache License 2.0
/**
 * It writes all the fields contained inside a group to the RecordConsumer.
 *
 * @param value The list of values contained in the group.
 * @param inspector The object inspector used to get the correct value type.
 * @param type Type that contains information about the group schema.
 */
private void writeGroupFields(Object value, StructObjectInspector inspector, GroupType type)
{
    if (value != null) {
        List<? extends StructField> fields = inspector.getAllStructFieldRefs();
        List<Object> fieldValuesList = inspector.getStructFieldsDataAsList(value);

        for (int i = 0; i < type.getFieldCount(); i++) {
            Type fieldType = type.getType(i);
            String fieldName = fieldType.getName();
            Object fieldValue = fieldValuesList.get(i);

            if (fieldValue != null) {
                ObjectInspector fieldInspector = fields.get(i).getFieldObjectInspector();
                recordConsumer.startField(fieldName, i);
                writeValue(fieldValue, fieldInspector, fieldType);
                recordConsumer.endField(fieldName, i);
            }
        }
    }
}
 
Example 2
Source File: HiveStructObjectConverter.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public Object convert(ObjectInspector objectInspector, Object o, TypeInfo odpsTypeInfo) {
  StructObjectInspector structObjectInspector = (StructObjectInspector) objectInspector;
  StructTypeInfo structTypeInfo = (StructTypeInfo) odpsTypeInfo;

  List<Object> odpsValues = new ArrayList<>();
  List<TypeInfo> fieldTypeInfos = structTypeInfo.getFieldTypeInfos();
  List<Object> values = structObjectInspector.getStructFieldsDataAsList(o);
  List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
  for(int i = 0; i < fields.size(); i++) {
    StructField field = fields.get(i);
    Object value = HiveObjectConverter.convert(
        field.getFieldObjectInspector(), values.get(i), fieldTypeInfos.get(i));
    odpsValues.add(value);
  }
  return new SimpleStruct(structTypeInfo, odpsValues);
}
 
Example 3
Source File: BlurSerializer.java    From incubator-retired-blur with Apache License 2.0
public Writable serialize(Object o, ObjectInspector objectInspector, List<String> columnNames,
    List<TypeInfo> columnTypes, Map<String, ColumnDefinition> schema, String family) throws SerDeException {
  BlurRecord blurRecord = new BlurRecord();
  blurRecord.setFamily(family);

  StructObjectInspector soi = (StructObjectInspector) objectInspector;

  List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
  int size = columnNames.size();
  if (outputFieldRefs.size() != size) {
    throw new SerDeException("Number of input columns was different than output columns (in = " + size + " vs out = "
        + outputFieldRefs.size());
  }

  List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);

  if (structFieldsDataAsList.size() != size) {
    throw new SerDeException("Number of input columns was different than output columns (in = "
        + structFieldsDataAsList.size() + " vs out = " + size);
  }

  for (int i = 0; i < size; i++) {
    String columnName = _columnNameResolver.fromHiveToBlur(columnNames.get(i));
    StructField structFieldRef = outputFieldRefs.get(i);
    ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
    Object structFieldData = structFieldsDataAsList.get(i);
    add(blurRecord, columnName, fieldOI, structFieldData);
  }
  return blurRecord;
}
 
Example 4
Source File: TestOrcStorage.java    From spork with Apache License 2.0
private void verifyData(Path orcFile, Iterator<Tuple> iter, FileSystem fs, int expectedTotalRows) throws Exception {

        int expectedRows = 0;
        int actualRows = 0;
        Reader orcReader = OrcFile.createReader(fs, orcFile);
        ObjectInspector oi = orcReader.getObjectInspector();
        StructObjectInspector soi = (StructObjectInspector) oi;

        RecordReader reader = orcReader.rows(null);
        Object row = null;

        while (reader.hasNext()) {
            row = reader.next(row);
            expectedRows++;
            List<?> orcRow = soi.getStructFieldsDataAsList(row);
            if (!iter.hasNext()) {
                break;
            }
            Tuple t = iter.next();
            assertEquals(orcRow.size(), t.size());
            actualRows++;

            for (int i = 0; i < orcRow.size(); i++) {
                Object expected = orcRow.get(i);
                Object actual = t.get(i);
                compareData(expected, actual);
            }
        }
        assertFalse(iter.hasNext());
        assertEquals(expectedRows, actualRows);
        assertEquals(expectedTotalRows, actualRows);

    }
 
Example 5
Source File: TestOrcStorage.java    From spork with Apache License 2.0
private void verifyData(Path orcFile, Path pigOrcFile, FileSystem fs, int expectedTotalRows) throws Exception {

        int expectedRows = 0;
        int actualRows = 0;
        Reader orcReaderExpected = OrcFile.createReader(fs, orcFile);
        StructObjectInspector soiExpected = (StructObjectInspector) orcReaderExpected.getObjectInspector();
        Reader orcReaderActual = OrcFile.createReader(fs, pigOrcFile);
        StructObjectInspector soiActual = (StructObjectInspector) orcReaderActual.getObjectInspector();

        RecordReader readerExpected = orcReaderExpected.rows(null);
        Object expectedRow = null;
        RecordReader readerActual = orcReaderActual.rows(null);
        Object actualRow = null;

        while (readerExpected.hasNext()) {
            expectedRow = readerExpected.next(expectedRow);
            expectedRows++;
            List<?> orcRowExpected = soiExpected.getStructFieldsDataAsList(expectedRow);
            if (!readerActual.hasNext()) {
                break;
            }
            actualRow = readerActual.next(actualRow);
            actualRows++;
            List<?> orcRowActual = soiActual.getStructFieldsDataAsList(actualRow);
            assertEquals(orcRowExpected.size(), orcRowActual.size());

            for (int i = 0; i < orcRowExpected.size(); i++) {
                assertEquals(orcRowExpected.get(i), orcRowActual.get(i));
            }
        }
        assertFalse(readerActual.hasNext());
        assertEquals(expectedRows, actualRows);
        assertEquals(expectedTotalRows, actualRows);

        readerExpected.close();
        readerActual.close();
    }
 
Example 6
Source File: PutORCTest.java    From nifi with Apache License 2.0
private void verifyORCUsers(final Path orcUsers, final int numExpectedUsers, BiFunction<List<Object>, Integer, Void> assertFunction) throws IOException {
    Reader reader = OrcFile.createReader(orcUsers, OrcFile.readerOptions(testConf));
    RecordReader recordReader = reader.rows();

    TypeInfo typeInfo =
            TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,favorite_number:int,favorite_color:string,scale:double>");
    StructObjectInspector inspector = (StructObjectInspector)
            OrcStruct.createObjectInspector(typeInfo);

    int currUser = 0;
    Object nextRecord = null;
    while ((nextRecord = recordReader.next(nextRecord)) != null) {
        Assert.assertNotNull(nextRecord);
        Assert.assertTrue("Not an OrcStruct", nextRecord instanceof OrcStruct);
        List<Object> x = inspector.getStructFieldsDataAsList(nextRecord);

        if (assertFunction == null) {
            assertEquals("name" + currUser, x.get(0).toString());
            assertEquals(currUser, ((IntWritable) x.get(1)).get());
            assertEquals("blue" + currUser, x.get(2).toString());
            assertEquals(10.0 * currUser, ((DoubleWritable) x.get(3)).get(), Double.MIN_VALUE);
        } else {
            assertFunction.apply(x, currUser);
        }
        currUser++;
    }

    assertEquals(numExpectedUsers, currUser);
}
 
Example 7
Source File: WriterImpl.java    From hive-dwrf with Apache License 2.0
@Override
void write(Object obj) throws IOException {
  long rawDataSize = 0;
  if (obj != null) {
    StructObjectInspector insp = (StructObjectInspector) inspector;
    List<Object> fieldDataList = insp.getStructFieldsDataAsList(obj);

    for(int i = 0; i < fields.size(); ++i) {
      TreeWriter writer = childrenWriters[i];
      writer.write(fieldDataList.get(i));
      rawDataSize += writer.getRowRawDataSize();
    }
  }
  super.write(obj, rawDataSize);
}
 
Example 8
Source File: DynamoDBSerDe.java    From emr-dynamodb-connector with Apache License 2.0
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  // Prepare the field ObjectInspectors
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  List<Object> rowData = soi.getStructFieldsDataAsList(obj);
  Map<String, AttributeValue> item = Maps.newHashMap();

  validateData(fields, rowData);

  for (int i = 0; i < fields.size(); i++) {
    StructField field = fields.get(i);
    Object data = rowData.get(i);
    String columnName = columnNames.get(i);
    ObjectInspector fieldOI = field.getFieldObjectInspector();

    // Get the Hive to DynamoDB mapper
    HiveDynamoDBType ddType = typeMappings.get(columnName);

    // Check if this column maps a DynamoDB item.
    if (HiveDynamoDBTypeFactory.isHiveDynamoDBItemMapType(ddType)) {
      HiveDynamoDBItemType ddItemType = (HiveDynamoDBItemType) ddType;
      Map<String, AttributeValue> backupItem = ddItemType.parseDynamoDBData(data, fieldOI);

      // We give higher priority to attributes directly mapped to
      // columns. So we do not update the value of an attribute if
      // it already exists. This can happen in case of partial schemas
      // when there is a full backup column and attribute mapped
      // columns.
      for (Map.Entry<String, AttributeValue> entry : backupItem.entrySet()) {
        if (!columnMappings.containsValue(entry.getKey())) {
          item.put(entry.getKey(), entry.getValue());
        }
      }
    } else {
      // User has mapped individual attribute in DynamoDB to
      // corresponding Hive columns.
      AttributeValue attributeValue = data == null ?
          DynamoDBDataParser.getNullAttribute(nullSerialization) :
          ddType.getDynamoDBData(data, fieldOI, nullSerialization);

      if (attributeValue != null) {
        item.put(columnMappings.get(columnName), attributeValue);
      }
    }
  }

  return new DynamoDBItemWritable(item);
}
 
Example 9
Source File: MulticlassOnlineClassifierUDTF.java    From incubator-hivemall with Apache License 2.0
private long loadPredictionModel(Map<Object, PredictionModel> label2model, File file,
        PrimitiveObjectInspector labelOI, PrimitiveObjectInspector featureOI,
        WritableFloatObjectInspector weightOI) throws IOException, SerDeException {
    long count = 0L;
    if (!file.exists()) {
        return count;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                count += loadPredictionModel(label2model, f, labelOI, featureOI, weightOI);
            }
        } else {
            LazySimpleSerDe serde = HiveUtils.getLineSerde(labelOI, featureOI, weightOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField c1ref = lineOI.getStructFieldRef("c1");
            StructField c2ref = lineOI.getStructFieldRef("c2");
            StructField c3ref = lineOI.getStructFieldRef("c3");
            PrimitiveObjectInspector c1refOI =
                    (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
            PrimitiveObjectInspector c2refOI =
                    (PrimitiveObjectInspector) c2ref.getFieldObjectInspector();
            FloatObjectInspector c3refOI =
                    (FloatObjectInspector) c3ref.getFieldObjectInspector();

            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object f2 = fields.get(2);
                    if (f0 == null || f1 == null || f2 == null) {
                        continue; // avoid the case that key or value is null
                    }
                    Object label = c1refOI.getPrimitiveWritableObject(c1refOI.copyObject(f0));
                    PredictionModel model = label2model.get(label);
                    if (model == null) {
                        model = createModel();
                        label2model.put(label, model);
                    }
                    Object k = c2refOI.getPrimitiveWritableObject(c2refOI.copyObject(f1));
                    float v = c3refOI.get(f2);
                    model.set(k, new WeightValue(v, false));
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
    return count;
}
 
Example 10
Source File: MulticlassOnlineClassifierUDTF.java    From incubator-hivemall with Apache License 2.0
private long loadPredictionModel(Map<Object, PredictionModel> label2model, File file,
        PrimitiveObjectInspector labelOI, PrimitiveObjectInspector featureOI,
        WritableFloatObjectInspector weightOI, WritableFloatObjectInspector covarOI)
        throws IOException, SerDeException {
    long count = 0L;
    if (!file.exists()) {
        return count;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                count += loadPredictionModel(label2model, f, labelOI, featureOI, weightOI,
                    covarOI);
            }
        } else {
            LazySimpleSerDe serde =
                    HiveUtils.getLineSerde(labelOI, featureOI, weightOI, covarOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField c1ref = lineOI.getStructFieldRef("c1");
            StructField c2ref = lineOI.getStructFieldRef("c2");
            StructField c3ref = lineOI.getStructFieldRef("c3");
            StructField c4ref = lineOI.getStructFieldRef("c4");
            PrimitiveObjectInspector c1refOI =
                    (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
            PrimitiveObjectInspector c2refOI =
                    (PrimitiveObjectInspector) c2ref.getFieldObjectInspector();
            FloatObjectInspector c3refOI =
                    (FloatObjectInspector) c3ref.getFieldObjectInspector();
            FloatObjectInspector c4refOI =
                    (FloatObjectInspector) c4ref.getFieldObjectInspector();

            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object f2 = fields.get(2);
                    Object f3 = fields.get(3);
                    if (f0 == null || f1 == null || f2 == null) {
                        continue; // avoid unexpected case
                    }
                    Object label = c1refOI.getPrimitiveWritableObject(c1refOI.copyObject(f0));
                    PredictionModel model = label2model.get(label);
                    if (model == null) {
                        model = createModel();
                        label2model.put(label, model);
                    }
                    Object k = c2refOI.getPrimitiveWritableObject(c2refOI.copyObject(f1));
                    float v = c3refOI.get(f2);
                    float cov =
                            (f3 == null) ? WeightValueWithCovar.DEFAULT_COVAR : c4refOI.get(f3);
                    model.set(k, new WeightValueWithCovar(v, cov, false));
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
    return count;
}
 
Example 11
Source File: DistributedCacheLookupUDF.java    From incubator-hivemall with Apache License 2.0
private static void loadValues(Object2ObjectMap<Object, Object> map, File file,
        PrimitiveObjectInspector keyOI, PrimitiveObjectInspector valueOI)
        throws IOException, SerDeException {
    if (!file.exists()) {
        return;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                loadValues(map, f, keyOI, valueOI);
            }
        } else {
            LazySimpleSerDe serde = HiveUtils.getKeyValueLineSerde(keyOI, valueOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField keyRef = lineOI.getStructFieldRef("key");
            StructField valueRef = lineOI.getStructFieldRef("value");
            PrimitiveObjectInspector keyRefOI =
                    (PrimitiveObjectInspector) keyRef.getFieldObjectInspector();
            PrimitiveObjectInspector valueRefOI =
                    (PrimitiveObjectInspector) valueRef.getFieldObjectInspector();

            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object k = keyRefOI.getPrimitiveJavaObject(f0);
                    Object v = valueRefOI.getPrimitiveWritableObject(valueRefOI.copyObject(f1));
                    map.put(k, v);
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
}