Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.StructField#getFieldObjectInspector()
The following examples show how to use
org.apache.hadoop.hive.serde2.objectinspector.StructField#getFieldObjectInspector().
The examples are drawn from open-source projects; each one lists its source file, originating project, and license.
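All of the examples follow the same underlying pattern: obtain a StructObjectInspector for the row, resolve a StructField by name or position, read the raw field data, and use getFieldObjectInspector() to interpret it. Here is a minimal sketch of that pattern, assuming a primitive column; the class name, method name, and column lookup are placeholders for illustration, not code from any project on this page:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class FieldInspectorSketch {

    // Extracts one column value from a deserialized row, assuming the field is a primitive.
    static Object readPrimitiveField(Object row, StructObjectInspector rowOI, String columnName) {
        StructField field = rowOI.getStructFieldRef(columnName);   // resolve the field by name
        ObjectInspector fieldOI = field.getFieldObjectInspector(); // inspector describing the field's type
        Object rawValue = rowOI.getStructFieldData(row, field);    // raw (possibly lazy) field data
        if (fieldOI.getCategory() == ObjectInspector.Category.PRIMITIVE) {
            // Unwrap the lazy/Writable representation into a plain Java object.
            return ((PrimitiveObjectInspector) fieldOI).getPrimitiveJavaObject(rawValue);
        }
        return rawValue; // complex types require a List/Map/Struct object inspector instead
    }
}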
Example 1
Source File: ObjectInspectorTest.java From Hive-XML-SerDe with Apache License 2.0
@SuppressWarnings("rawtypes") public void testSimpleXmlNotMap() throws SerDeException { XmlSerDe xmlSerDe = new XmlSerDe(); Configuration configuration = new Configuration(); Properties properties = new Properties(); properties.put(LIST_COLUMNS, "test"); properties.put(LIST_COLUMN_TYPES, "map<string,string>"); properties.setProperty("column.xpath.test", "//*[contains(name(),'test')]/text()"); xmlSerDe.initialize(configuration, properties); Text text = new Text(); text.set("<root><test1>string1</test1><test2>string2</test2></root>"); Object o = xmlSerDe.deserialize(text); XmlStructObjectInspector structInspector = ((XmlStructObjectInspector) xmlSerDe.getObjectInspector()); StructField structField = structInspector.getStructFieldRef("test"); Object data = structInspector.getStructFieldData(o, structField); XmlMapObjectInspector fieldInspector = (XmlMapObjectInspector) structField.getFieldObjectInspector(); Map map = fieldInspector.getMap(data); assertEquals(0, map.size()); }
Example 2
Source File: XmlStructObjectInspector.java From Hive-XML-SerDe with Apache License 2.0
/**
 * @see org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector#getStructFieldData(java.lang.Object,
 *      org.apache.hadoop.hive.serde2.objectinspector.StructField)
 */
@SuppressWarnings("unchecked")
@Override
public Object getStructFieldData(Object data, StructField structField) {
    if ((data instanceof List) && !(data instanceof SerDeArray)) {
        MyField f = (MyField) structField;
        int fieldID = f.getFieldID();
        return ((List<Object>) data).get(fieldID);
    } else {
        ObjectInspector fieldObjectInspector = structField.getFieldObjectInspector();
        Category category = fieldObjectInspector.getCategory();
        Object fieldData = this.xmlProcessor.getObjectValue(data, structField.getFieldName());
        switch (category) {
            case PRIMITIVE: {
                PrimitiveObjectInspector primitiveObjectInspector = (PrimitiveObjectInspector) fieldObjectInspector;
                PrimitiveCategory primitiveCategory = primitiveObjectInspector.getPrimitiveCategory();
                return this.xmlProcessor.getPrimitiveObjectValue(fieldData, primitiveCategory);
            }
            default:
                return fieldData;
        }
    }
}
Example 3
Source File: JSONCDHSerDe.java From bigdata-tutorial with Apache License 2.0
/**
 * Deparses struct data into a serializable JSON object.
 *
 * @param obj - Hive struct data
 * @param structOI - ObjectInspector for the struct
 * @param isRow - Whether or not this struct represents a top-level row
 * @return - A deparsed struct
 */
private Object deparseStruct(Object obj, StructObjectInspector structOI, boolean isRow) {
    Map<Object, Object> struct = new HashMap<Object, Object>();
    List<? extends StructField> fields = structOI.getAllStructFieldRefs();
    for (int i = 0; i < fields.size(); i++) {
        StructField field = fields.get(i);
        // The top-level row object is treated slightly differently from other
        // structs, because the field names for the row do not correctly reflect
        // the Hive column names. For lower-level structs, we can get the field
        // name from the associated StructField object.
        String fieldName = isRow ? colNames.get(i) : field.getFieldName();
        ObjectInspector fieldOI = field.getFieldObjectInspector();
        Object fieldObj = structOI.getStructFieldData(obj, field);
        struct.put(fieldName, deparseObject(fieldObj, fieldOI));
    }
    return struct;
}
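Both JSON SerDe examples on this page (this one and Example 5) delegate to a deparseObject helper that is not shown here. A minimal sketch of how such a category dispatcher is commonly written, assuming the same class context and imports as the method above; the body below is illustrative, not the project's actual code:

// Illustrative sketch only: not the project's actual deparseObject implementation.
private Object deparseObject(Object obj, ObjectInspector oi) {
    switch (oi.getCategory()) {
        case PRIMITIVE:
            // Unwrap lazy/Writable values into plain Java objects.
            return ((PrimitiveObjectInspector) oi).getPrimitiveJavaObject(obj);
        case LIST: {
            ListObjectInspector loi = (ListObjectInspector) oi;
            List<Object> list = new ArrayList<Object>();
            for (Object element : loi.getList(obj)) {
                list.add(deparseObject(element, loi.getListElementObjectInspector()));
            }
            return list;
        }
        case STRUCT:
            // Nested structs are never top-level rows, hence isRow = false.
            return deparseStruct(obj, (StructObjectInspector) oi, false);
        default:
            return null; // MAP and UNION handling omitted from this sketch
    }
}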
Example 4
Source File: MaxRowUDAF.java From incubator-hivemall with Apache License 2.0
private ObjectInspector initReduceSide(StructObjectInspector inputStructOI) throws HiveException {
    List<? extends StructField> fields = inputStructOI.getAllStructFieldRefs();
    int length = fields.size();
    this.inputStructOI = inputStructOI;
    this.inputOIs = new ObjectInspector[length];
    this.outputOIs = new ObjectInspector[length];
    for (int i = 0; i < length; i++) {
        StructField field = fields.get(i);
        ObjectInspector oi = field.getFieldObjectInspector();
        inputOIs[i] = oi;
        outputOIs[i] = ObjectInspectorUtils.getStandardObjectInspector(oi);
    }
    return ObjectInspectorUtils.getStandardObjectInspector(inputStructOI);
}
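The usual reason for converting each field inspector with ObjectInspectorUtils.getStandardObjectInspector, as done here, is that aggregation state buffered inside a UDAF should not hold on to lazily deserialized objects: the standard inspectors describe the copied, self-contained representation the UDAF stores between iterations.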
Example 5
Source File: JSONSerDe.java From searchanalytics-bigdata with MIT License
/**
 * Deparses struct data into a serializable JSON object.
 *
 * @param obj - Hive struct data
 * @param structOI - ObjectInspector for the struct
 * @param isRow - Whether or not this struct represents a top-level row
 * @return - A deparsed struct
 */
private Object deparseStruct(final Object obj, final StructObjectInspector structOI, final boolean isRow) {
    final Map<Object, Object> struct = new HashMap<Object, Object>();
    final List<? extends StructField> fields = structOI.getAllStructFieldRefs();
    for (int i = 0; i < fields.size(); i++) {
        final StructField field = fields.get(i);
        // The top-level row object is treated slightly differently from other
        // structs, because the field names for the row do not correctly reflect
        // the Hive column names. For lower-level structs, we can get the field
        // name from the associated StructField object.
        final String fieldName = isRow ? colNames.get(i) : field.getFieldName();
        final ObjectInspector fieldOI = field.getFieldObjectInspector();
        final Object fieldObj = structOI.getStructFieldData(obj, field);
        struct.put(fieldName, deparseObject(fieldObj, fieldOI));
    }
    return struct;
}
Example 6
Source File: ObjectInspectorTest.java From Hive-XML-SerDe with Apache License 2.0
@SuppressWarnings("rawtypes") public void testSimpleXmlMap() throws SerDeException { XmlSerDe xmlSerDe = new XmlSerDe(); Configuration configuration = new Configuration(); Properties properties = new Properties(); properties.put(LIST_COLUMNS, "test"); properties.put(LIST_COLUMN_TYPES, "map<string,string>"); properties.setProperty("column.xpath.test", "//*[contains(name(),'test')]"); xmlSerDe.initialize(configuration, properties); Text text = new Text(); text.set("<root><test1>string1</test1><test2>string2</test2></root>"); Object o = xmlSerDe.deserialize(text); XmlStructObjectInspector structInspector = ((XmlStructObjectInspector) xmlSerDe.getObjectInspector()); StructField structField = structInspector.getStructFieldRef("test"); Object data = structInspector.getStructFieldData(o, structField); XmlMapObjectInspector fieldInspector = (XmlMapObjectInspector) structField.getFieldObjectInspector(); Map map = fieldInspector.getMap(data); PrimitiveObjectInspector valueObjectInspector = (PrimitiveObjectInspector) fieldInspector.getMapValueObjectInspector(); String test = (String) valueObjectInspector.getPrimitiveJavaObject(map.get("test1")); assertEquals("string1", test); }
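Together with Example 1, this test pins down the XPath contract of the SerDe: an expression that selects the elements themselves ("//*[contains(name(),'test')]") yields map entries keyed by element name, while the text()-selecting variant in Example 1 leaves the XmlMapObjectInspector with no named entries and produces an empty map.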
Example 7
Source File: BlurSerializer.java From incubator-retired-blur with Apache License 2.0
private String getFieldData(String columnName, Object data, StructObjectInspector structObjectInspector,
        Map<String, StructField> allStructFieldRefs, String name) throws SerDeException {
    StructField structField = allStructFieldRefs.get(name);
    ObjectInspector fieldObjectInspector = structField.getFieldObjectInspector();
    Object structFieldData = structObjectInspector.getStructFieldData(data, structField);
    if (fieldObjectInspector instanceof PrimitiveObjectInspector) {
        return toString(columnName, structFieldData, (PrimitiveObjectInspector) fieldObjectInspector);
    } else {
        throw new SerDeException("Embedded non-primitive type is not supported columnName [" + columnName
                + "] objectInspector [" + fieldObjectInspector + "].");
    }
}
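Here the returned inspector serves purely as a type guard: any embedded struct, list, or map fails fast with a column-level SerDeException instead of erroring deep inside a value conversion.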
Example 8
Source File: HiveFieldExtractor.java From elasticsearch-hadoop with Apache License 2.0
@Override
protected Object extractField(Object target) {
    List<String> flNames = fieldNames;
    for (int i = 0; i < flNames.size(); i++) {
        String fl = flNames.get(i);
        if (target instanceof HiveType) {
            HiveType type = (HiveType) target;
            ObjectInspector inspector = type.getObjectInspector();
            if (inspector instanceof StructObjectInspector) {
                StructObjectInspector soi = (StructObjectInspector) inspector;
                StructField field = soi.getStructFieldRef(fl);
                ObjectInspector foi = field.getFieldObjectInspector();
                Assert.isTrue(foi.getCategory() == ObjectInspector.Category.PRIMITIVE,
                        String.format("Field [%s] needs to be a primitive; found [%s]", fl, foi.getTypeName()));
                // expecting a Writable - simply do a toString
                target = soi.getStructFieldData(type.getObject(), field);
            } else {
                return FieldExtractor.NOT_FOUND;
            }
        } else {
            return FieldExtractor.NOT_FOUND;
        }
    }
    if (target == null || target instanceof NullWritable) {
        return StringUtils.EMPTY;
    }
    return target.toString();
}
Example 9
Source File: ParquetHiveSerDe.java From parquet-mr with Apache License 2.0
private ArrayWritable createStruct(final Object obj, final StructObjectInspector inspector) throws SerDeException {
    final List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    final Writable[] arr = new Writable[fields.size()];
    for (int i = 0; i < fields.size(); i++) {
        final StructField field = fields.get(i);
        final Object subObj = inspector.getStructFieldData(obj, field);
        final ObjectInspector subInspector = field.getFieldObjectInspector();
        arr[i] = createObject(subObj, subInspector);
    }
    return new ArrayWritable(Writable.class, arr);
}
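Each field's value/inspector pair is handed to createObject (not shown on this page), which converts it recursively into the Writable tree that Parquet's write support expects; passing the result of getFieldObjectInspector() alongside the raw value is what keeps that recursion type-aware.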
Example 10
Source File: BitcoinTransactionHashSegwitUDF.java From hadoopcryptoledger with Apache License 2.0
/**
 * Read list of Bitcoin ScriptWitness items from a table in Hive in any format (e.g. ORC, Parquet)
 *
 * @param loi ObjectInspector for processing the Object containing a list
 * @param listOfScriptWitnessItemObject object containing the list of scriptwitnessitems of a Bitcoin Transaction
 *
 * @return a list of BitcoinScriptWitnessItem
 */
private List<BitcoinScriptWitnessItem> readListOfBitcoinScriptWitnessFromTable(ListObjectInspector loi, Object listOfScriptWitnessItemObject) {
    int listLength = loi.getListLength(listOfScriptWitnessItemObject);
    List<BitcoinScriptWitnessItem> result = new ArrayList<>(listLength);
    StructObjectInspector listOfScriptwitnessItemElementObjectInspector = (StructObjectInspector) loi.getListElementObjectInspector();
    for (int i = 0; i < listLength; i++) {
        Object currentlistofscriptwitnessitemObject = loi.getListElement(listOfScriptWitnessItemObject, i);
        StructField stackitemcounterSF = listOfScriptwitnessItemElementObjectInspector.getStructFieldRef("stackitemcounter");
        StructField scriptwitnesslistSF = listOfScriptwitnessItemElementObjectInspector.getStructFieldRef("scriptwitnesslist");
        boolean scriptwitnessitemNull = (stackitemcounterSF == null) || (scriptwitnesslistSF == null);
        if (scriptwitnessitemNull) {
            LOG.warn("Invalid BitcoinScriptWitnessItem detected at position " + i);
            return new ArrayList<>();
        }
        byte[] stackItemCounter = wboi.getPrimitiveJavaObject(listOfScriptwitnessItemElementObjectInspector.getStructFieldData(currentlistofscriptwitnessitemObject, stackitemcounterSF));
        Object listofscriptwitnessObject = soi.getStructFieldData(currentlistofscriptwitnessitemObject, scriptwitnesslistSF);
        ListObjectInspector loiScriptWitness = (ListObjectInspector) scriptwitnesslistSF.getFieldObjectInspector();
        StructObjectInspector listOfScriptwitnessElementObjectInspector = (StructObjectInspector) loiScriptWitness.getListElementObjectInspector();
        int listWitnessLength = loiScriptWitness.getListLength(listofscriptwitnessObject);
        List<BitcoinScriptWitness> currentScriptWitnessList = new ArrayList<>(listWitnessLength);
        for (int j = 0; j < listWitnessLength; j++) {
            // use the inner list's inspector here (the original called the outer loi, which only
            // behaves identically for standard list inspectors)
            Object currentlistofscriptwitnessObject = loiScriptWitness.getListElement(listofscriptwitnessObject, j);
            StructField witnessscriptlengthSF = listOfScriptwitnessElementObjectInspector.getStructFieldRef("witnessscriptlength");
            StructField witnessscriptSF = listOfScriptwitnessElementObjectInspector.getStructFieldRef("witnessscript");
            boolean scriptwitnessNull = (witnessscriptlengthSF == null) || (witnessscriptSF == null);
            if (scriptwitnessNull) {
                LOG.warn("Invalid BitcoinScriptWitness detected at position " + j + " for BitcoinScriptWitnessItem " + i);
                return new ArrayList<>();
            }
            byte[] scriptWitnessLength = wboi.getPrimitiveJavaObject(listOfScriptwitnessElementObjectInspector.getStructFieldData(currentlistofscriptwitnessObject, witnessscriptlengthSF));
            byte[] scriptWitness = wboi.getPrimitiveJavaObject(listOfScriptwitnessElementObjectInspector.getStructFieldData(currentlistofscriptwitnessObject, witnessscriptSF));
            currentScriptWitnessList.add(new BitcoinScriptWitness(scriptWitnessLength, scriptWitness));
        }
        BitcoinScriptWitnessItem currentBitcoinScriptWitnessItem = new BitcoinScriptWitnessItem(stackItemCounter, currentScriptWitnessList);
        result.add(currentBitcoinScriptWitnessItem);
    }
    return result;
}
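Note how the same getStructFieldRef / getFieldObjectInspector / getStructFieldData walk is applied one level down: the field inspector of scriptwitnesslist is itself a ListObjectInspector whose element inspector is another struct, which is how nested ORC/Parquet structures are navigated without touching any concrete storage classes.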
Example 11
Source File: MapProcessor.java From hive-solr with Apache License 2.0
public static void resolve(boolean enableFieldMapping, LWDocument doc, String docFieldName, Object data,
        StructField structField, StructObjectInspector inspector) {
    MapObjectInspector moi = (MapObjectInspector) structField.getFieldObjectInspector();
    Object mapValue = inspector.getStructFieldData(data, structField);
    Map<Object, Object> map = (Map<Object, Object>) copyToStandardJavaObject(mapValue, moi);
    Map<String, Object> entries = new HashMap<>();
    resolveMap(enableFieldMapping, entries, docFieldName, map);
    for (Map.Entry<String, Object> entry : entries.entrySet()) {
        doc.addField(entry.getKey(), entry.getValue());
    }
    entries.clear();
}
Example 12
Source File: ArrayProcessor.java From hive-solr with Apache License 2.0
public static void resolve(boolean enableFieldMapping, LWDocument doc, String docFieldName, Object data,
        StructField structField, StructObjectInspector inspector) throws Exception {
    ListObjectInspector loi = (ListObjectInspector) structField.getFieldObjectInspector();
    Object listValue = inspector.getStructFieldData(data, structField);
    List<Object> list = (List<Object>) copyToStandardJavaObject(listValue, loi);
    Map<String, Object> entries = new HashMap<>();
    resolveList(enableFieldMapping, entries, docFieldName, list, -1);
    for (Map.Entry<String, Object> entry : entries.entrySet()) {
        doc.addField(entry.getKey(), entry.getValue());
    }
    entries.clear();
}
Example 13
Source File: HiveKuduSerDe.java From HiveKudu-Handler with Apache License 2.0
@Override
public HiveKuduWritable serialize(Object row, ObjectInspector inspector) throws SerDeException {
    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != fieldCount) {
        throw new SerDeException(String.format("Required %d columns, received %d.", fieldCount, fields.size()));
    }
    cachedWritable.clear();
    for (int i = 0; i < fieldCount; i++) {
        StructField structField = fields.get(i);
        if (structField != null) {
            Object field = structInspector.getStructFieldData(row, structField);
            ObjectInspector fieldOI = structField.getFieldObjectInspector();
            Object javaObject = HiveKuduBridgeUtils.deparseObject(field, fieldOI);
            LOG.warn("Column value of " + i + " is " + javaObject.toString());
            cachedWritable.set(i, javaObject);
        }
    }
    return cachedWritable;
}
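One caveat worth noting in this example: javaObject.toString() in the log statement will throw a NullPointerException for NULL column values, and logging every column of every row at WARN level is expensive; both look like leftover debug scaffolding rather than intended behavior.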
Example 14
Source File: MulticlassOnlineClassifierUDTF.java From incubator-hivemall with Apache License 2.0
private long loadPredictionModel(Map<Object, PredictionModel> label2model, File file,
        PrimitiveObjectInspector labelOI, PrimitiveObjectInspector featureOI,
        WritableFloatObjectInspector weightOI, WritableFloatObjectInspector covarOI)
        throws IOException, SerDeException {
    long count = 0L;
    if (!file.exists()) {
        return count;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                count += loadPredictionModel(label2model, f, labelOI, featureOI, weightOI, covarOI);
            }
        } else {
            LazySimpleSerDe serde = HiveUtils.getLineSerde(labelOI, featureOI, weightOI, covarOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField c1ref = lineOI.getStructFieldRef("c1");
            StructField c2ref = lineOI.getStructFieldRef("c2");
            StructField c3ref = lineOI.getStructFieldRef("c3");
            StructField c4ref = lineOI.getStructFieldRef("c4");
            PrimitiveObjectInspector c1refOI = (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
            PrimitiveObjectInspector c2refOI = (PrimitiveObjectInspector) c2ref.getFieldObjectInspector();
            FloatObjectInspector c3refOI = (FloatObjectInspector) c3ref.getFieldObjectInspector();
            FloatObjectInspector c4refOI = (FloatObjectInspector) c4ref.getFieldObjectInspector();
            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object f2 = fields.get(2);
                    Object f3 = fields.get(3);
                    if (f0 == null || f1 == null || f2 == null) {
                        continue; // avoid unexpected case
                    }
                    Object label = c1refOI.getPrimitiveWritableObject(c1refOI.copyObject(f0));
                    PredictionModel model = label2model.get(label);
                    if (model == null) {
                        model = createModel();
                        label2model.put(label, model);
                    }
                    Object k = c2refOI.getPrimitiveWritableObject(c2refOI.copyObject(f1));
                    float v = c3refOI.get(f2);
                    float cov = (f3 == null) ? WeightValueWithCovar.DEFAULT_COVAR : c4refOI.get(f3);
                    model.set(k, new WeightValueWithCovar(v, cov, false));
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
    return count;
}
Example 15
Source File: GenericHiveRecordCursor.java From presto with Apache License 2.0
public GenericHiveRecordCursor(
        Configuration configuration,
        Path path,
        RecordReader<K, V> recordReader,
        long totalBytes,
        Properties splitSchema,
        List<HiveColumnHandle> columns,
        DateTimeZone hiveStorageTimeZone)
{
    requireNonNull(path, "path is null");
    requireNonNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

    this.path = path;
    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;

    this.deserializer = getDeserializer(configuration, splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();
    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];
    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];
    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
        HiveColumnHandle column = columns.get(i);
        checkState(column.getColumnType() == REGULAR, "column type must be regular");
        types[i] = column.getType();
        hiveTypes[i] = column.getHiveType();
        StructField field = rowInspector.getStructFieldRef(column.getName());
        structFields[i] = field;
        fieldInspectors[i] = field.getFieldObjectInspector();
    }
}
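Resolving each column's StructField and field inspector once in the constructor and caching them in parallel arrays means the per-row read path never has to repeat the name-based getStructFieldRef lookup, which matters in a cursor that may scan millions of rows.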
Example 16
Source File: MulticlassOnlineClassifierUDTF.java From incubator-hivemall with Apache License 2.0
private long loadPredictionModel(Map<Object, PredictionModel> label2model, File file,
        PrimitiveObjectInspector labelOI, PrimitiveObjectInspector featureOI,
        WritableFloatObjectInspector weightOI) throws IOException, SerDeException {
    long count = 0L;
    if (!file.exists()) {
        return count;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                count += loadPredictionModel(label2model, f, labelOI, featureOI, weightOI);
            }
        } else {
            LazySimpleSerDe serde = HiveUtils.getLineSerde(labelOI, featureOI, weightOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField c1ref = lineOI.getStructFieldRef("c1");
            StructField c2ref = lineOI.getStructFieldRef("c2");
            StructField c3ref = lineOI.getStructFieldRef("c3");
            PrimitiveObjectInspector c1refOI = (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
            PrimitiveObjectInspector c2refOI = (PrimitiveObjectInspector) c2ref.getFieldObjectInspector();
            FloatObjectInspector c3refOI = (FloatObjectInspector) c3ref.getFieldObjectInspector();
            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object f2 = fields.get(2);
                    if (f0 == null || f1 == null || f2 == null) {
                        continue; // avoid the case that key or value is null
                    }
                    Object label = c1refOI.getPrimitiveWritableObject(c1refOI.copyObject(f0));
                    PredictionModel model = label2model.get(label);
                    if (model == null) {
                        model = createModel();
                        label2model.put(label, model);
                    }
                    Object k = c2refOI.getPrimitiveWritableObject(c2refOI.copyObject(f1));
                    float v = c3refOI.get(f2);
                    model.set(k, new WeightValue(v, false));
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
    return count;
}
Example 17
Source File: DynamoDBSerDe.java From emr-dynamodb-connector with Apache License 2.0
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    // Prepare the field ObjectInspectors
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> rowData = soi.getStructFieldsDataAsList(obj);
    Map<String, AttributeValue> item = Maps.newHashMap();

    validateData(fields, rowData);

    for (int i = 0; i < fields.size(); i++) {
        StructField field = fields.get(i);
        Object data = rowData.get(i);
        String columnName = columnNames.get(i);
        ObjectInspector fieldOI = field.getFieldObjectInspector();

        // Get the Hive to DynamoDB mapper
        HiveDynamoDBType ddType = typeMappings.get(columnName);

        // Check if this column maps a DynamoDB item.
        if (HiveDynamoDBTypeFactory.isHiveDynamoDBItemMapType(ddType)) {
            HiveDynamoDBItemType ddItemType = (HiveDynamoDBItemType) ddType;
            Map<String, AttributeValue> backupItem = ddItemType.parseDynamoDBData(data, fieldOI);

            // We give higher priority to attributes directly mapped to
            // columns. So we do not update the value of an attribute if
            // it already exists. This can happen in case of partial schemas
            // when there is a full backup column and attribute mapped
            // columns.
            for (Map.Entry<String, AttributeValue> entry : backupItem.entrySet()) {
                if (!columnMappings.containsValue(entry.getKey())) {
                    item.put(entry.getKey(), entry.getValue());
                }
            }
        } else {
            // User has mapped individual attribute in DynamoDB to
            // corresponding Hive columns.
            AttributeValue attributeValue = data == null
                    ? DynamoDBDataParser.getNullAttribute(nullSerialization)
                    : ddType.getDynamoDBData(data, fieldOI, nullSerialization);
            if (attributeValue != null) {
                item.put(columnMappings.get(columnName), attributeValue);
            }
        }
    }
    return new DynamoDBItemWritable(item);
}
Example 18
Source File: BitcoinTransactionHashSegwitUDF.java From hadoopcryptoledger with Apache License 2.0
/**
 * This method evaluates a given Object (of type BitcoinTransaction) or a struct which has all necessary fields
 * corresponding to a BitcoinTransaction. The first case occurs if the UDF evaluates data represented in a table
 * provided by the HiveSerde as part of the hadoopcryptoledger library. The second case occurs if BitcoinTransaction
 * data has been imported into a table in another format, such as ORC or Parquet.
 *
 * @param arguments array of length 1 containing one object of type BitcoinTransaction or a Struct representing a BitcoinTransaction
 *
 * @return BytesWritable containing a byte array with the double hash of the BitcoinTransaction
 *
 * @throws org.apache.hadoop.hive.ql.metadata.HiveException in case an internal Hive error occurred
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if ((arguments == null) || (arguments.length != 1)) {
        return null;
    }
    BitcoinTransaction bitcoinTransaction;
    if (arguments[0].get() instanceof HiveBitcoinTransaction) {
        // this happens if the table is in the original file format
        bitcoinTransaction = BitcoinUDFUtil.convertToBitcoinTransaction((HiveBitcoinTransaction) arguments[0].get());
    } else {
        // this happens if the table has been imported into a more optimized analytics format, such as ORC.
        // However, usually we expect that the first case will be used mostly (the hash is generated during
        // extraction from the input format).
        // check if all bitcointransaction fields are available:
        // struct<version:int,incounter:binary,outcounter:binary,
        //   listofinputs:array<struct<prevtransactionhash:binary,previoustxoutindex:bigint,txinscriptlength:binary,txinscript:binary,seqno:bigint>>,
        //   listofoutputs:array<struct<value:bigint,txoutscriptlength:binary,txoutscript:binary>>,locktime:int>
        Object originalObject = arguments[0].get();
        StructField versionSF = soi.getStructFieldRef("version");
        StructField markerSF = soi.getStructFieldRef("marker");
        StructField flagSF = soi.getStructFieldRef("flag");
        StructField incounterSF = soi.getStructFieldRef("incounter");
        StructField outcounterSF = soi.getStructFieldRef("outcounter");
        StructField listofinputsSF = soi.getStructFieldRef("listofinputs");
        StructField listofoutputsSF = soi.getStructFieldRef("listofoutputs");
        StructField listofscriptwitnessitemSF = soi.getStructFieldRef("listofscriptwitnessitem");
        StructField locktimeSF = soi.getStructFieldRef("locktime");
        boolean inputsNull = (incounterSF == null) || (listofinputsSF == null);
        boolean outputsNull = (outcounterSF == null) || (listofoutputsSF == null);
        boolean otherAttributeNull = (versionSF == null) || (locktimeSF == null);
        boolean segwitInformationNull = (markerSF == null) || (flagSF == null) || (listofscriptwitnessitemSF == null);
        if (inputsNull || outputsNull || otherAttributeNull || segwitInformationNull) {
            LOG.info("Structure does not correspond to BitcoinTransaction");
            return null;
        }
        int version = wioi.get(soi.getStructFieldData(originalObject, versionSF));
        byte marker = wbyoi.get(soi.getStructFieldData(originalObject, markerSF));
        byte flag = wbyoi.get(soi.getStructFieldData(originalObject, flagSF));
        byte[] inCounter = wboi.getPrimitiveJavaObject(soi.getStructFieldData(originalObject, incounterSF));
        byte[] outCounter = wboi.getPrimitiveJavaObject(soi.getStructFieldData(originalObject, outcounterSF));
        int locktime = wioi.get(soi.getStructFieldData(originalObject, locktimeSF));
        Object listofinputsObject = soi.getStructFieldData(originalObject, listofinputsSF);
        ListObjectInspector loiInputs = (ListObjectInspector) listofinputsSF.getFieldObjectInspector();
        List<BitcoinTransactionInput> listOfInputsArray = readListOfInputsFromTable(loiInputs, listofinputsObject);
        Object listofoutputsObject = soi.getStructFieldData(originalObject, listofoutputsSF);
        ListObjectInspector loiOutputs = (ListObjectInspector) listofoutputsSF.getFieldObjectInspector();
        List<BitcoinTransactionOutput> listOfOutputsArray = readListOfOutputsFromTable(loiOutputs, listofoutputsObject);
        Object listofscriptwitnessitemObject = soi.getStructFieldData(originalObject, listofscriptwitnessitemSF);
        ListObjectInspector loiScriptWitnessItem = (ListObjectInspector) listofscriptwitnessitemSF.getFieldObjectInspector();
        List<BitcoinScriptWitnessItem> listOfScriptWitnessitemArray = readListOfBitcoinScriptWitnessFromTable(loiScriptWitnessItem, listofscriptwitnessitemObject);
        bitcoinTransaction = new BitcoinTransaction(marker, flag, version, inCounter, listOfInputsArray, outCounter, listOfOutputsArray, listOfScriptWitnessitemArray, locktime);
    }
    byte[] transactionHash = null;
    try {
        transactionHash = BitcoinUtil.getTransactionHashSegwit(bitcoinTransaction);
    } catch (IOException ioe) {
        LOG.error(ioe);
        throw new HiveException(ioe.toString());
    }
    return new BytesWritable(transactionHash);
}
Example 19
Source File: HiveORCVectorizedReader.java From dremio-oss with Apache License 2.0
private static boolean searchAllFields(final ObjectInspector rootOI,
                                       final String name,
                                       final int[] childCounts,
                                       SearchResult position) {
    Category category = rootOI.getCategory();
    if (category == Category.STRUCT) {
        position.index++; // first child is immediately next to parent
        StructObjectInspector sOi = (StructObjectInspector) rootOI;
        for (StructField sf : sOi.getAllStructFieldRefs()) {
            // We depend on the fact that caller takes care of calling current method
            // once for each segment in the selected column path. So, we should always get
            // searched field as immediate child
            if (sf.getFieldName().equalsIgnoreCase(name)) {
                position.oI = sf.getFieldObjectInspector();
                return true;
            } else {
                if (position.index >= childCounts.length) {
                    return false;
                }
                position.index += childCounts[position.index];
            }
        }
    } else if (category == Category.MAP) {
        position.index++; // first child is immediately next to parent
        if (name.equalsIgnoreCase(HiveUtilities.MAP_KEY_FIELD_NAME)) {
            ObjectInspector kOi = ((MapObjectInspector) rootOI).getMapKeyObjectInspector();
            position.oI = kOi;
            return true;
        }
        if (position.index >= childCounts.length) {
            return false;
        }
        position.index += childCounts[position.index];
        if (name.equalsIgnoreCase(HiveUtilities.MAP_VALUE_FIELD_NAME)) {
            ObjectInspector vOi = ((MapObjectInspector) rootOI).getMapValueObjectInspector();
            position.oI = vOi;
            return true;
        }
    }
    return false;
}
Example 20
Source File: HiveORCVectorizedReader.java From dremio-oss with Apache License 2.0
private static boolean searchAllFields(final ObjectInspector rootOI,
                                       final String name,
                                       final int[] childCounts,
                                       SearchResult position) {
    Category category = rootOI.getCategory();
    if (category == Category.STRUCT) {
        position.index++; // first child is immediately next to parent
        StructObjectInspector sOi = (StructObjectInspector) rootOI;
        for (StructField sf : sOi.getAllStructFieldRefs()) {
            // We depend on the fact that caller takes care of calling current method
            // once for each segment in the selected column path. So, we should always get
            // searched field as immediate child
            if (position.index >= childCounts.length) {
                // input schema has more columns than what reader can read
                return false;
            }
            if (sf.getFieldName().equalsIgnoreCase(name)) {
                position.oI = sf.getFieldObjectInspector();
                return true;
            } else {
                position.index += childCounts[position.index];
            }
        }
    } else if (category == Category.MAP) {
        position.index++; // first child is immediately next to parent
        if (position.index >= childCounts.length) {
            // input schema has more columns than what reader can read
            return false;
        }
        if (name.equalsIgnoreCase(HiveUtilities.MAP_KEY_FIELD_NAME)) {
            ObjectInspector kOi = ((MapObjectInspector) rootOI).getMapKeyObjectInspector();
            position.oI = kOi;
            return true;
        }
        position.index += childCounts[position.index];
        if (position.index >= childCounts.length) {
            // input schema has more columns than what reader can read
            return false;
        }
        if (name.equalsIgnoreCase(HiveUtilities.MAP_VALUE_FIELD_NAME)) {
            ObjectInspector vOi = ((MapObjectInspector) rootOI).getMapValueObjectInspector();
            position.oI = vOi;
            return true;
        }
    }
    return false;
}
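The two dremio-oss variants above (Examples 19 and 20) differ only in where the childCounts bounds check happens: Example 19 checks position.index just before advancing past a non-matching struct field, while Example 20 checks it up front on every struct field and around each map branch, so it also bails out safely when the input schema has more columns than the reader can handle.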