Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector#getStructFieldData()
The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector#getStructFieldData().
The examples are drawn from a variety of open-source projects; the source file, project, and license are listed above each example.
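Every example follows the same basic pattern: obtain a StructObjectInspector for a row object (typically from a SerDe or a file reader), look up the desired column with getStructFieldRef(), and then pull that column's value out of the row with getStructFieldData(). The helper below is a minimal sketch of that pattern; the class and method names are illustrative and are not taken from any of the projects listed here.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public final class StructFieldDataSketch {

    private StructFieldDataSketch() {}

    /**
     * Reads a single named column out of a deserialized Hive row.
     * The row object and inspector would normally come from a SerDe, e.g.
     * Object row = serde.deserialize(writable) and
     * StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector().
     */
    public static Object readColumn(Object row, StructObjectInspector rowOI, String columnName) {
        // Look up the field descriptor for the column by name.
        StructField field = rowOI.getStructFieldRef(columnName);
        // Extract the field's value (often a Writable or a lazy object) from the row.
        Object fieldData = rowOI.getStructFieldData(row, field);
        // For primitive columns, the field's own inspector can convert the value to a plain Java object.
        ObjectInspector fieldOI = field.getFieldObjectInspector();
        if (fieldOI instanceof PrimitiveObjectInspector) {
            return ((PrimitiveObjectInspector) fieldOI).getPrimitiveJavaObject(fieldData);
        }
        return fieldData;
    }
}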
Example 1
Source File: TestGeoJsonSerDe.java (spatial-framework-for-hadoop, Apache License 2.0)
@Test
public void TestDateParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new GeoJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();

    value.set("{\"properties\":{\"when\":\"2020-02-20\"}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("when");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals("2020-02-20", ((DateWritable)fieldData).get().toString());

    value.set("{\"properties\":{\"when\":\"2017-05-05\"}}");
    row = jserde.deserialize(value);
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals("2017-05-05", ((DateWritable)fieldData).get().toString());
}
Example 2
Source File: TestEsriJsonSerDe.java (spatial-framework-for-hadoop, Apache License 2.0)
@Test
public void TestDateParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();

    value.set("{\"attributes\":{\"when\":\"2020-02-20\"}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("when");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals("2020-02-20", ((DateWritable)fieldData).get().toString());

    value.set("{\"attributes\":{\"when\":\"2017-05-05\"}}");
    row = jserde.deserialize(value);
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals("2017-05-05", ((DateWritable)fieldData).get().toString());
}
Example 3
Source File: TestEsriJsonSerDe.java (spatial-framework-for-hadoop, Apache License 2.0)
@Test
public void TestIntParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "num");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "int");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();

    //value.set("{\"attributes\":{\"num\":7},\"geometry\":null}");
    value.set("{\"attributes\":{\"num\":7}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("num");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(7, ((IntWritable)fieldData).get());

    value.set("{\"attributes\":{\"num\":9}}");
    row = jserde.deserialize(value);
    f0 = rowOI.getStructFieldRef("num");
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(9, ((IntWritable)fieldData).get());
}
Example 4
Source File: IndexRSerde.java (indexr, Apache License 2.0)
@Override
public Writable serialize(Object obj, ObjectInspector objectInspector) throws SerDeException {
    if (!objectInspector.getCategory().equals(ObjectInspector.Category.STRUCT)) {
        throw new SerDeException("Cannot serialize " + objectInspector.getCategory() + ". Can only serialize a struct");
    }
    StructObjectInspector inspector = (StructObjectInspector) objectInspector;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    Writable[] arr = new Writable[fields.size()];
    for (int i = 0; i < fields.size(); i++) {
        StructField field = fields.get(i);
        Object subObj = inspector.getStructFieldData(obj, field);
        ObjectInspector subInspector = field.getFieldObjectInspector();
        arr[i] = createPrimitive(subObj, (PrimitiveObjectInspector) subInspector);
    }
    serdeSize = arr.length;
    return new ArrayWritable(Writable.class, arr);
}
Example 5
Source File: TestEsriJsonSerDe.java (spatial-framework-for-hadoop, Apache License 2.0)
@Test
public void TestEpochParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();

    value.set("{\"attributes\":{\"when\":147147147147}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("when");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    //Assert.assertEquals(147147147147L, ((DateWritable)fieldData).get().getTime());
    Assert.assertEquals(new java.sql.Date(147147147147L).toString(), ((DateWritable)fieldData).get().toString());

    value.set("{\"attributes\":{\"when\":142857142857}}");
    row = jserde.deserialize(value);
    fieldData = rowOI.getStructFieldData(row, f0);
    //Assert.assertEquals(142857142857L, ((DateWritable)fieldData).get());
    Assert.assertEquals(new java.sql.Date(142857142857L).toString(), ((DateWritable)fieldData).get().toString());
}
Example 6
Source File: TestGeoJsonSerDe.java (spatial-framework-for-hadoop, Apache License 2.0)
@Test
public void TestEpochParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new GeoJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();

    value.set("{\"properties\":{\"when\":147147147147}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("when");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    //Assert.assertEquals(147147147147L, ((DateWritable)fieldData).get().getTime());
    Assert.assertEquals(new java.sql.Date(147147147147L).toString(), ((DateWritable)fieldData).get().toString());

    value.set("{\"properties\":{\"when\":142857142857}}");
    row = jserde.deserialize(value);
    fieldData = rowOI.getStructFieldData(row, f0);
    //Assert.assertEquals(142857142857L, ((DateWritable)fieldData).get());
    Assert.assertEquals(new java.sql.Date(142857142857L).toString(), ((DateWritable)fieldData).get().toString());
}
Example 7
Source File: JdbcSerDe.java (HiveJdbcStorageHandler, Apache License 2.0)
/**
 * This method takes an object representing a row of data from Hive, and uses
 * the ObjectInspector to get the data for each column and serialize.
 */
@Override
public DbRecordWritable serialize(Object row, ObjectInspector inspector) throws SerDeException {
    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if(fields.size() != fieldCount) {
        throw new SerDeException(String.format("Required %d columns, received %d.", fieldCount, fields.size()));
    }

    cachedWritable.clear();

    for(int i = 0; i < fieldCount; i++) {
        StructField structField = fields.get(i);
        if(structField != null) {
            Object field = structInspector.getStructFieldData(row, structField);
            ObjectInspector fieldOI = structField.getFieldObjectInspector();
            Object javaObject = HiveJdbcBridgeUtils.deparseObject(field, fieldOI);
            cachedWritable.set(i, javaObject);
        }
    }
    return cachedWritable;
}
Example 8
Source File: OrcTester.java (presto, Apache License 2.0)
private static void assertFileContentsOrcHive(
        Type type,
        TempFile tempFile,
        Iterable<?> expectedValues)
        throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);

    Reader reader = OrcFile.createReader(
            new Path(tempFile.getFile().getAbsolutePath()),
            new ReaderOptions(configuration));
    RecordReader recordReader = reader.rows();

    StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");

    Iterator<?> iterator = expectedValues.iterator();
    Object rowData = null;
    while (recordReader.hasNext()) {
        rowData = recordReader.next(rowData);
        Object expectedValue = iterator.next();

        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
Example 9
Source File: BlurSerializer.java (incubator-retired-blur, Apache License 2.0)
private String getFieldData(String columnName, Object data, StructObjectInspector structObjectInspector,
    Map<String, StructField> allStructFieldRefs, String name) throws SerDeException {
    StructField structField = allStructFieldRefs.get(name);
    ObjectInspector fieldObjectInspector = structField.getFieldObjectInspector();
    Object structFieldData = structObjectInspector.getStructFieldData(data, structField);
    if (fieldObjectInspector instanceof PrimitiveObjectInspector) {
        return toString(columnName, structFieldData, (PrimitiveObjectInspector) fieldObjectInspector);
    } else {
        throw new SerDeException("Embedded non-primitive type is not supported columnName [" + columnName
            + "] objectInspector [" + fieldObjectInspector + "].");
    }
}
Example 10
Source File: HiveKuduSerDe.java (HiveKudu-Handler, Apache License 2.0)
@Override
public HiveKuduWritable serialize(Object row, ObjectInspector inspector) throws SerDeException {
    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != fieldCount) {
        throw new SerDeException(String.format(
                "Required %d columns, received %d.", fieldCount, fields.size()));
    }

    cachedWritable.clear();

    for (int i = 0; i < fieldCount; i++) {
        StructField structField = fields.get(i);
        if (structField != null) {
            Object field = structInspector.getStructFieldData(row, structField);
            ObjectInspector fieldOI = structField.getFieldObjectInspector();
            Object javaObject = HiveKuduBridgeUtils.deparseObject(field, fieldOI);
            LOG.warn("Column value of " + i + " is " + javaObject.toString());
            cachedWritable.set(i, javaObject);
        }
    }
    return cachedWritable;
}
Example 11
Source File: TestAzureEntitySerDe.java (azure-tables-hadoop, Apache License 2.0)
@SuppressWarnings("serial")
public void testPropertyNotFound(boolean requireFieldExists) throws Exception {
    WritableEntity entity = new WritableEntity();
    entity.setProperties(new LinkedHashMap<String, EntityProperty>() {{
        put("a", new EntityProperty(7));
        put("b", new EntityProperty("hello"));
    }});
    AzureEntitySerDe serDe = new AzureEntitySerDe();
    Properties tbl = new Properties();
    tbl.put(LIST_COLUMNS, "a,b,c");
    tbl.put(LIST_COLUMN_TYPES, "int,string,int");
    Configuration conf = new Configuration();
    if (requireFieldExists) {
        conf.setBoolean(Keys.REQUIRE_FIELD_EXISTS.getKey(), true);
    }
    serDe.initialize(conf, tbl);

    StructObjectInspector inspector = (StructObjectInspector)serDe.getObjectInspector();
    assertEquals(7, inspector.getStructFieldData(entity, inspector.getStructFieldRef("a")));
    try {
        Object returned = inspector.getStructFieldData(entity, inspector.getStructFieldRef("c"));
        if (requireFieldExists) {
            fail("Should've thrown here.");
        } else {
            assertNull(returned);
        }
    } catch (IllegalArgumentException ex) {
        if (requireFieldExists) {
            assertEquals("No property found with name c. Properties found: a,b", ex.getMessage());
        } else {
            throw ex;
        }
    }
}
Example 12
Source File: TestConvertAvroToORC.java (nifi, Apache License 2.0)
@Test
public void test_onTrigger_array_of_records() throws Exception {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
    List<GenericRecord> innerRecords = new LinkedList<>();

    final GenericRecord outerRecord = new GenericData.Record(schema);

    Schema arraySchema = schema.getField("records").schema();
    Schema innerRecordSchema = arraySchema.getElementType();
    final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
    innerRecord1.put("name", "Joe");
    innerRecord1.put("age", 42);
    innerRecords.add(innerRecord1);

    final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
    innerRecord2.put("name", "Mary");
    innerRecord2.put("age", 28);
    innerRecords.add(innerRecord2);

    GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
    outerRecord.put("records", array);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(outerRecord);
    }
    out.close();

    // Build a flow file from the Avro record
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record "
        + "(records ARRAY<STRUCT<name:STRING, age:INT>>)"
        + " STORED AS ORC",
        resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);

    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));

    // Verify the record contains an array
    Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
    assertTrue(arrayFieldObject instanceof ArrayList);
    ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
    assertEquals(2, arrayField.size());

    // Verify the first element. Should be a record with two fields "name" and "age"
    Object element = arrayField.get(0);
    assertTrue(element instanceof OrcStruct);
    StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
    Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Joe", nameObject.toString());
    Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(42, ((IntWritable) ageObject).get());

    // Verify the second element. Should be a record with two fields "name" and "age"
    element = arrayField.get(1);
    assertTrue(element instanceof OrcStruct);
    nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Mary", nameObject.toString());
    ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(28, ((IntWritable) ageObject).get());
}
Example 13
Source File: TestConvertAvroToORC.java (nifi, Apache License 2.0)
@Test
public void test_onTrigger_complex_record() throws Exception {
    Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
        put("key1", 1.0);
        put("key2", 2.0);
    }};
    GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
        put("key1", 3.0);
        put("key2", 4.0);
    }};
    record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record "
        + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
        + " STORED AS ORC",
        resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());
    Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
    assertTrue(mapFieldObject instanceof Map);
    Map map = (Map) mapFieldObject;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
    mapValue = map.get(new Text("key2"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
Example 14
Source File: JsonSerDeTestingBase.java (spatial-framework-for-hadoop, Apache License 2.0)
protected Object getField(String col, Object row, StructObjectInspector rowOI) {
    StructField f0 = rowOI.getStructFieldRef(col);
    return rowOI.getStructFieldData(row, f0);
}
Example 15
Source File: SMSerDe.java (spliceengine, GNU Affero General Public License v3.0)
/**
 * This method takes an object representing a row of data from Hive, and
 * uses the ObjectInspector to get the data for each column and serialize
 * it.
 */
//@Override
public Writable serialize(Object obj, ObjectInspector oi) throws SerDeException {
    ExecRow row = null;
    int[] execRowFormatIds = null;
    try {
        List<NameType> nameTypes = sqlUtil.getTableStructure(tableName);
        execRowFormatIds = sqlUtil.getExecRowFormatIds(colNames, nameTypes);
        row = sqlUtil.getExecRow(execRowFormatIds);
        if (row == null)
            throw new SerDeException("ExecRow Cannot be Null");
    } catch (SQLException | StandardException | IOException e1) {
        throw new SerDeException(e1);
    }
    if (Log.isTraceEnabled())
        SpliceLogUtils.trace(Log, "serialize with obj=%s, oi=%s", obj, oi);
    if (oi.getCategory() != ObjectInspector.Category.STRUCT) {
        throw new SerDeException(getClass().toString()
                + " can only serialize struct types, but we got: "
                + oi.getTypeName());
    }

    StructObjectInspector soi = (StructObjectInspector) oi;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();

    try {
        DataValueDescriptor dvd;
        for (int i = 0; i < fields.size(); i++) {
            StructField field = fields.get(i);
            dvd = row.getColumn(i + 1);
            ObjectInspector fieldOI = field.getFieldObjectInspector();
            Object fieldObj = soi.getStructFieldData(obj, field);
            PrimitiveObjectInspector primOI = (PrimitiveObjectInspector) fieldOI;
            Object data = primOI.getPrimitiveJavaObject(fieldObj);

            PrimitiveCategory primitiveCategory = primOI.getPrimitiveCategory();
            switch (primitiveCategory) {
                case BYTE: dvd.setValue(((Byte) data).byteValue()); break;
                case INT: dvd.setValue(((Integer) data).intValue()); break;
                case VARCHAR: dvd.setValue(((HiveVarchar) data).getValue()); break;
                case CHAR: dvd.setValue(((HiveChar) data).getValue()); break;
                case STRING: dvd.setValue((String) data); break;
                case BINARY: dvd.setValue((SerializationUtils.serialize((Serializable) data))); break; // is this right? Should just be a byte[]
                case BOOLEAN: dvd.setValue(((Boolean) data).booleanValue()); break;
                case DECIMAL: dvd.setValue(((HiveDecimal) data).doubleValue()); break;
                case DOUBLE: dvd.setValue(((Double) data).doubleValue()); break;
                case FLOAT: dvd.setValue(((Float) data).floatValue()); break;
                case LONG: dvd.setValue(((Long) data).longValue()); break;
                case SHORT: dvd.setValue(((Short) data).shortValue()); break;
                case TIMESTAMP: dvd.setValue((Timestamp) data); break;
                case DATE: dvd.setValue((java.sql.Date) data); break;
                default:
                    throw new SerDeException(String.format("Hive Type %s Not Supported Yet", primOI.getPrimitiveCategory()));
            }
        }
    } catch (StandardException e) {
        // TODO Auto-generated catch block
        throw new RuntimeException("Serialized Object To Java Type Error");
    }
    ExecRowWritable rowWritable = new ExecRowWritable(WriteReadUtils.getExecRowFromTypeFormatIds(execRowFormatIds));
    rowWritable.set(row);
    return rowWritable;
}
Example 16
Source File: ExcelSpreadSheetCellDAOSerde.java (hadoopoffice, Apache License 2.0)
/**
 * Writes a row in Hive containing exactly 5 elements (String) to a SpreadSheetCellDAO.
 * Order: "formattedValue","comment","formula","address","sheetName"
 */
@Override
public Writable serialize(Object arg0, ObjectInspector arg1) throws SerDeException {
    if (!(arg1 instanceof StructObjectInspector)) {
        throw new SerDeException("Expect a row of Strings for serialization");
    }
    final StructObjectInspector outputOI = (StructObjectInspector) arg1;
    final List<? extends StructField> outputFields = outputOI.getAllStructFieldRefs();
    if (outputFields.size()!=ExcelSpreadSheetCellDAOSerde.columnNames.length) {
        throw new SerDeException("Expected "+ExcelSpreadSheetCellDAOSerde.columnNames.length+" fields characterizing a cell: \"formattedValue\",\"comment\",\"formula\",\"address\",\"sheetName\", but found "+outputFields.size()+" fields");
    }
    if (arg0==null) {
        return null;
    }
    // get field data
    // formattedValue
    int columnNum=0;
    final Object foFormattedValue = outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
    final ObjectInspector oiFormattedValue = outputFields.get(columnNum).getFieldObjectInspector();
    String formattedValue = String.valueOf(((PrimitiveObjectInspector) oiFormattedValue).getPrimitiveJavaObject(foFormattedValue));
    // comment
    columnNum=1;
    final Object foComment= outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
    final ObjectInspector oiComment = outputFields.get(columnNum).getFieldObjectInspector();
    String comment = String.valueOf(((PrimitiveObjectInspector) oiComment).getPrimitiveJavaObject(foComment));
    // formula
    columnNum=2;
    final Object foFormula= outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
    final ObjectInspector oiFormula = outputFields.get(columnNum).getFieldObjectInspector();
    String formula = String.valueOf(((PrimitiveObjectInspector) oiFormula).getPrimitiveJavaObject(foFormula));
    // address
    columnNum=3;
    final Object foAddress= outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
    final ObjectInspector oiAddress = outputFields.get(columnNum).getFieldObjectInspector();
    String address = String.valueOf(((PrimitiveObjectInspector) oiAddress).getPrimitiveJavaObject(foAddress));
    // sheetName
    columnNum=4;
    final Object foSheetName= outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
    final ObjectInspector oiSheetName = outputFields.get(columnNum).getFieldObjectInspector();
    String sheetName = String.valueOf(((PrimitiveObjectInspector) oiSheetName).getPrimitiveJavaObject(foSheetName));
    return new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName);
}
Example 17
Source File: HiveTextReader.java (dremio-oss, Apache License 2.0)
@Override
public int populateData() throws IOException, SerDeException {
    final SkipRecordsInspector skipRecordsInspector = this.skipRecordsInspector;
    final RecordReader<Object, Object> reader = this.reader;
    final Converter partTblObjectInspectorConverter = this.partTblObjectInspectorConverter;
    final Object key = this.key;

    final int numRowsPerBatch = (int) this.numRowsPerBatch;
    final StructField[] selectedStructFieldRefs = this.selectedStructFieldRefs;
    final AbstractSerDe partitionSerDe = this.partitionSerDe;
    final StructObjectInspector finalOI = this.finalOI;
    final ObjectInspector[] selectedColumnObjInspectors = this.selectedColumnObjInspectors;
    final HiveFieldConverter[] selectedColumnFieldConverters = this.selectedColumnFieldConverters;
    final ValueVector[] vectors = this.vectors;

    skipRecordsInspector.reset();
    Object value;

    int recordCount = 0;

    while (recordCount < numRowsPerBatch) {
        try (OperatorStats.WaitRecorder recorder = OperatorStats.getWaitRecorder(this.context.getStats())) {
            boolean hasNext = reader.next(key, value = skipRecordsInspector.getNextValue());
            if (!hasNext) {
                break;
            }
        } catch(FSError e) {
            throw HadoopFileSystemWrapper.propagateFSError(e);
        }
        if (skipRecordsInspector.doSkipHeader(recordCount++)) {
            continue;
        }
        Object bufferedValue = skipRecordsInspector.bufferAdd(value);
        if (bufferedValue != null) {
            Object deSerializedValue = partitionSerDe.deserialize((Writable) bufferedValue);
            if (partTblObjectInspectorConverter != null) {
                deSerializedValue = partTblObjectInspectorConverter.convert(deSerializedValue);
            }
            for (int i = 0; i < selectedStructFieldRefs.length; i++) {
                Object hiveValue = finalOI.getStructFieldData(deSerializedValue, selectedStructFieldRefs[i]);
                if (hiveValue != null) {
                    selectedColumnFieldConverters[i].setSafeValue(selectedColumnObjInspectors[i], hiveValue,
                        vectors[i], skipRecordsInspector.getActualCount());
                }
            }
            skipRecordsInspector.incrementActualCount();
        }
        skipRecordsInspector.incrementTempCount();
    }

    for (int i = 0; i < selectedStructFieldRefs.length; i++) {
        vectors[i].setValueCount(skipRecordsInspector.getActualCount());
    }
    skipRecordsInspector.updateContinuance();
    return skipRecordsInspector.getActualCount();
}
Example 18
Source File: TestConvertAvroToORC.java (localization_nifi, Apache License 2.0)
@Test
public void test_onTrigger_array_of_records() throws Exception {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
    List<GenericRecord> innerRecords = new LinkedList<>();

    final GenericRecord outerRecord = new GenericData.Record(schema);

    Schema arraySchema = schema.getField("records").schema();
    Schema innerRecordSchema = arraySchema.getElementType();
    final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
    innerRecord1.put("name", "Joe");
    innerRecord1.put("age", 42);
    innerRecords.add(innerRecord1);

    final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
    innerRecord2.put("name", "Mary");
    innerRecord2.put("age", 28);
    innerRecords.add(innerRecord2);

    GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
    outerRecord.put("records", array);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(outerRecord);
    }
    out.close();

    // Build a flow file from the Avro record
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record "
        + "(records ARRAY<STRUCT<name:STRING, age:INT>>)"
        + " STORED AS ORC",
        resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);

    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));

    // Verify the record contains an array
    Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
    assertTrue(arrayFieldObject instanceof ArrayList);
    ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
    assertEquals(2, arrayField.size());

    // Verify the first element. Should be a record with two fields "name" and "age"
    Object element = arrayField.get(0);
    assertTrue(element instanceof OrcStruct);
    StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
    Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Joe", nameObject.toString());
    Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(42, ((IntWritable) ageObject).get());

    // Verify the second element. Should be a record with two fields "name" and "age"
    element = arrayField.get(1);
    assertTrue(element instanceof OrcStruct);
    nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Mary", nameObject.toString());
    ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(28, ((IntWritable) ageObject).get());
}
Example 19
Source File: TestConvertAvroToORC.java (localization_nifi, Apache License 2.0)
@Test
public void test_onTrigger_complex_record() throws Exception {
    Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
        put("key1", 1.0);
        put("key2", 2.0);
    }};
    GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
        put("key1", 3.0);
        put("key2", 4.0);
    }};
    record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record "
        + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
        + " STORED AS ORC",
        resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());

    // This is pretty awkward and messy. The map object is a Map (not a MapWritable) but the keys are writables (in this case Text)
    // and so are the values (DoubleWritables in this case).
    Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
    assertTrue(mapFieldObject instanceof Map);
    Map map = (Map) mapFieldObject;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
    mapValue = map.get(new Text("key2"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
Example 20
Source File: TestConvertAvroToORC.java (localization_nifi, Apache License 2.0)
@Test
public void test_onTrigger_primitive_record() throws Exception {
    GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World");

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    // Put another record in
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record");
    fileWriter.append(record);
    // And one more
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!");
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
        + " STORED AS ORC",
        resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());
    Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string"));
    assertTrue(stringFieldObject instanceof Text);
    assertEquals("World", stringFieldObject.toString());
}