org.apache.hadoop.hive.ql.io.orc.OrcStruct Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.io.orc.OrcStruct.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OrcTestTools.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Compares a row of plain Java objects against an {@link OrcStruct}, field by field.
 *
 * <p>{@link OrcStruct} does not expose its field array, so the private {@code fields} member is read
 * through reflection. This is a workaround meant for integration tests only. A field that is itself an
 * {@link OrcStruct} is compared recursively; every other field is cast to {@link Writable} and handed to
 * {@code objCastHelper} together with the Java object at the same position.
 *
 * @param realRow A row containing a list of Java objects; it is cast to {@link ArrayList}.
 * @param struct An {@link OrcStruct} which essentially is a list of {@link Writable} objects.
 * @return {@code true} if every field of {@code struct} matches the element of {@code realRow} at the same index.
 * @throws RuntimeException if the {@code fields} member cannot be located or read reflectively; the
 *         reflection exception is attached as the cause.
 */
private boolean compareJavaRowAndOrcStruct(Object realRow, OrcStruct struct) {
  ArrayList<?> javaObjRow = (ArrayList<?>) realRow;
  try {
    Field objectArr = OrcStruct.class.getDeclaredField("fields");
    objectArr.setAccessible(true);
    Object[] dataArr = (Object[]) objectArr.get(struct);

    for (int index = 0; index < dataArr.length; index++) {
      Object dataField = dataArr[index];
      boolean fieldMatches = dataField instanceof OrcStruct
          ? compareJavaRowAndOrcStruct(javaObjRow.get(index), (OrcStruct) dataField)
          : objCastHelper(javaObjRow.get(index), (Writable) dataField);
      // Once a mismatch is known the remaining fields cannot change the answer.
      if (!fieldMatches) {
        return false;
      }
    }
    return true;
  } catch (NoSuchFieldException | IllegalAccessException e) {
    // Keep the reflection failure as the cause so the stack trace shows what actually went wrong.
    throw new RuntimeException("Failed in compare a java object row and orcstruct", e);
  }
}
Example #2
Source File: OrcTestTools.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/** * * @param expected * @param observed * @param allowDifferentOrder ORC tools will not use this parameter currently. * @param blacklistRecordFields ORC tools will not use this parameter currently. * @return If two sets of files are identical. * Note that there might be an ordering issue in this comparison method. When one is drafting an ORC integration * test, try to name all json files differently. */ @Override public boolean checkSameFilesAndRecords(TreeMap<String, OrcRowIterator> expected, TreeMap<String, OrcRowIterator> observed, boolean allowDifferentOrder, Collection<String> blacklistRecordFields, boolean allowDifferentSchema) { Iterator<String> keys1 = expected.navigableKeySet().iterator(); Iterator<String> keys2 = observed.navigableKeySet().iterator(); return compareIterators(keys1, keys2, (key1, key2) -> { // ORC file doesn't have extension by Linkedin's convention. if (!removeExtension(key1).equals(key2)) { log.error(String.format("Mismatched files: %s and %s", key1, key2)); return false; } OrcRowIterator it1 = expected.get(key1); OrcRowIterator it2 = observed.get(key2); if (!it1.getTypeInfo().equals(it2.getTypeInfo())) { log.error(String.format("Mismatched Typeinfo: %s and %s", key1, key2)); return false; } boolean result = true; while (it1.hasNext()) { if (!it2.hasNext() || !result) { return false; } result = compareJavaRowAndOrcStruct(((AvroRow) it1.next()).getRow(), (OrcStruct) it2.next()); } return result; }); }
Example #3
Source File: OrcStorage.java From spork with Apache License 2.0 | 5 votes |
@Override public void setLocation(String location, Job job) throws IOException { Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass()); if (!UDFContext.getUDFContext().isFrontend()) { typeInfo = (TypeInfo)ObjectSerializer.deserialize(p.getProperty(signature + SchemaSignatureSuffix)); } else if (typeInfo == null) { typeInfo = getTypeInfo(location, job); } if (typeInfo != null && oi == null) { oi = OrcStruct.createObjectInspector(typeInfo); } if (!UDFContext.getUDFContext().isFrontend()) { if (p.getProperty(signature + RequiredColumnsSuffix) != null) { mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p .getProperty(signature + RequiredColumnsSuffix)); job.getConfiguration().setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, getReqiredColumnIdString(mRequiredColumns)); if (p.getProperty(signature + SearchArgsSuffix) != null) { // Bug in setSearchArgument which always expects READ_COLUMN_NAMES_CONF_STR to be set job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, getReqiredColumnNamesString(getSchema(location, job), mRequiredColumns)); } } else if (p.getProperty(signature + SearchArgsSuffix) != null) { // Bug in setSearchArgument which always expects READ_COLUMN_NAMES_CONF_STR to be set job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, getReqiredColumnNamesString(getSchema(location, job))); } if (p.getProperty(signature + SearchArgsSuffix) != null) { job.getConfiguration().set(SARG_PUSHDOWN, p.getProperty(signature + SearchArgsSuffix)); } } FileInputFormat.setInputPaths(job, location); }
Example #4
Source File: PutORCTest.java From nifi with Apache License 2.0 | 5 votes |
/**
 * Reads the ORC file at {@code orcUsers} and verifies every row it contains.
 *
 * <p>Each row must be an {@link OrcStruct}. Its fields are extracted with an inspector built for
 * {@code struct<name:string,favorite_number:int,favorite_color:string,scale:double>}. When
 * {@code assertFunction} is {@code null} the default expectations are applied for row index {@code i}:
 * name {@code "name" + i}, favorite number {@code i}, color {@code "blue" + i}, and scale
 * {@code 10.0 * i}. Otherwise the supplied function is called with the field list and the row index.
 *
 * @param orcUsers path of the ORC file to read
 * @param numExpectedUsers number of rows the file must contain
 * @param assertFunction optional per-row check; {@code null} selects the default assertions
 * @throws IOException if the file cannot be opened or read, or the record reader cannot be closed
 */
private void verifyORCUsers(final Path orcUsers, final int numExpectedUsers,
        BiFunction<List<Object>, Integer, Void> assertFunction) throws IOException {
    Reader reader = OrcFile.createReader(orcUsers, OrcFile.readerOptions(testConf));
    RecordReader recordReader = reader.rows();
    // Release the underlying file handle even when one of the assertions below fails.
    try {
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
                "struct<name:string,favorite_number:int,favorite_color:string,scale:double>");
        StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(typeInfo);

        int currUser = 0;
        Object nextRecord = null;
        while ((nextRecord = recordReader.next(nextRecord)) != null) {
            Assert.assertNotNull(nextRecord);
            Assert.assertTrue("Not an OrcStruct", nextRecord instanceof OrcStruct);
            List<Object> x = inspector.getStructFieldsDataAsList(nextRecord);

            if (assertFunction == null) {
                assertEquals("name" + currUser, x.get(0).toString());
                assertEquals(currUser, ((IntWritable) x.get(1)).get());
                assertEquals("blue" + currUser, x.get(2).toString());
                assertEquals(10.0 * currUser, ((DoubleWritable) x.get(3)).get(), Double.MIN_VALUE);
            } else {
                assertFunction.apply(x, currUser);
            }
            currUser++;
        }

        assertEquals(numExpectedUsers, currUser);
    } finally {
        recordReader.close();
    }
}
Example #5
Source File: OrcTester.java From presto with Apache License 2.0 | 4 votes |
/**
 * Converts a value produced by the Hive ORC record reader into the object used for comparison.
 *
 * <p>Primitive writables are unwrapped to their Java values, binary/date/decimal/timestamp writables
 * are wrapped in the corresponding {@code Sql*} objects, and structs, lists and maps are delegated to
 * the matching {@code decodeRecordReader*} helper. Any other value, including {@code null}, is
 * returned unchanged.
 *
 * @param type the expected type; cast to {@link DecimalType} for decimals and passed on to the
 *        struct/list/map helpers
 * @param actualValue the raw value read from the record reader
 * @return the decoded value
 */
private static Object decodeRecordReaderValue(Type type, Object actualValue)
{
    if (actualValue instanceof BooleanWritable) {
        return ((BooleanWritable) actualValue).get();
    }
    if (actualValue instanceof ByteWritable) {
        return ((ByteWritable) actualValue).get();
    }
    if (actualValue instanceof BytesWritable) {
        return new SqlVarbinary(((BytesWritable) actualValue).copyBytes());
    }
    if (actualValue instanceof DateWritable) {
        return new SqlDate(((DateWritable) actualValue).getDays());
    }
    if (actualValue instanceof DoubleWritable) {
        return ((DoubleWritable) actualValue).get();
    }
    if (actualValue instanceof FloatWritable) {
        return ((FloatWritable) actualValue).get();
    }
    if (actualValue instanceof IntWritable) {
        return ((IntWritable) actualValue).get();
    }
    if (actualValue instanceof HiveCharWritable) {
        return ((HiveCharWritable) actualValue).getPaddedValue().toString();
    }
    if (actualValue instanceof LongWritable) {
        return ((LongWritable) actualValue).get();
    }
    if (actualValue instanceof ShortWritable) {
        return ((ShortWritable) actualValue).get();
    }
    if (actualValue instanceof HiveDecimalWritable) {
        DecimalType decimalType = (DecimalType) type;
        HiveDecimalWritable decimalWritable = (HiveDecimalWritable) actualValue;
        // writable messes with the scale so rescale the values to the Presto type
        BigInteger rescaledValue = rescale(
                decimalWritable.getHiveDecimal().unscaledValue(),
                decimalWritable.getScale(),
                decimalType.getScale());
        return new SqlDecimal(rescaledValue, decimalType.getPrecision(), decimalType.getScale());
    }
    if (actualValue instanceof Text) {
        return actualValue.toString();
    }
    if (actualValue instanceof TimestampWritable) {
        TimestampWritable timestampWritable = (TimestampWritable) actualValue;
        return sqlTimestampOf(
                (timestampWritable.getSeconds() * 1000) + (timestampWritable.getNanos() / 1000000L),
                SESSION);
    }
    if (actualValue instanceof OrcStruct) {
        OrcStruct struct = (OrcStruct) actualValue;
        List<Object> fieldValues = new ArrayList<>();
        for (int position = 0; position < struct.getNumFields(); position++) {
            fieldValues.add(OrcUtil.getFieldValue(struct, position));
        }
        return decodeRecordReaderStruct(type, fieldValues);
    }
    if (actualValue instanceof List) {
        return decodeRecordReaderList(type, ((List<?>) actualValue));
    }
    if (actualValue instanceof Map) {
        return decodeRecordReaderMap(type, (Map<?, ?>) actualValue);
    }
    return actualValue;
}
Example #6
Source File: TestConvertAvroToORC.java From localization_nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_primitive_record() throws Exception { GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World"); DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema()); DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer); ByteArrayOutputStream out = new ByteArrayOutputStream(); fileWriter.create(record.getSchema(), out); fileWriter.append(record); // Put another record in record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record"); fileWriter.append(record); // And one more record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!"); fileWriter.append(record); fileWriter.flush(); fileWriter.close(); out.close(); Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test.avro"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); 
FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema(); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); // Check some fields in the first row Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int")); assertTrue(intFieldObject instanceof IntWritable); assertEquals(10, ((IntWritable) intFieldObject).get()); Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string")); assertTrue(stringFieldObject instanceof Text); assertEquals("World", stringFieldObject.toString()); }
Example #7
Source File: TestConvertAvroToORC.java From localization_nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_complex_record() throws Exception { Map<String, Double> mapData1 = new TreeMap<String, Double>() {{ put("key1", 1.0); put("key2", 2.0); }}; GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20)); DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema()); DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer); ByteArrayOutputStream out = new ByteArrayOutputStream(); fileWriter.create(record.getSchema(), out); fileWriter.append(record); // Put another record in Map<String, Double> mapData2 = new TreeMap<String, Double>() {{ put("key1", 3.0); put("key2", 4.0); }}; record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200)); fileWriter.append(record); fileWriter.flush(); fileWriter.close(); out.close(); Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record " + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); 
FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema(); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); // Check some fields in the first row Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt")); assertTrue(intFieldObject instanceof IntWritable); assertEquals(10, ((IntWritable) intFieldObject).get()); // This is pretty awkward and messy. The map object is a Map (not a MapWritable) but the keys are writables (in this case Text) // and so are the values (DoubleWritables in this case). Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap")); assertTrue(mapFieldObject instanceof Map); Map map = (Map) mapFieldObject; Object mapValue = map.get(new Text("key1")); assertNotNull(mapValue); assertTrue(mapValue instanceof DoubleWritable); assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE); mapValue = map.get(new Text("key2")); assertNotNull(mapValue); assertTrue(mapValue instanceof DoubleWritable); assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE); }
Example #8
Source File: TestConvertAvroToORC.java From localization_nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_array_of_records() throws Exception { final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc")); List<GenericRecord> innerRecords = new LinkedList<>(); final GenericRecord outerRecord = new GenericData.Record(schema); Schema arraySchema = schema.getField("records").schema(); Schema innerRecordSchema = arraySchema.getElementType(); final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema); innerRecord1.put("name", "Joe"); innerRecord1.put("age", 42); innerRecords.add(innerRecord1); final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema); innerRecord2.put("name", "Mary"); innerRecord2.put("age", 28); innerRecords.add(innerRecord2); GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords); outerRecord.put("records", array); final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); ByteArrayOutputStream out = new ByteArrayOutputStream(); try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) { dataFileWriter.create(schema, out); dataFileWriter.append(outerRecord); } out.close(); // Build a flow file from the Avro record Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " + "(records ARRAY<STRUCT<name:STRING, age:INT>>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", 
resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema)); // Verify the record contains an array Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records")); assertTrue(arrayFieldObject instanceof ArrayList); ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject; assertEquals(2, arrayField.size()); // Verify the first element. Should be a record with two fields "name" and "age" Object element = arrayField.get(0); assertTrue(element instanceof OrcStruct); StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema)); Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); assertTrue(nameObject instanceof Text); assertEquals("Joe", nameObject.toString()); Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); assertTrue(ageObject instanceof IntWritable); assertEquals(42, ((IntWritable) ageObject).get()); // Verify the first element. 
Should be a record with two fields "name" and "age" element = arrayField.get(1); assertTrue(element instanceof OrcStruct); nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); assertTrue(nameObject instanceof Text); assertEquals("Mary", nameObject.toString()); ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); assertTrue(ageObject instanceof IntWritable); assertEquals(28, ((IntWritable) ageObject).get()); }
Example #9
Source File: TestOrcStorage.java From spork with Apache License 2.0 | 4 votes |
@SuppressWarnings("rawtypes") private void compareData(Object expected, Object actual) { if (expected instanceof Text) { assertEquals(String.class, actual.getClass()); assertEquals(expected.toString(), actual); } else if (expected instanceof ShortWritable) { assertEquals(Integer.class, actual.getClass()); assertEquals((int)((ShortWritable) expected).get(), actual); } else if (expected instanceof IntWritable) { assertEquals(Integer.class, actual.getClass()); assertEquals(((IntWritable) expected).get(), actual); } else if (expected instanceof LongWritable) { assertEquals(Long.class, actual.getClass()); assertEquals(((LongWritable) expected).get(), actual); } else if (expected instanceof FloatWritable) { assertEquals(Float.class, actual.getClass()); assertEquals(((FloatWritable) expected).get(), actual); } else if (expected instanceof HiveDecimalWritable) { assertEquals(BigDecimal.class, actual.getClass()); assertEquals(((HiveDecimalWritable) expected).toString(), actual.toString()); } else if (expected instanceof DoubleWritable) { assertEquals(Double.class, actual.getClass()); assertEquals(((DoubleWritable) expected).get(), actual); } else if (expected instanceof BooleanWritable) { assertEquals(Boolean.class, actual.getClass()); assertEquals(((BooleanWritable) expected).get(), actual); } else if (expected instanceof TimestampWritable) { assertEquals(DateTime.class, actual.getClass()); assertEquals(((TimestampWritable) expected).getTimestamp().getTime(), ((DateTime) actual).getMillis()); } else if (expected instanceof BytesWritable) { assertEquals(DataByteArray.class, actual.getClass()); BytesWritable bw = (BytesWritable) expected; assertEquals(new DataByteArray(bw.getBytes(), 0, bw.getLength()), actual); } else if (expected instanceof ByteWritable) { assertEquals(Integer.class, actual.getClass()); assertEquals((int) ((ByteWritable) expected).get(), actual); } else if (expected instanceof OrcStruct) { assertEquals(BinSedesTuple.class, actual.getClass()); // TODO: 
compare actual values. No getters in OrcStruct } else if (expected instanceof ArrayList) { assertEquals(DefaultDataBag.class, actual.getClass()); // TODO: compare actual values. No getters in OrcStruct } else if (expected instanceof HashMap) { assertEquals(HashMap.class, actual.getClass()); assertEquals(((HashMap) expected).size(), ((HashMap) actual).size()); // TODO: compare actual values. No getters in OrcStruct } else if (expected == null) { assertEquals(expected, actual); } else { Assert.fail("Unknown object type: " + expected.getClass().getName()); } }
Example #10
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_primitive_record() throws Exception { GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World"); DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema()); DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer); ByteArrayOutputStream out = new ByteArrayOutputStream(); fileWriter.create(record.getSchema(), out); fileWriter.append(record); // Put another record in record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record"); fileWriter.append(record); // And one more record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!"); fileWriter.append(record); fileWriter.flush(); fileWriter.close(); out.close(); Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test.avro"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); 
FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema(); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); // Check some fields in the first row Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int")); assertTrue(intFieldObject instanceof IntWritable); assertEquals(10, ((IntWritable) intFieldObject).get()); Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string")); assertTrue(stringFieldObject instanceof Text); assertEquals("World", stringFieldObject.toString()); }
Example #11
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_complex_record() throws Exception { Map<String, Double> mapData1 = new TreeMap<String, Double>() {{ put("key1", 1.0); put("key2", 2.0); }}; GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20)); DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema()); DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer); ByteArrayOutputStream out = new ByteArrayOutputStream(); fileWriter.create(record.getSchema(), out); fileWriter.append(record); // Put another record in Map<String, Double> mapData2 = new TreeMap<String, Double>() {{ put("key1", 3.0); put("key2", 4.0); }}; record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200)); fileWriter.append(record); fileWriter.flush(); fileWriter.close(); out.close(); Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record " + "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); 
FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema(); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); // Check some fields in the first row Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt")); assertTrue(intFieldObject instanceof IntWritable); assertEquals(10, ((IntWritable) intFieldObject).get()); Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap")); assertTrue(mapFieldObject instanceof Map); Map map = (Map) mapFieldObject; Object mapValue = map.get(new Text("key1")); assertNotNull(mapValue); assertTrue(mapValue instanceof DoubleWritable); assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE); mapValue = map.get(new Text("key2")); assertNotNull(mapValue); assertTrue(mapValue instanceof DoubleWritable); assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE); }
Example #12
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_array_of_records() throws Exception { final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc")); List<GenericRecord> innerRecords = new LinkedList<>(); final GenericRecord outerRecord = new GenericData.Record(schema); Schema arraySchema = schema.getField("records").schema(); Schema innerRecordSchema = arraySchema.getElementType(); final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema); innerRecord1.put("name", "Joe"); innerRecord1.put("age", 42); innerRecords.add(innerRecord1); final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema); innerRecord2.put("name", "Mary"); innerRecord2.put("age", 28); innerRecords.add(innerRecord2); GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords); outerRecord.put("records", array); final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); ByteArrayOutputStream out = new ByteArrayOutputStream(); try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) { dataFileWriter.create(schema, out); dataFileWriter.append(outerRecord); } out.close(); // Build a flow file from the Avro record Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " + "(records ARRAY<STRUCT<name:STRING, age:INT>>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", 
resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema)); // Verify the record contains an array Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records")); assertTrue(arrayFieldObject instanceof ArrayList); ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject; assertEquals(2, arrayField.size()); // Verify the first element. Should be a record with two fields "name" and "age" Object element = arrayField.get(0); assertTrue(element instanceof OrcStruct); StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema)); Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); assertTrue(nameObject instanceof Text); assertEquals("Joe", nameObject.toString()); Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); assertTrue(ageObject instanceof IntWritable); assertEquals(42, ((IntWritable) ageObject).get()); // Verify the first element. 
Should be a record with two fields "name" and "age" element = arrayField.get(1); assertTrue(element instanceof OrcStruct); nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); assertTrue(nameObject instanceof Text); assertEquals("Mary", nameObject.toString()); ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); assertTrue(ageObject instanceof IntWritable); assertEquals(28, ((IntWritable) ageObject).get()); }
Example #13
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0 | 4 votes |
@Test public void test_onTrigger_nested_complex_record() throws Exception { Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {{ put("key1", Arrays.asList(1.0, 2.0)); put("key2", Arrays.asList(3.0, 4.0)); }}; Map<String, String> arrayMap11 = new TreeMap<String, String>() {{ put("key1", "v1"); put("key2", "v2"); }}; Map<String, String> arrayMap12 = new TreeMap<String, String>() {{ put("key3", "v3"); put("key4", "v4"); }}; GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12)); DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema()); DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer); ByteArrayOutputStream out = new ByteArrayOutputStream(); fileWriter.create(record.getSchema(), out); fileWriter.append(record); // Put another record in Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {{ put("key1", Arrays.asList(-1.0, -2.0)); put("key2", Arrays.asList(-3.0, -4.0)); }}; Map<String, String> arrayMap21 = new TreeMap<String, String>() {{ put("key1", "v-1"); put("key2", "v-2"); }}; Map<String, String> arrayMap22 = new TreeMap<String, String>() {{ put("key3", "v-3"); put("key4", "v-4"); }}; record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22)); fileWriter.append(record); fileWriter.flush(); fileWriter.close(); out.close(); Map<String, String> attributes = new HashMap<String, String>() {{ put(CoreAttributes.FILENAME.key(), "test"); }}; runner.enqueue(out.toByteArray(), attributes); runner.run(); runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); // Write the flow file out to disk, since the ORC Reader needs a path MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record " + "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, 
myArrayOfMap ARRAY<MAP<STRING, STRING>>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); FileOutputStream fos = new FileOutputStream("target/test1.orc"); fos.write(resultContents); fos.flush(); fos.close(); Configuration conf = new Configuration(); FileSystem fs = FileSystem.getLocal(conf); Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); Object o = rows.next(null); assertNotNull(o); assertTrue(o instanceof OrcStruct); TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema(); StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); // check values Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray")); assertTrue(myMapOfArray instanceof Map); Map map = (Map) myMapOfArray; Object mapValue = map.get(new Text("key1")); assertNotNull(mapValue); assertTrue(mapValue instanceof List); assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue); Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap")); assertTrue(myArrayOfMap instanceof List); List list = (List) myArrayOfMap; Object el0 = list.get(0); assertNotNull(el0); assertTrue(el0 instanceof Map); assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1"))); }