org.apache.orc.Reader Java Examples
The following examples show how to use
org.apache.orc.Reader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OrcIterable.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Creates a row-batch iterator over an ORC file using the given reader.
 *
 * <p>The reader's options are configured with the requested schema and search argument; a byte
 * range is applied only when {@code start} is non-null.
 *
 * @param file          input file, used for its location and in the error message
 * @param readerSchema  ORC schema to read with
 * @param start         range start offset, or {@code null} for no range restriction
 * @param length        range length, only used when {@code start} is non-null
 * @param orcFileReader already opened ORC reader for {@code file}
 * @param sarg          search argument passed to the reader options
 * @return an iterator over the vectorized row batches
 * @throws RuntimeIOException if the ORC rows cannot be obtained
 */
private static VectorizedRowBatchIterator newOrcIterator(InputFile file,
                                                         TypeDescription readerSchema,
                                                         Long start, Long length,
                                                         Reader orcFileReader,
                                                         SearchArgument sarg) {
  final Reader.Options readOptions = orcFileReader.options();
  if (start != null) {
    readOptions.range(start, length);
  }
  readOptions
      .schema(readerSchema)
      .searchArgument(sarg, new String[]{});

  try {
    return new VectorizedRowBatchIterator(
        file.location(),
        readerSchema,
        orcFileReader.rows(readOptions));
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to get ORC rows for file: %s", file);
  }
}
Example #2
Source File: OrcShimV200.java From flink with Apache License 2.0 | 6 votes |
protected Reader createReader(Path path, Configuration conf) throws IOException { try { Class orcFileClass = Class.forName("org.apache.hadoop.hive.ql.io.orc.OrcFile"); Object readerOptions = invokeStaticMethod(orcFileClass, "readerOptions", conf); Class readerClass = Class.forName("org.apache.hadoop.hive.ql.io.orc.ReaderImpl"); //noinspection unchecked return (Reader) invokeConstructor(readerClass, path, readerOptions); } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InstantiationException | InvocationTargetException e) { throw new IOException(e); } }
Example #3
Source File: OrcFileSystemITCase.java From flink with Apache License 2.0 | 6 votes |
@Override public void testNonPartition() { super.testNonPartition(); // test configure success File directory = new File(URI.create(resultPath()).getPath()); File[] files = directory.listFiles((dir, name) -> !name.startsWith(".") && !name.startsWith("_")); Assert.assertNotNull(files); Path path = new Path(URI.create(files[0].getAbsolutePath())); try { Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(new Configuration())); if (configure) { Assert.assertEquals("SNAPPY", reader.getCompressionKind().toString()); } else { Assert.assertEquals("ZLIB", reader.getCompressionKind().toString()); } } catch (IOException e) { throw new RuntimeException(e); } }
Example #4
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testSplitStripesGivenSplits() throws IOException { rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration()); OrcRowInputFormat spy = spy(rowOrcInputFormat); // mock options to check configuration of ORC reader Reader.Options options = spy(new Reader.Options()); doReturn(options).when(spy).getOptions(any()); FileInputSplit[] splits = spy.createInputSplits(3); spy.openInputFormat(); spy.open(splits[0]); verify(options).range(eq(3L), eq(137005L)); spy.open(splits[1]); verify(options).range(eq(137008L), eq(136182L)); spy.open(splits[2]); verify(options).range(eq(273190L), eq(123633L)); }
Example #5
Source File: PentahoOrcRecordReader.java From pentaho-hadoop-shims with Apache License 2.0 | 6 votes |
/**
 * Opens an ORC reader for the given file name.
 *
 * <p>If the path is a directory, the first entry returned by the file system whose name ends
 * with {@code .orc} is opened instead.
 *
 * @param fileName file or directory to read
 * @param conf     Hadoop configuration; S3 credentials are applied to it if necessary
 * @return a reader for the resolved ORC file
 * @throws IllegalArgumentException if the path does not exist, the directory holds no
 *                                  {@code .orc} file, or any other I/O error occurs
 */
static Reader getReader( String fileName, Configuration conf ) {
  try {
    S3NCredentialUtils.applyS3CredentialsToHadoopConfigurationIfNecessary( fileName, conf );
    Path target = new Path( S3NCredentialUtils.scrubFilePathIfNecessary( fileName ) );
    FileSystem fileSystem = FileSystem.get( target.toUri(), conf );

    if ( !fileSystem.exists( target ) ) {
      throw new NoSuchFileException( fileName );
    }

    if ( fileSystem.getFileStatus( target ).isDirectory() ) {
      PathFilter orcOnly = candidate -> candidate.getName().endsWith( ".orc" );
      FileStatus[] orcFiles = fileSystem.listStatus( target, orcOnly );
      if ( orcFiles.length == 0 ) {
        throw new NoSuchFileException( fileName );
      }
      target = orcFiles[ 0 ].getPath();
    }

    return OrcFile.createReader( target, OrcFile.readerOptions( conf ).filesystem( fileSystem ) );
  } catch ( IOException e ) {
    throw new IllegalArgumentException( "Unable to read data from file " + fileName, e );
  }
}
Example #6
Source File: ORC.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Opens the configured file and builds an {@code OrcIterator} over its rows.
 *
 * <p>The schema is converted to an ORC type description; a byte range is applied to the reader
 * options only when {@code start} is non-null.
 *
 * @return an iterator over the file's rows
 * @throws NullPointerException if no schema was set
 * @throws RuntimeException     if the file cannot be opened
 */
public OrcIterator build() {
  Preconditions.checkNotNull(schema, "Schema is required");
  try {
    Path orcPath = new Path(file.location());
    Reader orcReader = OrcFile.createReader(orcPath, OrcFile.readerOptions(conf));

    ColumnIdMap idMap = new ColumnIdMap();
    TypeDescription readSchema = TypeConversion.toOrc(schema, idMap);

    Reader.Options readOptions = orcReader.options();
    if (start != null) {
      readOptions.range(start, length);
    }
    readOptions.schema(readSchema);

    return new OrcIterator(orcPath, readSchema, orcReader.rows(readOptions));
  } catch (IOException e) {
    throw new RuntimeException("Can't open " + file.location(), e);
  }
}
Example #7
Source File: OrcBulkWriterTestUtil.java From flink with Apache License 2.0 | 6 votes |
/**
 * Validates the ORC part files written below the given output directory.
 *
 * <p>Expects exactly one bucket directory. Every part file in it must be non-empty, contain
 * 3 rows with a 2-field schema, use LZ4 compression, carry the user metadata key, and decode
 * to the expected records.
 *
 * @param files    output directory containing the bucket directories
 * @param expected records every part file is expected to contain
 * @throws IOException if a part file cannot be read
 */
public static void validate(File files, List<Record> expected) throws IOException {
  final File[] buckets = files.listFiles();
  assertNotNull(buckets);
  assertEquals(1, buckets.length);

  final File[] partFiles = buckets[0].listFiles();
  assertNotNull(partFiles);

  for (File partFile : partFiles) {
    assertTrue(partFile.length() > 0);

    OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(new Configuration());
    Reader reader =
        OrcFile.createReader(new org.apache.hadoop.fs.Path(partFile.toURI()), readerOptions);

    assertEquals(3, reader.getNumberOfRows());
    assertEquals(2, reader.getSchema().getFieldNames().size());
    // expected value goes first so failure messages read correctly
    assertSame(CompressionKind.LZ4, reader.getCompressionKind());
    assertTrue(reader.hasMetadataValue(USER_METADATA_KEY));
    assertTrue(reader.getMetadataKeys().contains(USER_METADATA_KEY));

    List<Record> results = getResults(reader);

    assertEquals(3, results.size());
    // expected value goes first so failure messages read correctly
    assertEquals(expected, results);
  }
}
Example #8
Source File: OrcCompactionTaskTest.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/**
 * Reads a compacted ORC output file into memory.
 * This only works if fields are int value.
 *
 * <p>Each record is up-converted into a freshly created struct of the file's schema before it
 * is added to the result. The record reader is closed before returning, including when reading
 * fails.
 *
 * @param orcFilePath path of the ORC file to read
 * @return all records of the file, in read order
 * @throws IOException          if the file cannot be opened or read
 * @throws InterruptedException if reading is interrupted
 */
public List<OrcStruct> readOrcFile(Path orcFilePath) throws IOException, InterruptedException {
  ReaderImpl orcReader = new ReaderImpl(orcFilePath, new OrcFile.ReaderOptions(new Configuration()));
  Reader.Options options = new Reader.Options().schema(orcReader.getSchema());

  List<OrcStruct> result = new ArrayList<>();
  // try-with-resources: the original never closed the record reader
  try (OrcMapreduceRecordReader recordReader = new OrcMapreduceRecordReader(orcReader, options)) {
    while (recordReader.nextKeyValue()) {
      OrcStruct recordContainer = (OrcStruct) OrcUtils.createValueRecursively(orcReader.getSchema());
      OrcUtils.upConvertOrcStruct(
          (OrcStruct) recordReader.getCurrentValue(), recordContainer, orcReader.getSchema());
      result.add(recordContainer);
    }
  }

  return result;
}
Example #9
Source File: TestMetricsRowGroupFilterTypes.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Writes the given records to {@code ORC_FILE} (replacing any existing file) and asserts that
 * the written file consists of exactly one stripe.
 *
 * @param records records to write
 * @throws IOException if writing or re-reading the file fails
 */
public void createOrcInputFile(List<Record> records) throws IOException {
  if (ORC_FILE.exists()) {
    Assert.assertTrue(ORC_FILE.delete());
  }

  // write all records
  OutputFile target = Files.localOutput(ORC_FILE);
  try (FileAppender<Record> writer = ORC.write(target)
      .schema(FILE_SCHEMA)
      .createWriterFunc(GenericOrcWriter::buildWriter)
      .build()) {
    writer.addAll(records);
  }

  // read back and check the stripe count
  InputFile written = Files.localInput(ORC_FILE);
  Path writtenPath = new Path(written.location());
  try (Reader orcReader = OrcFile.createReader(writtenPath, OrcFile.readerOptions(new Configuration()))) {
    Assert.assertEquals("Should create only one stripe", 1, orcReader.getStripes().size());
  }

  ORC_FILE.deleteOnExit();
}
Example #10
Source File: OrcBulkWriterTestUtil.java From flink with Apache License 2.0 | 6 votes |
/**
 * Reads every row of the given ORC reader into {@code Record} objects.
 *
 * <p>Column 0 is read as a bytes column and decoded as UTF-8; column 1 is read as a long column
 * and narrowed to {@code int}.
 *
 * <p>The record reader is closed exactly once, after the last batch has been consumed (or on
 * failure). The original closed it inside the batch loop, i.e. after the first batch, and never
 * closed it when the file had no batches.
 *
 * @param reader ORC file reader to read rows from
 * @return all rows as records, in read order
 * @throws IOException if reading a batch fails
 */
private static List<Record> getResults(Reader reader) throws IOException {
  List<Record> results = new ArrayList<>();

  try (RecordReader recordReader = reader.rows()) {
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();

    while (recordReader.nextBatch(batch)) {
      BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
      LongColumnVector intVector = (LongColumnVector) batch.cols[1];

      for (int r = 0; r < batch.size; r++) {
        // explicit charset instead of the platform default
        String name = new String(
            stringVector.vector[r],
            stringVector.start[r],
            stringVector.length[r],
            java.nio.charset.StandardCharsets.UTF_8);
        int age = (int) intVector.vector[r];

        results.add(new Record(name, age));
      }
    }
  }

  return results;
}
Example #11
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testSplitStripesGivenSplits() throws IOException { rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration()); OrcRowInputFormat spy = spy(rowOrcInputFormat); // mock options to check configuration of ORC reader Reader.Options options = spy(new Reader.Options()); doReturn(options).when(spy).getOptions(any()); FileInputSplit[] splits = spy.createInputSplits(3); spy.openInputFormat(); spy.open(splits[0]); verify(options).range(eq(3L), eq(137005L)); spy.open(splits[1]); verify(options).range(eq(137008L), eq(136182L)); spy.open(splits[2]); verify(options).range(eq(273190L), eq(123633L)); }
Example #12
Source File: OrcMetadataStat.java From rainbow with Apache License 2.0 | 5 votes |
/** * get the total uncompressed size of the orc files. * * @return */ @Override public long getTotalSize() { long size = 0; for (Reader reader : this.fileReaders) { // contentLength includes the header ('ORC') length which is 3 bytes. size += reader.getContentLength()-3; } return size; }
Example #13
Source File: ORC.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Opens an ORC reader for the file at the given location.
 *
 * @param location      location of the ORC file
 * @param readerOptions options used to create the reader
 * @return the opened reader
 * @throws RuntimeIOException if the file cannot be opened
 */
static Reader newFileReader(String location, ReaderOptions readerOptions) {
  try {
    Path orcPath = new Path(location);
    return OrcFile.createReader(orcPath, readerOptions);
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to open file: %s", location);
  }
}
Example #14
Source File: ORC.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Opens an ORC reader for the given input file with UTC timestamps enabled.
 *
 * <p>When the file is a {@code HadoopInputFile}, its file system is set on the reader options.
 *
 * @param file   input file to open
 * @param config Hadoop configuration used to build the reader options
 * @return the opened reader
 */
static Reader newFileReader(InputFile file, Configuration config) {
  ReaderOptions options = OrcFile.readerOptions(config).useUTCTimestamp(true);

  if (file instanceof HadoopInputFile) {
    HadoopInputFile hadoopFile = (HadoopInputFile) file;
    options.filesystem(hadoopFile.getFileSystem());
  }

  return newFileReader(file.location(), options);
}
Example #15
Source File: TestMetricsRowGroupFilter.java From iceberg with Apache License 2.0 | 5 votes |
public void createOrcInputFile() throws IOException { if (orcFile.exists()) { Assert.assertTrue(orcFile.delete()); } OutputFile outFile = Files.localOutput(orcFile); try (FileAppender<GenericRecord> appender = ORC.write(outFile) .schema(FILE_SCHEMA) .createWriterFunc(GenericOrcWriter::buildWriter) .build()) { GenericRecord record = GenericRecord.create(FILE_SCHEMA); // create 50 records for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { record.setField("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 record.setField("_no_stats_parquet", TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats // in Parquet, but will produce stats for ORC record.setField("_required", "req"); // required, always non-null record.setField("_all_nulls", null); // never non-null record.setField("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values record.setField("_no_nulls", ""); // optional, but always non-null record.setField("_str", i + "str" + i); GenericRecord structNotNull = GenericRecord.create(_structFieldType); structNotNull.setField("_int_field", INT_MIN_VALUE + i); record.setField("_struct_not_null", structNotNull); // struct with int appender.add(record); } } InputFile inFile = Files.localInput(orcFile); try (Reader reader = OrcFile.createReader(new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) { Assert.assertEquals("Should create only one stripe", 1, reader.getStripes().size()); } orcFile.deleteOnExit(); }
Example #16
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testDecimalPredicate() throws Exception { rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration()); rowOrcInputFormat.addPredicate( new OrcRowInputFormat.Not( // decimal pred new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5)))); FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1); rowOrcInputFormat.openInputFormat(); // mock options to check configuration of ORC reader OrcRowInputFormat spy = spy(rowOrcInputFormat); Reader.Options options = new Reader.Options(); doReturn(options).when(spy).getOptions(any()); spy.openInputFormat(); spy.open(splits[0]); // verify predicate configuration SearchArgument sarg = options.getSearchArgument(); assertNotNull(sarg); assertEquals("(not leaf-0)", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString()); }
Example #17
Source File: TestOrcMetadata.java From rainbow with Apache License 2.0 | 5 votes |
/**
 * Prints metadata of the ORC files in a fixed HDFS directory.
 *
 * <p>Lists the directory, prints path and length of every entry, prints several footer and
 * size details of the first file, then prints content and raw data size of every file and the
 * field names of the first file's schema. Nothing is asserted.
 *
 * <p>Compared to the original, a result-discarding {@code status.getPath()} call and a list of
 * readers that was filled but never read have been removed; the printed output is unchanged.
 *
 * @throws IOException if the file system or any ORC file cannot be accessed
 */
@Test
public void test () throws IOException, Descriptors.DescriptorValidationException {
  Configuration conf = new Configuration();
  System.setProperty("hadoop.home.dir", "/");
  FileSystem fileSystem = FileSystem.get(URI.create("hdfs://presto00:9000"), conf);
  Path hdfsDirPath = new Path("/rainbow2/orc_new_compress");
  System.out.println(fileSystem.isFile(hdfsDirPath));

  FileStatus[] fileStatuses = fileSystem.listStatus(hdfsDirPath);
  System.out.println(fileStatuses.length);
  for (FileStatus status : fileStatuses) {
    System.out.println(status.getPath() + ", " + status.getLen());
  }

  // details of the first file only
  Reader reader = OrcFile.createReader(fileStatuses[0].getPath(), OrcFile.readerOptions(conf));
  System.out.println("file length:" + reader.getFileTail().getFileLength());
  List<String> columnNames = new ArrayList<>();
  columnNames.add("samplepercent");
  System.out.println(reader.getRawDataSizeOfColumns(columnNames));
  System.out.println(reader.getFileTail().getFooter().getTypes(0).getFieldNames(0));
  System.out.println(reader.getTypes().get(0).getSerializedSize());

  // sizes of every file in the directory
  for (FileStatus fileStatus : fileStatuses) {
    Reader reader1 = OrcFile.createReader(fileStatus.getPath(), OrcFile.readerOptions(conf));
    System.out.println("content size: " + reader1.getContentLength() + ", raw size: "
        + reader1.getRawDataSize());
  }

  for (String columnName : reader.getSchema().getFieldNames()) {
    System.out.println(columnName);
  }
}
Example #18
Source File: OrcNoHiveShim.java From flink with Apache License 2.0 | 5 votes |
@Override public RecordReader createRecordReader( Configuration conf, TypeDescription schema, int[] selectedFields, List<OrcSplitReader.Predicate> conjunctPredicates, org.apache.flink.core.fs.Path path, long splitStart, long splitLength) throws IOException { // open ORC file and create reader org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(path.toUri()); Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf)); // get offset and length for the stripes that start in the split Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit( splitStart, splitLength, orcReader.getStripes()); // create ORC row reader configuration Reader.Options options = new Reader.Options() .schema(schema) .range(offsetAndLength.f0, offsetAndLength.f1) .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf)) .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)) .tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf)); // TODO configure filters // configure selected fields options.include(computeProjectionMask(schema, selectedFields)); // create ORC row reader RecordReader orcRowsReader = orcReader.rows(options); // assign ids schema.getId(); return orcRowsReader; }
Example #19
Source File: OrcShimV200.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a record reader by reflectively invoking {@code rowsOptions} on the given reader.
 *
 * @param reader  ORC reader to invoke the method on
 * @param options row reader options passed to the method
 * @return the created record reader
 * @throws IOException wrapping any reflection failure
 */
protected RecordReader createRecordReader(Reader reader, Reader.Options options) throws IOException {
  try {
    Object rows = invokeExactMethod(reader, "rowsOptions", options);
    return (RecordReader) rows;
  } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
    throw new IOException(e);
  }
}
Example #20
Source File: OrcFileAppender.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Returns the offsets of all stripes in the written file.
 *
 * <p>Must only be called once the appender has been closed.
 *
 * @return an unmodifiable list of stripe offsets
 * @throws IllegalStateException if the file is not yet closed
 * @throws RuntimeIOException    if an I/O error occurs while using the ORC reader
 */
@Override
public List<Long> splitOffsets() {
  Preconditions.checkState(isClosed, "File is not yet closed");

  try (Reader orcReader = ORC.newFileReader(file.toInputFile(), conf)) {
    List<Long> offsets = Lists.transform(orcReader.getStripes(), StripeInformation::getOffset);
    return Collections.unmodifiableList(offsets);
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Can't close ORC reader %s", file.location());
  }
}
Example #21
Source File: OrcMetrics.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Builds metrics for an ORC file from its row count, schema and statistics.
 *
 * @param file   ORC input file
 * @param config Hadoop configuration used to open the file
 * @return metrics built from the file
 * @throws RuntimeIOException if an I/O error occurs while using the ORC reader
 */
static Metrics fromInputFile(InputFile file, Configuration config) {
  try (Reader reader = ORC.newFileReader(file, config)) {
    return buildOrcMetrics(
        reader.getNumberOfRows(),
        reader.getSchema(),
        reader.getStatistics());
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to open file: %s", file.location());
  }
}
Example #22
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testTimePredicates() throws Exception { rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration()); rowOrcInputFormat.addPredicate( // OR new OrcRowInputFormat.Or( // timestamp pred new OrcRowInputFormat.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")), // date pred new OrcRowInputFormat.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"))) ); FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1); rowOrcInputFormat.openInputFormat(); // mock options to check configuration of ORC reader OrcRowInputFormat spy = spy(rowOrcInputFormat); Reader.Options options = new Reader.Options(); doReturn(options).when(spy).getOptions(any()); spy.openInputFormat(); spy.open(splits[0]); // verify predicate configuration SearchArgument sarg = options.getSearchArgument(); assertNotNull(sarg); assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString()); assertEquals(2, sarg.getLeaves().size()); List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString()); assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString()); }
Example #23
Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testProjectionMaskNested() throws IOException{ rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration()); OrcRowInputFormat spy = spy(rowOrcInputFormat); // mock options to check configuration of ORC reader Reader.Options options = new Reader.Options(); doReturn(options).when(spy).getOptions(any()); spy.selectFields(9, 11, 2); spy.openInputFormat(); FileInputSplit[] splits = spy.createInputSplits(1); spy.open(splits[0]); // top-level struct is false boolean[] expected = new boolean[]{ false, // top level false, false, // flat fields 0, 1 are out true, // flat field 2 is in false, false, false, false, false, false, // flat fields 3, 4, 5, 6, 7, 8 are out true, true, true, true, true, // nested field 9 is in false, false, false, false, // nested field 10 is out true, true, true, true, true}; // nested field 11 is in assertArrayEquals(expected, options.getInclude()); }
Example #24
Source File: JsonORCFileReaderWriterFactory.java From secor with Apache License 2.0 | 5 votes |
/**
 * Opens the ORC log file and loads its first row batch.
 *
 * <p>The schema is looked up from the schema provider by topic; the row batch is created from
 * the schema stored in the file itself.
 *
 * @param logFilePath log file to read
 * @param codec       compression codec; not used by this constructor
 * @throws IOException if the file cannot be opened or the first batch cannot be read
 */
@SuppressWarnings("deprecation")
public JsonORCFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
  schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);

  Path orcPath = new Path(logFilePath.getLogFilePath());
  Configuration hadoopConf = new Configuration(true);
  Reader orcReader = OrcFile.createReader(orcPath, OrcFile.readerOptions(hadoopConf));

  offset = logFilePath.getOffset();
  rows = orcReader.rows();
  batch = orcReader.getSchema().createRowBatch();
  // prime the batch with the first rows
  rows.nextBatch(batch);
}
Example #25
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testDecimalPredicate() throws Exception { rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration()); rowOrcInputFormat.addPredicate( new OrcRowInputFormat.Not( // decimal pred new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5)))); FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1); rowOrcInputFormat.openInputFormat(); // mock options to check configuration of ORC reader OrcRowInputFormat spy = spy(rowOrcInputFormat); Reader.Options options = new Reader.Options(); doReturn(options).when(spy).getOptions(any()); spy.openInputFormat(); spy.open(splits[0]); // verify predicate configuration SearchArgument sarg = options.getSearchArgument(); assertNotNull(sarg); assertEquals("(not leaf-0)", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString()); }
Example #26
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testTimePredicates() throws Exception { rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration()); rowOrcInputFormat.addPredicate( // OR new OrcRowInputFormat.Or( // timestamp pred new OrcRowInputFormat.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")), // date pred new OrcRowInputFormat.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25"))) ); FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1); rowOrcInputFormat.openInputFormat(); // mock options to check configuration of ORC reader OrcRowInputFormat spy = spy(rowOrcInputFormat); Reader.Options options = new Reader.Options(); doReturn(options).when(spy).getOptions(any()); spy.openInputFormat(); spy.open(splits[0]); // verify predicate configuration SearchArgument sarg = options.getSearchArgument(); assertNotNull(sarg); assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString()); assertEquals(2, sarg.getLeaves().size()); List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString()); assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString()); }
Example #27
Source File: PentahoOrcInputFormat.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
/**
 * Builds the input fields from the reader's type description and then passes them to an
 * {@code OrcMetaDataReader} created for the same reader.
 *
 * @param orcReader ORC reader to take schema and metadata from
 * @return the input fields after the metadata reader has processed them
 */
private List<IOrcInputField> readSchema( Reader orcReader ) {
  List<IOrcInputField> fields =
    new OrcSchemaConverter().buildInputFields( readTypeDescription( orcReader ) );

  IOrcMetaData.Reader metaDataReader = new OrcMetaDataReader( orcReader );
  metaDataReader.read( fields );

  return fields;
}
Example #28
Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testProjectionMaskNested() throws IOException{ rowOrcInputFormat = new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration()); OrcRowInputFormat spy = spy(rowOrcInputFormat); // mock options to check configuration of ORC reader Reader.Options options = new Reader.Options(); doReturn(options).when(spy).getOptions(any()); spy.selectFields(9, 11, 2); spy.openInputFormat(); FileInputSplit[] splits = spy.createInputSplits(1); spy.open(splits[0]); // top-level struct is false boolean[] expected = new boolean[]{ false, // top level false, false, // flat fields 0, 1 are out true, // flat field 2 is in false, false, false, false, false, false, // flat fields 3, 4, 5, 6, 7, 8 are out true, true, true, true, true, // nested field 9 is in false, false, false, false, // nested field 10 is out true, true, true, true, true}; // nested field 11 is in assertArrayEquals(expected, options.getInclude()); }
Example #29
Source File: OrcMetaDataReader.java From pentaho-hadoop-shims with Apache License 2.0 | 4 votes |
/**
 * Creates a metadata reader that keeps a reference to the given ORC reader.
 *
 * @param reader ORC reader stored in this instance
 */
public OrcMetaDataReader( Reader reader ) {
  this.reader = reader;
}
Example #30
Source File: OrcUtils.java From incubator-gobblin with Apache License 2.0 | 4 votes |
/**
 * Opens an ORC reader for the given file.
 *
 * @param conf        Hadoop configuration used to build the reader options
 * @param orcFilePath path of the ORC file
 * @return the opened reader
 * @throws IOException if the file cannot be opened
 */
public static Reader getRecordReaderFromFile(Configuration conf, Path orcFilePath) throws IOException {
  OrcFile.ReaderOptions readerOptions = new OrcFile.ReaderOptions(conf);
  return OrcFile.createReader(orcFilePath, readerOptions);
}