org.apache.orc.Reader Java Examples

Source File: OrcIterable.java From iceberg with Apache License 2.0

6 votes

/**
 * Builds a {@link VectorizedRowBatchIterator} over the rows of an already opened ORC file.
 *
 * @param file          input file; its location is handed to the iterator and the file itself
 *                      is used in the failure message
 * @param readerSchema  ORC schema to read with
 * @param start         range start passed to the read options, or {@code null} to leave the
 *                      range unset
 * @param length        range length; only used when {@code start} is non-null
 * @param orcFileReader open ORC reader that supplies the read options and the row reader
 * @param sarg          search argument applied with an empty column-name array
 * @return an iterator over the row batches
 * @throws RuntimeIOException if the ORC row reader cannot be created
 */
private static VectorizedRowBatchIterator newOrcIterator(InputFile file,
                                                         TypeDescription readerSchema,
                                                         Long start, Long length,
                                                         Reader orcFileReader, SearchArgument sarg) {
  final Reader.Options readOptions = orcFileReader.options();
  readOptions.schema(readerSchema);
  readOptions.searchArgument(sarg, new String[0]);
  // The range is optional: without a start offset the options keep their default range.
  if (start != null) {
    readOptions.range(start, length);
  }

  try {
    return new VectorizedRowBatchIterator(
        file.location(), readerSchema, orcFileReader.rows(readOptions));
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to get ORC rows for file: %s", file);
  }
}

Source File: OrcShimV200.java From flink with Apache License 2.0

6 votes

/**
 * Creates an ORC {@link Reader} reflectively through the Hive ORC classes
 * {@code org.apache.hadoop.hive.ql.io.orc.OrcFile} and
 * {@code org.apache.hadoop.hive.ql.io.orc.ReaderImpl}.
 *
 * @param path path of the ORC file to open
 * @param conf configuration passed to {@code OrcFile.readerOptions}
 * @return the reader instance produced by the {@code ReaderImpl} constructor
 * @throws IOException if the classes cannot be loaded or the reflective calls fail; the
 *                     reflection exception is preserved as the cause
 */
protected Reader createReader(Path path, Configuration conf) throws IOException {
	try {
		// Wildcard types instead of raw Class: nothing here depends on the type argument.
		Class<?> orcFileClass = Class.forName("org.apache.hadoop.hive.ql.io.orc.OrcFile");
		Object readerOptions = invokeStaticMethod(orcFileClass, "readerOptions", conf);

		Class<?> readerClass = Class.forName("org.apache.hadoop.hive.ql.io.orc.ReaderImpl");
		return (Reader) invokeConstructor(readerClass, path, readerOptions);
	} catch (ClassNotFoundException |
			NoSuchMethodException |
			IllegalAccessException |
			InstantiationException |
			InvocationTargetException e) {
		throw new IOException(e);
	}
}

Source File: OrcFileSystemITCase.java From flink with Apache License 2.0

6 votes

/**
 * Runs the inherited non-partitioned test, then opens the first visible result file as ORC
 * and checks its compression kind: SNAPPY when {@code configure} is set, ZLIB otherwise.
 */
@Override
public void testNonPartition() {
	super.testNonPartition();

	// test configure success
	File resultDir = new File(URI.create(resultPath()).getPath());
	// Skip files whose names start with '.' or '_'.
	File[] dataFiles = resultDir.listFiles((dir, name) ->
			!(name.startsWith(".") || name.startsWith("_")));
	Assert.assertNotNull(dataFiles);
	Path firstDataFile = new Path(URI.create(dataFiles[0].getAbsolutePath()));

	try {
		Reader orcReader = OrcFile.createReader(firstDataFile, OrcFile.readerOptions(new Configuration()));
		Assert.assertEquals(configure ? "SNAPPY" : "ZLIB", orcReader.getCompressionKind().toString());
	} catch (IOException e) {
		throw new RuntimeException(e);
	}
}

Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Checks the byte range that is configured on the ORC reader options when each of three
 * input splits of the flat test file is opened.
 *
 * <p>The expected ranges are contiguous: 3 + 137005 = 137008 and 137008 + 136182 = 273190.
 */
@Test
public void testSplitStripesGivenSplits() throws IOException {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());

	// spy on the input format so getOptions() can be stubbed below
	OrcRowInputFormat spy = spy(rowOrcInputFormat);

	// mock options to check configuration of ORC reader
	Reader.Options options = spy(new Reader.Options());
	doReturn(options).when(spy).getOptions(any());

	FileInputSplit[] splits = spy.createInputSplits(3);

	spy.openInputFormat();
	// each open() is followed by a check of the (offset, length) range set on the options
	spy.open(splits[0]);
	verify(options).range(eq(3L), eq(137005L));
	spy.open(splits[1]);
	verify(options).range(eq(137008L), eq(136182L));
	spy.open(splits[2]);
	verify(options).range(eq(273190L), eq(123633L));
}

Source File: PentahoOrcRecordReader.java From pentaho-hadoop-shims with Apache License 2.0

6 votes

/**
 * Opens an ORC reader for the given file name. When the name resolves to a directory, the
 * first entry whose name ends with ".orc" is opened instead.
 *
 * @param fileName file or directory to read
 * @param conf     Hadoop configuration; S3 credentials are applied to it if necessary
 * @return the ORC reader
 * @throws IllegalArgumentException if the path does not exist, the directory holds no
 *                                  ".orc" file, or any other I/O error occurs
 */
static Reader getReader( String fileName, Configuration conf ) {
  try {
    S3NCredentialUtils.applyS3CredentialsToHadoopConfigurationIfNecessary( fileName, conf );
    Path orcPath = new Path( S3NCredentialUtils.scrubFilePathIfNecessary( fileName ) );
    FileSystem fileSystem = FileSystem.get( orcPath.toUri(), conf );
    if ( !fileSystem.exists( orcPath ) ) {
      throw new NoSuchFileException( fileName );
    }

    if ( fileSystem.getFileStatus( orcPath ).isDirectory() ) {
      // Directory: pick the first ".orc" entry it contains.
      PathFilter orcOnly = candidate -> candidate.getName().endsWith( ".orc" );
      FileStatus[] orcFiles = fileSystem.listStatus( orcPath, orcOnly );
      if ( orcFiles.length == 0 ) {
        throw new NoSuchFileException( fileName );
      }
      orcPath = orcFiles[ 0 ].getPath();
    }

    return OrcFile.createReader( orcPath, OrcFile.readerOptions( conf ).filesystem( fileSystem ) );
  } catch ( IOException e ) {
    throw new IllegalArgumentException( "Unable to read data from file " + fileName, e );
  }
}

Source File: ORC.java From iceberg with Apache License 2.0

6 votes

/**
 * Opens the configured file and returns an iterator over its rows, read with the ORC schema
 * converted from {@code schema}. A byte range is applied only when {@code start} is set.
 *
 * @return the ORC row iterator
 * @throws RuntimeException if the file cannot be opened or read
 */
public OrcIterator build() {
  Preconditions.checkNotNull(schema, "Schema is required");
  try {
    Path orcPath = new Path(file.location());
    Reader orcReader = OrcFile.createReader(orcPath, OrcFile.readerOptions(conf));
    TypeDescription orcSchema = TypeConversion.toOrc(schema, new ColumnIdMap());

    Reader.Options readOptions = orcReader.options();
    readOptions.schema(orcSchema);
    if (start != null) {
      readOptions.range(start, length);
    }
    return new OrcIterator(orcPath, orcSchema, orcReader.rows(readOptions));
  } catch (IOException e) {
    throw new RuntimeException("Can't open " + file.location(), e);
  }
}

Source File: OrcBulkWriterTestUtil.java From flink with Apache License 2.0

6 votes

/**
 * Validates the ORC part files found in the single bucket directory under {@code files}.
 *
 * <p>Each part file must be non-empty, hold 3 rows and 2 fields, be LZ4-compressed, carry
 * the {@code USER_METADATA_KEY} metadata entry, and decode to {@code expected}.
 *
 * @param files    output directory that must contain exactly one bucket directory
 * @param expected records every part file is expected to contain
 * @throws IOException if a part file cannot be opened or read
 */
public static void validate(File files, List<Record> expected) throws IOException {
	final File[] buckets = files.listFiles();
	assertNotNull(buckets);
	assertEquals(1, buckets.length);

	final File[] partFiles = buckets[0].listFiles();
	assertNotNull(partFiles);

	for (File partFile : partFiles) {
		assertTrue(partFile.length() > 0);

		OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(new Configuration());
		Reader reader = OrcFile.createReader(new org.apache.hadoop.fs.Path(partFile.toURI()), readerOptions);

		assertEquals(3, reader.getNumberOfRows());
		assertEquals(2, reader.getSchema().getFieldNames().size());
		// Expected value goes first so failure messages label the two sides correctly.
		assertSame(CompressionKind.LZ4, reader.getCompressionKind());
		assertTrue(reader.hasMetadataValue(USER_METADATA_KEY));
		assertTrue(reader.getMetadataKeys().contains(USER_METADATA_KEY));

		List<Record> results = getResults(reader);

		assertEquals(3, results.size());
		assertEquals(expected, results);
	}
}

Source File: OrcCompactionTaskTest.java From incubator-gobblin with Apache License 2.0

6 votes

/**
 * Reads an output ORC compacted file into memory.
 * This only works if fields are int value.
 *
 * @param orcFilePath path of the ORC file to read
 * @return one up-converted {@link OrcStruct} per record in the file
 * @throws IOException if the file cannot be opened, read, or closed
 * @throws InterruptedException if reading the next record is interrupted
 */
public List<OrcStruct> readOrcFile(Path orcFilePath)
    throws IOException, InterruptedException {
  ReaderImpl orcReader = new ReaderImpl(orcFilePath, new OrcFile.ReaderOptions(new Configuration()));

  Reader.Options options = new Reader.Options().schema(orcReader.getSchema());
  List<OrcStruct> result = new ArrayList<>();

  // try-with-resources releases the record reader even when reading fails part-way.
  try (OrcMapreduceRecordReader recordReader = new OrcMapreduceRecordReader(orcReader, options)) {
    while (recordReader.nextKeyValue()) {
      // A fresh container per record: the value returned by the reader is copied into it.
      OrcStruct recordContainer = (OrcStruct) OrcUtils.createValueRecursively(orcReader.getSchema());
      OrcUtils.upConvertOrcStruct((OrcStruct) recordReader.getCurrentValue(), recordContainer, orcReader.getSchema());
      result.add(recordContainer);
    }
  }

  return result;
}

Source File: TestMetricsRowGroupFilterTypes.java From iceberg with Apache License 2.0

6 votes

/**
 * Rewrites {@code ORC_FILE} with the given records and asserts that the resulting file
 * has exactly one stripe. The file is registered for deletion on JVM exit.
 *
 * @param records records to write to the ORC file
 * @throws IOException if writing or re-reading the file fails
 */
public void createOrcInputFile(List<Record> records) throws IOException {
  // Start from a clean slate if an earlier run left the file behind.
  if (ORC_FILE.exists()) {
    Assert.assertTrue(ORC_FILE.delete());
  }

  try (FileAppender<Record> writer = ORC.write(Files.localOutput(ORC_FILE))
      .schema(FILE_SCHEMA)
      .createWriterFunc(GenericOrcWriter::buildWriter)
      .build()) {
    writer.addAll(records);
  }

  Path writtenPath = new Path(Files.localInput(ORC_FILE).location());
  try (Reader orcReader = OrcFile.createReader(writtenPath, OrcFile.readerOptions(new Configuration()))) {
    int stripeCount = orcReader.getStripes().size();
    Assert.assertEquals("Should create only one stripe", 1, stripeCount);
  }

  ORC_FILE.deleteOnExit();
}

Source File: OrcBulkWriterTestUtil.java From flink with Apache License 2.0

6 votes

/**
 * Reads every row of the ORC file behind {@code reader} and converts it into a
 * {@link Record}, taking the name from column 0 and the age from column 1.
 *
 * @param reader open ORC reader
 * @return the decoded records in file order
 * @throws IOException if reading a batch or closing the row reader fails
 */
private static List<Record> getResults(Reader reader) throws IOException {
	List<Record> results = new ArrayList<>();

	// The row reader is closed once, after the last batch. Closing it inside the loop would
	// poll an already closed reader as soon as the file spans more than one batch.
	try (RecordReader recordReader = reader.rows()) {
		VectorizedRowBatch batch = reader.getSchema().createRowBatch();

		while (recordReader.nextBatch(batch)) {
			BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
			LongColumnVector intVector = (LongColumnVector) batch.cols[1];
			for (int r = 0; r < batch.size; r++) {
				// Decode explicitly as UTF-8 instead of relying on the platform default charset.
				String name = new String(
						stringVector.vector[r],
						stringVector.start[r],
						stringVector.length[r],
						java.nio.charset.StandardCharsets.UTF_8);
				int age = (int) intVector.vector[r];

				results.add(new Record(name, age));
			}
		}
	}

	return results;
}

Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0

6 votes

/**
 * Opens three input splits of the flat test file one after another and verifies the
 * (offset, length) range set on the ORC reader options for each of them.
 *
 * <p>The verified ranges are adjacent: the first ends at 3 + 137005 = 137008, where the
 * second begins, and the second ends at 137008 + 136182 = 273190, where the third begins.
 */
@Test
public void testSplitStripesGivenSplits() throws IOException {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_FLAT), TEST_SCHEMA_FLAT, new Configuration());

	// wrap the format in a spy so that getOptions() can return the options object below
	OrcRowInputFormat spy = spy(rowOrcInputFormat);

	// mock options to check configuration of ORC reader
	Reader.Options options = spy(new Reader.Options());
	doReturn(options).when(spy).getOptions(any());

	FileInputSplit[] splits = spy.createInputSplits(3);

	spy.openInputFormat();
	// verify the range right after each open()
	spy.open(splits[0]);
	verify(options).range(eq(3L), eq(137005L));
	spy.open(splits[1]);
	verify(options).range(eq(137008L), eq(136182L));
	spy.open(splits[2]);
	verify(options).range(eq(273190L), eq(123633L));
}

Source File: OrcMetadataStat.java From rainbow with Apache License 2.0

5 votes

/**
 * Get the total size of the ORC files, described by the original author as the
 * uncompressed size.
 *
 * @return the sum over all file readers of the content length minus the 3-byte
 *         'ORC' header
 */
@Override
public long getTotalSize()
{
    long total = 0;
    for (Reader orcReader : this.fileReaders)
    {
        // contentLength includes the header ('ORC') length which is 3 bytes.
        long contentLength = orcReader.getContentLength();
        total += contentLength - 3;
    }
    return total;
}

Source File: ORC.java From iceberg with Apache License 2.0

5 votes

/**
 * Opens an ORC reader for the file at {@code location}.
 *
 * @param location      location of the ORC file
 * @param readerOptions options used to create the reader
 * @return the opened reader
 * @throws RuntimeIOException if the file cannot be opened
 */
static Reader newFileReader(String location, ReaderOptions readerOptions) {
  final Path orcPath = new Path(location);
  try {
    return OrcFile.createReader(orcPath, readerOptions);
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to open file: %s", location);
  }
}

Source File: ORC.java From iceberg with Apache License 2.0

5 votes

/**
 * Opens an ORC reader for {@code file} with UTC timestamps enabled. When the file is a
 * {@link HadoopInputFile}, its own file system is set on the reader options.
 *
 * @param file   input file to open
 * @param config Hadoop configuration used to build the reader options
 * @return the opened reader
 */
static Reader newFileReader(InputFile file, Configuration config) {
  ReaderOptions orcOptions = OrcFile.readerOptions(config).useUTCTimestamp(true);
  if (file instanceof HadoopInputFile) {
    HadoopInputFile hadoopFile = (HadoopInputFile) file;
    orcOptions.filesystem(hadoopFile.getFileSystem());
  }
  return newFileReader(file.location(), orcOptions);
}

Source File: TestMetricsRowGroupFilter.java From iceberg with Apache License 2.0

5 votes

/**
 * Rewrites {@code orcFile} with one generated record per id from {@code INT_MIN_VALUE} to
 * {@code INT_MAX_VALUE} inclusive, then asserts that the file has exactly one stripe.
 * The file is registered for deletion on JVM exit.
 *
 * @throws IOException if writing or re-reading the file fails
 */
public void createOrcInputFile() throws IOException {
  // Remove any file left over from an earlier run.
  if (orcFile.exists()) {
    Assert.assertTrue(orcFile.delete());
  }

  try (FileAppender<GenericRecord> writer = ORC.write(Files.localOutput(orcFile))
      .schema(FILE_SCHEMA)
      .createWriterFunc(GenericOrcWriter::buildWriter)
      .build()) {
    // One record instance is re-populated and appended on every iteration.
    GenericRecord row = GenericRecord.create(FILE_SCHEMA);
    // create 50 records
    for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i++) {
      row.setField("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0
      // value longer than 4k will produce no stats in Parquet, but will produce stats for ORC
      row.setField("_no_stats_parquet", TOO_LONG_FOR_STATS_PARQUET);
      row.setField("_required", "req"); // required, always non-null
      row.setField("_all_nulls", null); // never non-null

      // includes some null values: every tenth row is null
      String someNulls;
      if (i % 10 == 0) {
        someNulls = null;
      } else {
        someNulls = "some";
      }
      row.setField("_some_nulls", someNulls);

      row.setField("_no_nulls", ""); // optional, but always non-null
      row.setField("_str", i + "str" + i);

      // struct with int
      GenericRecord nestedStruct = GenericRecord.create(_structFieldType);
      nestedStruct.setField("_int_field", INT_MIN_VALUE + i);
      row.setField("_struct_not_null", nestedStruct);

      writer.add(row);
    }
  }

  Path writtenPath = new Path(Files.localInput(orcFile).location());
  try (Reader orcReader = OrcFile.createReader(writtenPath, OrcFile.readerOptions(new Configuration()))) {
    int stripeCount = orcReader.getStripes().size();
    Assert.assertEquals("Should create only one stripe", 1, stripeCount);
  }

  orcFile.deleteOnExit();
}

Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0

5 votes

/**
 * Adds a NOT(EQUALS) predicate on the decimal column {@code _col0} and verifies the
 * search argument that ends up on the ORC reader options after a split is opened.
 */
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Not(
			// decimal pred
			new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

	// splits are created on the real format, before it is wrapped in a spy
	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	// one leaf, wrapped in a NOT expression
	assertEquals("(not leaf-0)", sarg.getExpression().toString());
	assertEquals(1, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}

Source File: TestOrcMetadata.java From rainbow with Apache License 2.0

5 votes

/**
 * Manual exploration test: lists the ORC files under a hard-coded HDFS directory and prints
 * file, column and size metadata read through the ORC {@link Reader} API.
 *
 * <p>NOTE(review): relies on the HDFS endpoint {@code hdfs://presto00:9000} and the directory
 * {@code /rainbow2/orc_new_compress} holding at least one file; presumably not meant to run
 * in automated builds.
 *
 * @throws IOException if the file system or an ORC file cannot be accessed
 * @throws Descriptors.DescriptorValidationException declared by the original signature
 */
@Test
public void test () throws IOException, Descriptors.DescriptorValidationException
{
    Configuration conf = new Configuration();
    System.setProperty("hadoop.home.dir", "/");
    FileSystem fileSystem = FileSystem.get(URI.create("hdfs://presto00:9000"), conf);
    Path hdfsDirPath = new Path("/rainbow2/orc_new_compress");
    System.out.println(fileSystem.isFile(hdfsDirPath));
    FileStatus[] fileStatuses = fileSystem.listStatus(hdfsDirPath);
    System.out.println(fileStatuses.length);
    for (FileStatus status : fileStatuses)
    {
        System.out.println(status.getPath() + ", " + status.getLen());
    }

    // Detailed metadata is printed for the first file only.
    Reader reader = OrcFile.createReader(fileStatuses[0].getPath(),
            OrcFile.readerOptions(conf));
    System.out.println("file length:" + reader.getFileTail().getFileLength());
    List<String> columnNames = new ArrayList<>();
    columnNames.add("samplepercent");
    System.out.println(reader.getRawDataSizeOfColumns(columnNames));
    System.out.println(reader.getFileTail().getFooter().getTypes(0).getFieldNames(0));
    System.out.println(reader.getTypes().get(0).getSerializedSize());

    // Content and raw sizes are printed for every file in the directory.
    for (FileStatus fileStatus : fileStatuses)
    {
        Reader reader1 = OrcFile.createReader(fileStatus.getPath(),
                OrcFile.readerOptions(conf));
        System.out.println("content size: " + reader1.getContentLength() + ", raw size: "
        + reader1.getRawDataSize());
    }

    for (String columnName : reader.getSchema().getFieldNames())
    {
        System.out.println(columnName);
    }
}

Source File: OrcNoHiveShim.java From flink with Apache License 2.0

5 votes

/**
 * Opens the ORC file at {@code path} and creates a row reader limited to the offset and
 * length that {@code getOffsetAndLengthForSplit} computes for the given split.
 *
 * @param conf               configuration for opening the file and for the ORC read flags
 * @param schema             schema to read with
 * @param selectedFields     indexes of the fields to project
 * @param conjunctPredicates predicates; not applied by this method (see the TODO below)
 * @param path               Flink path of the ORC file
 * @param splitStart         start offset of the split
 * @param splitLength        length of the split
 * @return the ORC row reader
 * @throws IOException if the file cannot be opened or the row reader cannot be created
 */
@Override
public RecordReader createRecordReader(
		Configuration conf,
		TypeDescription schema,
		int[] selectedFields,
		List<OrcSplitReader.Predicate> conjunctPredicates,
		org.apache.flink.core.fs.Path path,
		long splitStart,
		long splitLength) throws IOException {
	// open ORC file and create reader
	org.apache.hadoop.fs.Path hadoopPath = new org.apache.hadoop.fs.Path(path.toUri());
	Reader fileReader = OrcFile.createReader(hadoopPath, OrcFile.readerOptions(conf));

	// get offset and length for the stripes that start in the split
	Tuple2<Long, Long> splitRange =
			getOffsetAndLengthForSplit(splitStart, splitLength, fileReader.getStripes());

	// create ORC row reader configuration, including the projection of the selected fields
	// TODO configure filters
	Reader.Options readOptions = new Reader.Options()
			.schema(schema)
			.range(splitRange.f0, splitRange.f1)
			.useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
			.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
			.tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf))
			.include(computeProjectionMask(schema, selectedFields));

	// create ORC row reader
	RecordReader rowReader = fileReader.rows(readOptions);

	// assign ids
	schema.getId();

	return rowReader;
}

Source File: OrcShimV200.java From flink with Apache License 2.0

5 votes

/**
 * Creates a row reader by reflectively invoking {@code rowsOptions} on the given reader.
 *
 * @param reader  ORC reader on which the method is invoked
 * @param options read options passed as the single argument
 * @return the row reader returned by the reflective call
 * @throws IOException if the reflective call fails; the reflection exception is the cause
 */
protected RecordReader createRecordReader(Reader reader, Reader.Options options) throws IOException {
	final Object rowReader;
	try {
		rowReader = invokeExactMethod(reader, "rowsOptions", options);
	} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
		throw new IOException(e);
	}
	return (RecordReader) rowReader;
}

Source File: OrcFileAppender.java From iceberg with Apache License 2.0

5 votes

/**
 * Returns the offset of every stripe in the written ORC file.
 *
 * @return an unmodifiable list of stripe offsets
 * @throws IllegalStateException if the appender has not been closed yet
 * @throws RuntimeIOException if an I/O error occurs while the reader is in use
 */
@Override
public List<Long> splitOffsets() {
  Preconditions.checkState(isClosed, "File is not yet closed");
  try (Reader orcReader = ORC.newFileReader(file.toInputFile(), conf)) {
    List<Long> stripeOffsets = Lists.transform(orcReader.getStripes(), StripeInformation::getOffset);
    return Collections.unmodifiableList(stripeOffsets);
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Can't close ORC reader %s", file.location());
  }
}

Source File: OrcMetrics.java From iceberg with Apache License 2.0

5 votes

/**
 * Builds metrics from the row count, schema and column statistics of an ORC file.
 *
 * @param file   ORC input file
 * @param config Hadoop configuration used to open the file
 * @return the metrics produced by {@code buildOrcMetrics}
 * @throws RuntimeIOException if an I/O error occurs while the reader is in use
 */
static Metrics fromInputFile(InputFile file, Configuration config) {
  try (Reader reader = ORC.newFileReader(file, config)) {
    long rowCount = reader.getNumberOfRows();
    return buildOrcMetrics(rowCount, reader.getSchema(), reader.getStatistics());
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe, "Failed to open file: %s", file.location());
  }
}

Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0

5 votes

/**
 * Adds an OR of a timestamp EQUALS predicate and a date EQUALS predicate and verifies the
 * search argument that ends up on the ORC reader options after a split is opened.
 */
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	rowOrcInputFormat.addPredicate(
		// OR
		new OrcRowInputFormat.Or(
			// timestamp pred
			new OrcRowInputFormat.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")),
			// date pred
			new OrcRowInputFormat.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25")))
		);

	// splits are created on the real format, before it is wrapped in a spy
	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	// two leaves combined by OR
	assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString());
	assertEquals(2, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	// the timestamp leaf prints as "...56.1" although it was built from "...56.100"
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}

Source File: OrcRowInputFormatTest.java From flink with Apache License 2.0

5 votes

/**
 * Selects fields 9, 11 and 2 of the nested test schema and verifies the include mask that
 * is set on the ORC reader options after a split is opened.
 */
@Test
public void testProjectionMaskNested() throws IOException{
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	OrcRowInputFormat spy = spy(rowOrcInputFormat);

	// mock options to check configuration of ORC reader
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	// fields are selected out of order; the expected mask below is in schema order
	spy.selectFields(9, 11, 2);
	spy.openInputFormat();
	FileInputSplit[] splits = spy.createInputSplits(1);
	spy.open(splits[0]);

	// top-level struct is false
	boolean[] expected = new boolean[]{
		false, // top level
		false, false, // flat fields 0, 1 are out
		true, // flat field 2 is in
		false, false, false, false, false, false, // flat fields 3, 4, 5, 6, 7, 8 are out
		true, true, true, true, true, // nested field 9 is in
		false, false, false, false, // nested field 10 is out
		true, true, true, true, true}; // nested field 11 is in
	assertArrayEquals(expected, options.getInclude());
}

Source File: JsonORCFileReaderWriterFactory.java From secor with Apache License 2.0

5 votes

/**
 * Opens the ORC file behind {@code logFilePath} and loads its first row batch.
 *
 * @param logFilePath log file to read; also supplies the topic for the schema lookup and
 *                    the starting offset
 * @param codec       compression codec; not used by this constructor
 * @throws IOException if the file cannot be opened or the first batch cannot be read
 */
@SuppressWarnings("deprecation")
public JsonORCFileReader(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);

    Path orcPath = new Path(logFilePath.getLogFilePath());
    Reader orcReader = OrcFile.createReader(orcPath, OrcFile.readerOptions(new Configuration(true)));

    offset = logFilePath.getOffset();
    rows = orcReader.rows();
    batch = orcReader.getSchema().createRowBatch();
    // Load the first batch eagerly.
    rows.nextBatch(batch);
}

Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Registers a negated equality predicate on the decimal column {@code _col0}, opens one
 * split through a spied input format, and checks the search argument stored on the ORC
 * reader options.
 */
@Test
public void testDecimalPredicate() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_DECIMAL), TEST_SCHEMA_DECIMAL, new Configuration());

	rowOrcInputFormat.addPredicate(
		new OrcRowInputFormat.Not(
			// decimal pred
			new OrcRowInputFormat.Equals("_col0", PredicateLeaf.Type.DECIMAL, BigDecimal.valueOf(-1000.5))));

	// the un-spied format creates the split and is opened once before the spy is built
	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	// expression is NOT over a single leaf
	assertEquals("(not leaf-0)", sarg.getExpression().toString());
	assertEquals(1, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	assertEquals("(EQUALS _col0 -1000.5)", leaves.get(0).toString());
}

Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Registers an OR predicate over a timestamp equality and a date equality, opens one split
 * through a spied input format, and checks the search argument stored on the ORC reader
 * options.
 */
@Test
public void testTimePredicates() throws Exception {
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_TIMETYPES), TEST_SCHEMA_TIMETYPES, new Configuration());

	rowOrcInputFormat.addPredicate(
		// OR
		new OrcRowInputFormat.Or(
			// timestamp pred
			new OrcRowInputFormat.Equals("time", PredicateLeaf.Type.TIMESTAMP, Timestamp.valueOf("1900-05-05 12:34:56.100")),
			// date pred
			new OrcRowInputFormat.Equals("date", PredicateLeaf.Type.DATE, Date.valueOf("1900-12-25")))
		);

	// the un-spied format creates the split and is opened once before the spy is built
	FileInputSplit[] splits = rowOrcInputFormat.createInputSplits(1);
	rowOrcInputFormat.openInputFormat();

	// mock options to check configuration of ORC reader
	OrcRowInputFormat spy = spy(rowOrcInputFormat);
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	spy.openInputFormat();
	spy.open(splits[0]);

	// verify predicate configuration
	SearchArgument sarg = options.getSearchArgument();
	assertNotNull(sarg);
	// expression is OR over two leaves
	assertEquals("(or leaf-0 leaf-1)", sarg.getExpression().toString());
	assertEquals(2, sarg.getLeaves().size());
	List<PredicateLeaf> leaves = sarg.getLeaves();
	// leaf 0 is the timestamp predicate, leaf 1 the date predicate
	assertEquals("(EQUALS time 1900-05-05 12:34:56.1)", leaves.get(0).toString());
	assertEquals("(EQUALS date 1900-12-25)", leaves.get(1).toString());
}

Source File: PentahoOrcInputFormat.java From pentaho-hadoop-shims with Apache License 2.0

5 votes

/**
 * Builds the input fields from the reader's type description and then passes them to an
 * {@link OrcMetaDataReader} created for the same reader.
 *
 * @param orcReader ORC reader to take the schema and metadata from
 * @return the input fields after the metadata reader has processed them
 */
private List<IOrcInputField> readSchema( Reader orcReader ) {
  List<IOrcInputField> fields =
    new OrcSchemaConverter().buildInputFields( readTypeDescription( orcReader ) );

  IOrcMetaData.Reader metaDataReader = new OrcMetaDataReader( orcReader );
  metaDataReader.read( fields );
  return fields;
}

Source File: OrcRowInputFormatTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Projects fields 9, 11 and 2 of the nested test schema, opens one split through a spied
 * input format, and checks the include mask stored on the ORC reader options.
 */
@Test
public void testProjectionMaskNested() throws IOException{
	rowOrcInputFormat =
		new OrcRowInputFormat(getPath(TEST_FILE_NESTED), TEST_SCHEMA_NESTED, new Configuration());

	OrcRowInputFormat spy = spy(rowOrcInputFormat);

	// mock options to check configuration of ORC reader
	Reader.Options options = new Reader.Options();
	doReturn(options).when(spy).getOptions(any());

	// selection order (9, 11, 2) differs from schema order; the mask below follows schema order
	spy.selectFields(9, 11, 2);
	spy.openInputFormat();
	FileInputSplit[] splits = spy.createInputSplits(1);
	spy.open(splits[0]);

	// top-level struct is false
	boolean[] expected = new boolean[]{
		false, // top level
		false, false, // flat fields 0, 1 are out
		true, // flat field 2 is in
		false, false, false, false, false, false, // flat fields 3, 4, 5, 6, 7, 8 are out
		true, true, true, true, true, // nested field 9 is in
		false, false, false, false, // nested field 10 is out
		true, true, true, true, true}; // nested field 11 is in
	assertArrayEquals(expected, options.getInclude());
}

Source File: OrcMetaDataReader.java From pentaho-hadoop-shims with Apache License 2.0

4 votes

/**
 * Creates a metadata reader that keeps a reference to the given ORC reader.
 *
 * @param reader ORC reader stored in the {@code reader} field; it is not validated here
 */
public OrcMetaDataReader( Reader reader ) {
  this.reader = reader;
}

Source File: OrcUtils.java From incubator-gobblin with Apache License 2.0

4 votes

/**
 * Opens an ORC {@link Reader} for the given file using reader options built from
 * {@code conf}.
 *
 * @param conf        Hadoop configuration used to build the reader options
 * @param orcFilePath path of the ORC file
 * @return the opened ORC reader
 * @throws IOException if the file cannot be opened
 */
public static Reader getRecordReaderFromFile(Configuration conf, Path orcFilePath)
    throws IOException {
  OrcFile.ReaderOptions readerOptions = new OrcFile.ReaderOptions(conf);
  return OrcFile.createReader(orcFilePath, readerOptions);
}

org.apache.orc.Reader Java Examples