org.apache.orc.OrcProto#StripeFooter

Source File: DremioORCRecordUtils.java From dremio-oss with Apache License 2.0

6 votes

/**
 * Reads the footer bytes of the given stripe from the file and parses them.
 *
 * <p>This function is a copy of original implementation from hive-private repository.
 *
 * @param stripe stripe whose footer is read
 * @return the parsed stripe footer
 * @throws IOException propagated from opening or reading the file, or from parsing
 */
@Override
public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
  // Open the file on first use.
  if (file == null) {
    open();
  }

  // The footer is read from the position after the stripe's index and data lengths.
  final long footerStart = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
  final int footerSize = (int) stripe.getFooterLength();

  // Pull the raw footer bytes into a heap buffer.
  final ByteBuffer footerBytes = ByteBuffer.allocate(footerSize);
  file.readFully(footerStart, footerBytes.array(), footerBytes.arrayOffset(), footerSize);

  // Wrap the buffer as a single chunk and parse it through the codec-aware stream.
  return OrcProto.StripeFooter.parseFrom(
    InStream.createCodedInputStream(
      "footer",
      Lists.<DiskRange>newArrayList(new BufferChunk(footerBytes, 0)),
      footerSize,
      codec,
      bufferSize));
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

6 votes

/**
 * Picks the delegate reader matching this column's encoding in the stripe footer
 * and forwards the stripe start to it.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the delegate reader
 * @throws IllegalArgumentException if the column encoding is not a direct or dictionary kind
 */
@Override
void startStripe(Map<StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  // Look the encoding kind up once; it drives both the dispatch and the error message.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();

  switch (encodingKind) {
    case DIRECT:
    case DIRECT_V2:
      reader = new StringDirectTreeReader(columnId);
      break;
    case DICTIONARY:
    case DICTIONARY_V2:
      reader = new StringDictionaryTreeReader(columnId);
      break;
    default:
      throw new IllegalArgumentException("Unsupported encoding " + encodingKind);
  }

  reader.startStripe(streams, stripeFooter);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

6 votes

/**
 * Starts a stripe for a dictionary-encoded column: loads the dictionary blob,
 * then the dictionary lengths, and finally creates the integer reader over the
 * DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from reading the streams
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  // Dictionary blob first.
  final InStream dictionaryStream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DICTIONARY_DATA));
  readDictionaryStream(dictionaryStream);

  // Then the lengths stream, read together with this column's encoding.
  final InStream lengthStream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.LENGTH));
  readDictionaryLengthStream(lengthStream, stripeFooter.getColumnsList().get(columnId));

  // Finally the row reader over the DATA stream.
  final org.apache.orc.impl.StreamName dataName =
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA);
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();
  reader = createIntegerReader(encodingKind, streams.get(dataName), false, false);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

6 votes

/**
 * Chooses the string reader implementation for this stripe from the column's
 * encoding kind and then starts the stripe on that reader.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the chosen reader
 * @throws IllegalArgumentException if the encoding kind is neither direct nor dictionary
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  // Resolve the encoding kind once and reuse it for dispatch and error reporting.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();

  switch (encodingKind) {
    case DIRECT:
    case DIRECT_V2:
      reader = new StringDirectTreeReader(columnId);
      break;
    case DICTIONARY:
    case DICTIONARY_V2:
      reader = new StringDictionaryTreeReader(columnId);
      break;
    default:
      throw new IllegalArgumentException("Unsupported encoding " + encodingKind);
  }

  reader.startStripe(streams, stripeFooter);
}

Source File: PhysicalWriterImpl.java From flink with Apache License 2.0

5 votes

/**
 * Serializes the stripe footer, flushes the writers, and records the stripe
 * offset and footer length on the directory entry.
 *
 * @param footer    stripe footer to write
 * @param dataSize  size subtracted as the stripe's data portion
 * @param indexSize size subtracted as the stripe's index portion
 * @param dirEntry  stripe information builder that receives offset and footer length
 * @throws IOException propagated from writing, flushing, or querying the output position
 */
private void writeStripeFooter(OrcProto.StripeFooter footer, long dataSize,
								long indexSize, OrcProto.StripeInformation.Builder dirEntry) throws IOException {
	// Emit the footer and flush so the output position reflects it.
	footer.writeTo(protobufWriter);
	protobufWriter.flush();
	writer.flush();

	dirEntry.setOffset(stripeStart);

	// Bytes written since stripeStart, minus data and index, are the footer.
	final long footerLength = out.getPos() - stripeStart - dataSize - indexSize;
	dirEntry.setFooterLength(footerLength);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by binding the DATA stream (wrapped in a text reader shim)
 * and creating the integer reader over the LENGTH stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the readers
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  // Raw bytes come from the DATA stream.
  stream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
  data = new BasicTextReaderShim(stream);

  // Lengths come from the LENGTH stream, decoded per the column's encoding kind.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();
  final org.apache.orc.impl.StreamName lengthName =
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
  lengths = createIntegerReader(encodingKind, streams.get(lengthName), false, false);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by creating the integer reader over this column's DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the reader
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  // The encoding kind from the footer selects the integer reader variant.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();
  final InStream dataStream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
  reader = createIntegerReader(encodingKind, dataStream, true, false);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by creating integer readers over the DATA and SECONDARY
 * streams and computing the base timestamp from the writer timezone stored in
 * the stripe footer.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the readers
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  // Both readers use the same encoding kind from the footer.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();

  final org.apache.orc.impl.StreamName dataName =
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA);
  data = createIntegerReader(encodingKind, streams.get(dataName), true, skipCorrupt);

  final org.apache.orc.impl.StreamName secondaryName =
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.SECONDARY);
  nanos = createIntegerReader(encodingKind, streams.get(secondaryName), false, skipCorrupt);

  base_timestamp = getBaseTimestamp(stripeFooter.getWriterTimezone());
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by binding the DATA stream and creating the integer reader
 * over the LENGTH stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the reader
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  stream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));

  // Lengths are decoded according to the column's encoding kind.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();
  final org.apache.orc.impl.StreamName lengthName =
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
  lengths = createIntegerReader(encodingKind, streams.get(lengthName), false, false);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by binding this column's DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);
  // Look the DATA stream up directly by its (column, kind) name.
  stream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by binding this column's DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);
  // Look the DATA stream up directly by its (column, kind) name.
  stream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by creating the integer reader over this column's DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the reader
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  // The encoding kind from the footer selects the integer reader variant.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();
  final InStream dataStream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
  reader = createIntegerReader(encodingKind, dataStream, true, false);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by creating the integer reader over this column's DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the reader
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  // The encoding kind from the footer selects the integer reader variant.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();
  final InStream dataStream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
  reader = createIntegerReader(encodingKind, dataStream, true, false);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by creating the integer reader over this column's DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the reader
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  // The encoding kind from the footer selects the integer reader variant.
  final OrcProto.ColumnEncoding.Kind encodingKind =
      stripeFooter.getColumnsList().get(columnId).getKind();
  final InStream dataStream = streams.get(
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
  reader = createIntegerReader(encodingKind, dataStream, true, false);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by creating a run-length byte reader over this column's DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the reader
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  final org.apache.orc.impl.StreamName dataName =
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA);
  final InStream dataStream = streams.get(dataName);
  reader = new RunLengthByteReader(dataStream);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe by creating a bit-field reader (bit size 1) over this column's DATA stream.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the superclass or from creating the reader
 */
@Override
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  super.startStripe(streams, stripeFooter);

  final org.apache.orc.impl.StreamName dataName =
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA);
  final InStream dataStream = streams.get(dataName);
  reader = new BitFieldReader(dataStream, 1);
}

Source File: TreeReaderFactory.java From tajo with Apache License 2.0

5 votes

/**
 * Starts a stripe: validates the column encoding and sets up the PRESENT-stream reader.
 *
 * <p>When the stripe has no PRESENT stream for this column, {@code present} is
 * cleared and {@code valuePresent} is set to {@code true}.
 *
 * @param streams      streams of the stripe, keyed by stream name
 * @param stripeFooter footer of the stripe being started
 * @throws IOException propagated from the encoding check or from creating the reader
 */
void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
                 OrcProto.StripeFooter stripeFooter
) throws IOException {
  checkEncoding(stripeFooter.getColumnsList().get(columnId));

  final org.apache.orc.impl.StreamName presentName =
      new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.PRESENT);
  final InStream presentStream = streams.get(presentName);

  if (presentStream != null) {
    present = new BitFieldReader(presentStream, 1);
  } else {
    // No PRESENT stream in this stripe for the column.
    present = null;
    valuePresent = true;
  }
}

Source File: DremioORCRecordUtils.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Reads the footer bytes of the given stripe from the file and parses them.
 *
 * @param stripe stripe whose footer is read
 * @return the parsed stripe footer
 * @throws IOException propagated from opening or reading the file, or from parsing
 */
@Override
public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
  // Open the file on first use.
  if (file == null) {
    open();
  }

  // The footer is read from the position after the stripe's index and data lengths.
  final long footerStart = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
  final int footerSize = (int) stripe.getFooterLength();

  // Pull the raw footer bytes into a heap buffer.
  final ByteBuffer footerBytes = ByteBuffer.allocate(footerSize);
  file.readFully(footerStart, footerBytes.array(), footerBytes.arrayOffset(), footerSize);

  // Wrap the buffer as a single chunk and parse it through the codec-aware stream.
  return OrcProto.StripeFooter.parseFrom(
    InStream.createCodedInputStream(
      "footer",
      singleton(new BufferChunk(footerBytes, 0)),
      footerSize,
      codec,
      bufferSize));
}

Source File: DremioORCRecordUtils.java From dremio-oss with Apache License 2.0

4 votes

/**
 * Reads the row indexes of a stripe and, for search-argument columns, the bloom
 * filter stream that immediately follows a column's row index.
 *
 * <p>This function is a copy of original implementation from hive-private repository.
 *
 * @param stripe             stripe whose index streams are read
 * @param footer             stripe footer; read via {@link #readStripeFooter} when {@code null}
 * @param included           per-column flags; {@code null} means every column is considered
 * @param indexes            row index array to fill; allocated with {@code typeCount} entries
 *                           when {@code null}; entries that are already non-null are not re-read
 * @param sargColumns        per-column flags selecting columns whose bloom filter is read
 *                           together with the row index; may be {@code null}
 * @param bloomFilterIndices bloom filter array to fill; allocated with {@code typeCount}
 *                           entries when {@code null}
 * @return an {@code OrcIndex} wrapping the row index and bloom filter arrays
 * @throws IOException propagated from opening or reading the file, or from parsing
 */
@Override
public OrcIndex readRowIndex(StripeInformation stripe,
                             OrcProto.StripeFooter footer,
                             boolean[] included,
                             OrcProto.RowIndex[] indexes,
                             boolean[] sargColumns,
                             OrcProto.BloomFilterIndex[] bloomFilterIndices
) throws IOException {
  if (file == null) {
    open();
  }
  if (footer == null) {
    footer = readStripeFooter(stripe);
  }
  if (indexes == null) {
    indexes = new OrcProto.RowIndex[typeCount];
  }
  if (bloomFilterIndices == null) {
    bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
  }
  long offset = stripe.getOffset();
  List<OrcProto.Stream> streams = footer.getStreamsList();
  for (int i = 0; i < streams.size(); i++) {
    OrcProto.Stream stream = streams.get(i);
    // The last stream has no successor, so nextStream stays null for it.
    OrcProto.Stream nextStream = null;
    if (i < streams.size() - 1) {
      nextStream = streams.get(i + 1);
    }
    int col = stream.getColumn();
    int len = (int) stream.getLength();
    // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
    // filter and combine the io to read row index and bloom filters for that column together
    if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
      boolean readBloomFilter = false;
      // Guard against a row index that is the final stream: without the null check a sarg
      // column whose ROW_INDEX is last would dereference a null nextStream.
      if (sargColumns != null && sargColumns[col] &&
        nextStream != null &&
        nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
        len += (int) nextStream.getLength();
        // Skip the bloom filter stream on the next iteration; it is consumed here.
        i += 1;
        readBloomFilter = true;
      }
      if ((included == null || included[col]) && indexes[col] == null) {
        byte[] buffer = new byte[len];
        file.readFully(offset, buffer, 0, buffer.length);
        ByteBuffer bb = ByteBuffer.wrap(buffer);
        // First stream.getLength() bytes of the buffer hold the row index.
        ByteBuffer rowIndexBB = bb.duplicate();
        rowIndexBB.position(0);
        rowIndexBB.limit((int) stream.getLength());
        indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
          Lists.<DiskRange>newArrayList(new BufferChunk(rowIndexBB, 0)), stream.getLength(),
          codec, bufferSize, null));
        if (readBloomFilter) {
          // The remainder of the buffer holds the bloom filter.
          ByteBuffer bloomFilterBB = bb.duplicate();
          bloomFilterBB.position((int) stream.getLength());
          bloomFilterBB.limit(buffer.length);
          bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(InStream.create(
            "bloom_filter", Lists.<DiskRange>newArrayList(new BufferChunk(bloomFilterBB, 0)),
            nextStream.getLength(), codec, bufferSize, null));
        }
      }
    }
    // Advance past everything accounted for in this iteration, whether or not it was read.
    offset += len;
  }

  return new OrcIndex(indexes, bloomFilterIndices);
}

Java Code Examples for org.apache.orc.OrcProto#StripeFooter