org.apache.hadoop.hive.ql.io.orc.Writer Java Examples
The following examples show how to use org.apache.hadoop.hive.ql.io.orc.Writer.
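All of the examples below share the same basic pattern: build OrcFile.WriterOptions from a Configuration and an ObjectInspector, create the Writer with OrcFile.createWriter, call addRow for each record, and close the writer. The following is a minimal, self-contained sketch of that pattern; the schema string and output path are illustrative placeholders, not taken from any of the projects below.

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class OrcWriterSketch {
    public static void main(String[] args) throws Exception {
        // Schema and output path are illustrative placeholders.
        TypeInfo typeInfo = TypeInfoUtils
                .getTypeInfoFromTypeString("struct<id:bigint,name:string>");
        ObjectInspector inspector = TypeInfoUtils
                .getStandardJavaObjectInspectorFromTypeInfo(typeInfo);

        // Build the writer from WriterOptions, then add rows and close.
        Writer writer = OrcFile.createWriter(
                new Path("/tmp/example.orc"),
                OrcFile.writerOptions(new Configuration()).inspector(inspector));
        writer.addRow(Arrays.asList(1L, "example"));
        writer.close();
    }
}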
Example #1
Source File: IOUtil.java From hugegraph-loader with Apache License 2.0
public default void writeOrc(String fileName, TypeInfo typeInfo, Object... values) {
    Path path = new Path(this.storePath(), fileName);
    ObjectInspector inspector = TypeInfoUtils
            .getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    OrcFile.WriterOptions options = OrcFile.writerOptions(this.config())
                                           .inspector(inspector);
    Object row = Arrays.asList(values);
    try (Writer writer = OrcFile.createWriter(path, options)) {
        writer.addRow(row);
    } catch (IOException e) {
        throw new RuntimeException(String.format(
                  "Failed to write values '%s' to file '%s' in ORC " +
                  "compression format", row, path), e);
    }
}
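For context, a hypothetical call to the writeOrc helper above might look like the following. writeOrc is a default method, so it is invoked on an instance of the implementing class; the receiver io, the file name, the type string, and the values are all illustrative.

// Hypothetical usage of the writeOrc(...) helper above; names and values are illustrative.
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,age:int>");
io.writeOrc("person.orc", typeInfo, "alice", 29);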
Example #2
Source File: OrcTestTools.java From incubator-gobblin with Apache License 2.0
/**
 * AvroRow version of writeAsOrcBinary
 */
private void writeAsOrcBinary(OrcRowIterator input, TypeInfo schema, Path outputPath)
        throws IOException {
    Configuration configuration = new Configuration();

    // Note that it doesn't support schema evolution at all.
    // If the schema in realRow is inconsistent with given schema, writing into disk
    // would run into failure.
    ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(schema);
    OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).inspector(oi);

    Writer writer = null;
    while (input.hasNext()) {
        AvroRow avroRow = (AvroRow) input.next();
        if (writer == null) {
            options.inspector(avroRow.getInspector());
            writer = OrcFile.createWriter(outputPath, options);
        }
        writer.addRow(avroRow.realRow);
    }
    if (writer != null) {
        writer.close();
    }
}
Example #3
Source File: PutORC.java From nifi with Apache License 2.0
@Override
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path, final RecordSchema schema)
        throws IOException, SchemaNotFoundException {

    final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
    final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
    final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());
    final boolean normalizeForHive = context.getProperty(HIVE_FIELD_NAMES).asBoolean();
    TypeInfo orcSchema = NiFiOrcUtils.getOrcSchema(schema, normalizeForHive);
    final Writer orcWriter = NiFiOrcUtils.createWriter(path, conf, orcSchema, stripeSize, compressionType, bufferSize);
    final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet()
            ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue()
            : NiFiOrcUtils.normalizeHiveTableName(schema.getIdentifier().getName().orElse("unknown"));
    final boolean hiveFieldNames = context.getProperty(HIVE_FIELD_NAMES).asBoolean();

    return new ORCHDFSRecordWriter(orcWriter, schema, hiveTableName, hiveFieldNames);
}
Example #4
Source File: TestOrcReaderPositions.java From presto with Apache License 2.0
private static void createFileWithOnlyUserMetadata(File file, Map<String, String> metadata)
        throws IOException {
    Configuration conf = new Configuration(false);
    OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
            .memory(new NullMemoryManager())
            .inspector(createSettableStructObjectInspector("test", BIGINT))
            .compress(SNAPPY);
    Writer writer = OrcFile.createWriter(new Path(file.toURI()), writerOptions);
    for (Map.Entry<String, String> entry : metadata.entrySet()) {
        writer.addUserMetadata(entry.getKey(), ByteBuffer.wrap(entry.getValue().getBytes(UTF_8)));
    }
    writer.close();
}
Example #5
Source File: TestOrcReaderPositions.java From presto with Apache License 2.0
private static void flushWriter(FileSinkOperator.RecordWriter writer)
        throws IOException, ReflectiveOperationException {
    // The ORC Writer is held in a private field of the OrcOutputFormat$OrcRecordWriter
    // inner class, so reflection is used to reach it and force an intermediate footer.
    Field field = OrcOutputFormat.class.getClassLoader()
            .loadClass(OrcOutputFormat.class.getName() + "$OrcRecordWriter")
            .getDeclaredField("writer");
    field.setAccessible(true);
    ((Writer) field.get(writer)).writeIntermediateFooter();
}
Example #6
Source File: OrcFileRewriter.java From presto with Apache License 2.0
public static OrcFileInfo rewrite(File input, File output, BitSet rowsToDelete)
        throws IOException {
    try (FileSystem fileSystem = new SyncingFileSystem(CONFIGURATION)) {
        Reader reader = createReader(fileSystem, path(input));

        if (reader.getNumberOfRows() < rowsToDelete.length()) {
            throw new IOException("File has fewer rows than deletion vector");
        }
        int deleteRowCount = rowsToDelete.cardinality();
        if (reader.getNumberOfRows() == deleteRowCount) {
            return new OrcFileInfo(0, 0);
        }
        if (reader.getNumberOfRows() >= Integer.MAX_VALUE) {
            throw new IOException("File has too many rows");
        }
        int inputRowCount = toIntExact(reader.getNumberOfRows());

        WriterOptions writerOptions = OrcFile.writerOptions(CONFIGURATION)
                .memory(new NullMemoryManager())
                .fileSystem(fileSystem)
                .compress(reader.getCompression())
                .inspector(reader.getObjectInspector());

        long start = System.nanoTime();
        try (Closer<RecordReader, IOException> recordReader = closer(reader.rows(), RecordReader::close);
                Closer<Writer, IOException> writer = closer(createWriter(path(output), writerOptions), Writer::close)) {
            if (reader.hasMetadataValue(OrcFileMetadata.KEY)) {
                ByteBuffer orcFileMetadata = reader.getMetadataValue(OrcFileMetadata.KEY);
                writer.get().addUserMetadata(OrcFileMetadata.KEY, orcFileMetadata);
            }
            OrcFileInfo fileInfo = rewrite(recordReader.get(), writer.get(), rowsToDelete, inputRowCount);
            log.debug("Rewrote file %s in %s (input rows: %s, output rows: %s)",
                    input.getName(), nanosSince(start), inputRowCount, inputRowCount - deleteRowCount);
            return fileInfo;
        }
    }
}
Example #7
Source File: OrcFileRewriter.java From presto with Apache License 2.0
private static OrcFileInfo rewrite(RecordReader reader, Writer writer, BitSet rowsToDelete, int inputRowCount)
        throws IOException {
    Object object = null;
    int row = 0;
    long rowCount = 0;
    long uncompressedSize = 0;

    row = rowsToDelete.nextClearBit(row);
    if (row < inputRowCount) {
        reader.seekToRow(row);
    }

    while (row < inputRowCount) {
        if (Thread.currentThread().isInterrupted()) {
            throw new InterruptedIOException();
        }

        // seekToRow() is extremely expensive
        if (reader.getRowNumber() < row) {
            reader.next(object);
            continue;
        }

        object = reader.next(object);
        writer.addRow(object);
        rowCount++;
        uncompressedSize += uncompressedSize(object);

        row = rowsToDelete.nextClearBit(row + 1);
    }
    return new OrcFileInfo(rowCount, uncompressedSize);
}
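As a small illustration of how the BitSet deletion vector drives the loop above (the values are made up, not taken from the Presto code): nextClearBit visits only the rows that are not marked for deletion.

// Illustrative only: with bits 1 and 2 set in the deletion vector and 5 input rows,
// nextClearBit visits the surviving rows 0, 3 and 4.
BitSet rowsToDelete = new BitSet();
rowsToDelete.set(1);
rowsToDelete.set(2);
for (int row = rowsToDelete.nextClearBit(0); row < 5; row = rowsToDelete.nextClearBit(row + 1)) {
    System.out.println("keep row " + row);
}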
Example #8
Source File: ORCHDFSRecordWriter.java From nifi with Apache License 2.0
public ORCHDFSRecordWriter(final Writer orcWriter, final RecordSchema recordSchema,
                           final String hiveTableName, final boolean hiveFieldNames) {
    this.recordSchema = recordSchema;
    this.orcWriter = orcWriter;
    this.hiveFieldNames = hiveFieldNames;
    this.orcSchema = NiFiOrcUtils.getOrcSchema(recordSchema, this.hiveFieldNames);
    this.hiveTableName = hiveTableName;
    this.recordFields = recordSchema != null ? recordSchema.getFields() : null;
    this.numRecordFields = recordFields != null ? recordFields.size() : -1;
    // Reuse row object
    this.workingRow = numRecordFields > -1 ? new Object[numRecordFields] : null;
}