Java Code Examples for org.apache.iceberg.DataFiles#Builder
The following examples show how to use org.apache.iceberg.DataFiles#Builder.
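DataFiles.Builder assembles a DataFile, Iceberg's immutable metadata record describing a data file that has already been written to storage; the built DataFile is what gets committed to a table. As a minimal sketch of that flow (the table, path, and size/count arguments below are hypothetical placeholders, not part of any example):

import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Table;

// Minimal sketch of the builder flow. The file itself must already exist;
// the builder only records its metadata.
static void appendParquetFile(Table table, String path, long sizeInBytes, long rowCount) {
  DataFile dataFile = DataFiles.builder(table.spec()) // partition spec of the target table
      .withPath(path)                                 // location of the existing file
      .withFormat(FileFormat.PARQUET)                 // its file format
      .withFileSizeInBytes(sizeInBytes)               // its size on disk
      .withRecordCount(rowCount)                      // number of rows it contains
      .build();
  table.newAppend()          // start an append operation
      .appendFile(dataFile)  // register the new data file
      .commit();             // create a new table snapshot
}

The examples below show the same pattern in production code.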
Example 1
Source File: ParquetRecordWriter.java from dremio-oss (Apache License 2.0)
private byte[] getIcebergMetaData() throws IOException {
  if (!this.isIcebergWriter) {
    return null;
  }

  final long fileSize = parquetFileWriter.getPos();
  DataFiles.Builder dataFileBuilder = DataFiles.builder(
          IcebergCatalog.getIcebergPartitionSpec(this.batchSchema, this.partitionColumns))
      .withPath(path.toString())
      .withFileSizeInBytes(fileSize)
      .withRecordCount(recordCount)
      .withFormat(FileFormat.PARQUET);

  // add partition info
  if (partitionColumns != null) {
    dataFileBuilder = dataFileBuilder.withPartition(partition.getIcebergPartitionData());
  }

  // add column level metrics
  Metrics metrics = footerMetricsToIcebergMetrics(parquetFileWriter.getFooter(), batchSchema);
  dataFileBuilder = dataFileBuilder.withMetrics(metrics);

  return IcebergSerDe.serializeDataFile(dataFileBuilder.build());
}
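Here the builder describes a Parquet file the writer has just finished: the partition spec is derived from the catalog, partition values are attached only when the writer has partition columns, and column-level metrics are converted from the Parquet footer. The resulting DataFile is then serialized to bytes, presumably so the Iceberg commit can be performed in a separate step.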
Example 2
Source File: IcebergMetadata.java from presto (Apache License 2.0)
@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    IcebergWritableTableHandle table = (IcebergWritableTableHandle) insertHandle;
    org.apache.iceberg.Table icebergTable = transaction.table();

    List<CommitTaskData> commitTasks = fragments.stream()
            .map(slice -> commitTaskCodec.fromJson(slice.getBytes()))
            .collect(toImmutableList());

    Type[] partitionColumnTypes = icebergTable.spec().fields().stream()
            .map(field -> field.transform().getResultType(
                    icebergTable.schema().findType(field.sourceId())))
            .toArray(Type[]::new);

    AppendFiles appendFiles = transaction.newFastAppend();

    for (CommitTaskData task : commitTasks) {
        HdfsContext context = new HdfsContext(session, table.getSchemaName(), table.getTableName());
        Configuration configuration = hdfsEnvironment.getConfiguration(context, new Path(task.getPath()));

        DataFiles.Builder builder = DataFiles.builder(icebergTable.spec())
                .withInputFile(HadoopInputFile.fromLocation(task.getPath(), configuration))
                .withFormat(table.getFileFormat())
                .withMetrics(task.getMetrics().metrics());

        if (!icebergTable.spec().fields().isEmpty()) {
            String partitionDataJson = task.getPartitionDataJson()
                    .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
            builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
        }

        appendFiles.appendFile(builder.build());
    }

    appendFiles.commit();
    transaction.commitTransaction();

    return Optional.of(new HiveWrittenPartitions(commitTasks.stream()
            .map(CommitTaskData::getPath)
            .collect(toImmutableList())));
}
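In this example withInputFile stands in for withPath and withFileSizeInBytes: the builder takes the file's location and length from the HadoopInputFile. Each writer task reports its partition values as JSON, and they are parsed back only when the table's partition spec is non-empty, so unpartitioned tables skip withPartition entirely.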
Example 3
Source File: TestIcebergInputFormat.java from iceberg (Apache License 2.0)
private DataFile writeFile(
    Table table, StructLike partitionData, FileFormat fileFormat, List<Record> records) throws IOException {
  File file = temp.newFile();
  Assert.assertTrue(file.delete());

  FileAppender<Record> appender;
  switch (fileFormat) {
    case AVRO:
      appender = Avro.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(DataWriter::create)
          .named(fileFormat.name())
          .build();
      break;

    case PARQUET:
      appender = Parquet.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(GenericParquetWriter::buildWriter)
          .named(fileFormat.name())
          .build();
      break;

    case ORC:
      appender = ORC.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(GenericOrcWriter::buildWriter)
          .build();
      break;

    default:
      throw new UnsupportedOperationException("Cannot write format: " + fileFormat);
  }

  try {
    appender.addAll(records);
  } finally {
    appender.close();
  }

  DataFiles.Builder builder = DataFiles.builder(table.spec())
      .withPath(file.toString())
      .withFormat(fileFormat)
      .withFileSizeInBytes(file.length())
      .withMetrics(appender.metrics());

  if (partitionData != null) {
    builder.withPartition(partitionData);
  }

  return builder.build();
}
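Because DataFiles.Builder only records metadata about an already-written file, the same builder code serves all three formats; only the FileAppender construction differs. The metrics are read from the appender after it is closed, and withPartition is applied only when the test targets a partitioned table.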