org.apache.iceberg.TableMetadata Java Examples
The following examples show how to use org.apache.iceberg.TableMetadata.
Each example is taken from an open-source project; the source file, project, and license are noted above it.
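TableMetadata is Iceberg's immutable view of a table's state: the schema, partition spec, snapshot log, and properties that every TableOperations implementation below reads and commits. As orientation before the examples, here is a minimal sketch of building a fresh TableMetadata for a not-yet-committed table; the field IDs and the file:// location are placeholders.

import com.google.common.collect.ImmutableMap;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.types.Types;

Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.optional(2, "data", Types.StringType.get()));

PartitionSpec spec = PartitionSpec.builderFor(schema)
    .identity("data")
    .build();

// base metadata for a table that has not been committed yet
TableMetadata metadata = TableMetadata.newTableMetadata(
    schema, spec, "file:///tmp/warehouse/example_table", ImmutableMap.of());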
Example #1
Source File: HiveTableOperations.java From presto with Apache License 2.0
@Override
public TableMetadata refresh() {
    if (location.isPresent()) {
        refreshFromMetadataLocation(null);
        return currentMetadata;
    }

    Table table = getTable();

    if (!isIcebergTable(table)) {
        throw new UnknownTableTypeException(getSchemaTableName());
    }

    String metadataLocation = table.getParameters().get(METADATA_LOCATION);
    if (metadataLocation == null) {
        throw new PrestoException(ICEBERG_INVALID_METADATA,
                format("Table is missing [%s] property: %s", METADATA_LOCATION, getSchemaTableName()));
    }

    refreshFromMetadataLocation(metadataLocation);
    return currentMetadata;
}
Example #2
Source File: HiveTableOperations.java From presto with Apache License 2.0
@Override
public String metadataFileLocation(String filename) {
    TableMetadata metadata = current();
    String location;
    if (metadata != null) {
        String writeLocation = metadata.properties().get(WRITE_METADATA_LOCATION);
        if (writeLocation != null) {
            return format("%s/%s", writeLocation, filename);
        }
        location = metadata.location();
    } else {
        location = this.location.orElseThrow(() -> new IllegalStateException("Location not set"));
    }
    return format("%s/%s/%s", location, METADATA_FOLDER_NAME, filename);
}
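Both branches above honor a write-metadata-location table property. Assuming WRITE_METADATA_LOCATION here is the standard Iceberg TableProperties.WRITE_METADATA_LOCATION key (write.metadata.path), metadata files can be redirected like this; the table and S3 path are hypothetical.

// redirect new metadata files away from the default <table location>/metadata folder
table.updateProperties()
    .set(TableProperties.WRITE_METADATA_LOCATION, "s3://bucket/custom/metadata")
    .commit();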
Example #3
Source File: IcebergCatalog.java From dremio-oss with Apache License 2.0
public void beginCreateTable(BatchSchema writerSchema, List<String> partitionColumns) {
    Preconditions.checkState(transaction == null, "Unexpected state");
    IcebergTableOperations tableOperations = new IcebergTableOperations(fsPath, configuration);
    SchemaConverter schemaConverter = new SchemaConverter();
    Schema schema;
    try {
        schema = schemaConverter.toIceberg(writerSchema);
    } catch (Exception ex) {
        throw UserException.validationError(ex).buildSilently();
    }
    PartitionSpec partitionSpec = getIcebergPartitionSpec(writerSchema, partitionColumns);
    TableMetadata metadata = TableMetadata.newTableMetadata(tableOperations, schema, partitionSpec, fsPath.toString());
    transaction = createTableTransaction(tableOperations, metadata);
    table = transaction.table();
    beginInsert();
}
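Dremio's helper leaves the transaction open for its own insert path. In plain Iceberg of the same era, a create-table transaction built from a TableOperations and a starting TableMetadata is finalized in one atomic step; a sketch, assuming the Transactions.createTableTransaction(ops, metadata) factory as it existed around this Iceberg version and a hypothetical dataFile.

Transaction txn = Transactions.createTableTransaction(tableOperations, metadata);
txn.newAppend()
    .appendFile(dataFile) // hypothetical DataFile
    .commit();            // queued inside the transaction, not yet visible
txn.commitTransaction();  // create + append become visible in a single commit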
Example #4
Source File: TestHadoopCommits.java From iceberg with Apache License 2.0
@Test
public void testMergeAppend() throws Exception {
    testFastAppend(); // create 2 compatible manifest files that will be merged

    // merge all manifests for this test
    table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit();

    // third append
    table.newAppend()
        .appendFile(FILE_C)
        .commit();

    List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
    Assert.assertEquals("Should scan 3 files", 3, tasks.size());

    Assert.assertEquals("Should contain 3 Avro manifest files",
        3, listManifestFiles().size());

    TableMetadata metadata = readMetadataVersion(5);
    Assert.assertEquals("Current snapshot should contain 1 merged manifest",
        1, metadata.currentSnapshot().allManifests().size());
}
Example #5
Source File: HadoopTables.java From iceberg with Apache License 2.0
/**
 * Create a table using the FileSystem implementation resolved from location.
 *
 * @param schema iceberg schema used to create the table
 * @param spec partitioning spec, if null the table will be unpartitioned
 * @param properties a string map of table properties, initialized to empty if null
 * @param location a path URI (e.g. hdfs:///warehouse/my_table)
 * @return newly created table implementation
 */
@Override
public Table create(Schema schema, PartitionSpec spec, Map<String, String> properties,
                    String location) {
    Preconditions.checkNotNull(schema, "A table schema is required");

    TableOperations ops = newTableOps(location);
    if (ops.current() != null) {
        throw new AlreadyExistsException("Table already exists at location: " + location);
    }

    Map<String, String> tableProps = properties == null ? ImmutableMap.of() : properties;
    PartitionSpec partitionSpec = spec == null ? PartitionSpec.unpartitioned() : spec;

    TableMetadata metadata = TableMetadata.newTableMetadata(schema, partitionSpec, location, tableProps);
    ops.commit(null, metadata);

    return new BaseTable(ops, location);
}
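A caller-side sketch of this create path, assuming the schema from earlier and a placeholder HDFS location; passing null for the spec and properties exercises the defaulting logic above.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;

HadoopTables tables = new HadoopTables(new Configuration());

// null spec => unpartitioned table; null properties => empty property map
Table table = tables.create(schema, null, null, "hdfs:///warehouse/my_table");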
Example #6
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
/**
 * Deletes the oldest metadata files if {@link TableProperties#METADATA_DELETE_AFTER_COMMIT_ENABLED} is true.
 *
 * @param base table metadata on which previous versions were based
 * @param metadata new table metadata with updated previous versions
 */
private void deleteRemovedMetadataFiles(TableMetadata base, TableMetadata metadata) {
    if (base == null) {
        return;
    }

    boolean deleteAfterCommit = metadata.propertyAsBoolean(
        TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED,
        TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT);

    Set<TableMetadata.MetadataLogEntry> removedPreviousMetadataFiles = Sets.newHashSet(base.previousFiles());
    removedPreviousMetadataFiles.removeAll(metadata.previousFiles());

    if (deleteAfterCommit) {
        Tasks.foreach(removedPreviousMetadataFiles)
            .noRetry().suppressFailureWhenFinished()
            .onFailure((previousMetadataFile, exc) ->
                LOG.warn("Delete failed for previous metadata file: {}", previousMetadataFile, exc))
            .run(previousMetadataFile -> io().deleteFile(previousMetadataFile.file()));
    }
}
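This cleanup only runs when the table opts in. Assuming the standard Iceberg property keys behind the constants above, enabling it looks like this; the retention count of 5 is arbitrary.

table.updateProperties()
    .set(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, "true") // delete old metadata after commits
    .set(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "5")          // keep at most 5 previous metadata files
    .commit();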
Example #7
Source File: TestTables.java From iceberg with Apache License 2.0
@Override
public void commit(TableMetadata base, TableMetadata metadata) {
    if (base != current) {
        throw new CommitFailedException("Cannot commit changes based on stale metadata");
    }
    synchronized (METADATA) {
        refresh();
        if (base == current) {
            if (failCommits > 0) {
                this.failCommits -= 1;
                throw new CommitFailedException("Injected failure");
            }
            METADATA.put(tableName, metadata);
            this.current = metadata;
        } else {
            throw new CommitFailedException(
                "Commit failed: table was updated at %d", base.lastUpdatedMillis());
        }
    }
}
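The stale-base check above is what drives Iceberg's optimistic concurrency: a caller that loses the race is expected to refresh and retry. A minimal sketch of that loop, where newMetadataFor is a hypothetical function that reapplies the caller's changes to a given base.

TableMetadata base = ops.refresh();
try {
    ops.commit(base, newMetadataFor(base));
} catch (CommitFailedException e) {
    // another writer committed first: re-read the table and reapply the changes
    base = ops.refresh();
    ops.commit(base, newMetadataFor(base));
}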
Example #8
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
@Override
public TableMetadata current() {
    if (shouldRefresh) {
        return refresh();
    }
    return currentMetadata;
}
Example #9
Source File: IcebergTableOps.java From metacat with Apache License 2.0
@Override
public void commit(final TableMetadata base, final TableMetadata metadata) {
    if (!base.equals(metadata)) {
        location = writeNewMetadata(metadata, currentVersion() + 1);
        tableMetadata = null;
        this.requestRefresh();
    }
}
Example #10
Source File: IcebergTableOps.java From metacat with Apache License 2.0
@Override
public TableMetadata current() {
    if (tableMetadata == null) {
        tableMetadata = icebergTableOpsProxy.getMetadata(this, config.isIcebergCacheEnabled());
    }
    return tableMetadata;
}
Example #11
Source File: TestHadoopCommits.java From iceberg with Apache License 2.0
@Test
public void testFastAppend() throws Exception {
    // first append
    table.newFastAppend()
        .appendFile(FILE_A)
        .commit();

    Assert.assertTrue("Should create v2 for the update",
        version(2).exists() && version(2).isFile());
    Assert.assertEquals("Should write the current version to the hint file",
        2, readVersionHint());

    List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
    Assert.assertEquals("Should scan 1 file", 1, tasks.size());

    List<File> manifests = listManifestFiles();
    Assert.assertEquals("Should contain only one Avro manifest file", 1, manifests.size());

    // second append
    table.newFastAppend()
        .appendFile(FILE_B)
        .commit();

    Assert.assertTrue("Should create v3 for the update",
        version(3).exists() && version(3).isFile());
    Assert.assertEquals("Should write the current version to the hint file",
        3, readVersionHint());

    tasks = Lists.newArrayList(table.newScan().planFiles());
    Assert.assertEquals("Should scan 2 files", 2, tasks.size());

    Assert.assertEquals("Should contain 2 Avro manifest files",
        2, listManifestFiles().size());

    TableMetadata metadata = readMetadataVersion(3);
    Assert.assertEquals("Current snapshot should contain 2 manifests",
        2, metadata.currentSnapshot().allManifests().size());
}
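FILE_A and FILE_B are test fixtures; a sketch of how such a DataFile can be built with the DataFiles builder, assuming an unpartitioned spec and placeholder path and stats.

import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;

DataFile fileA = DataFiles.builder(table.spec())
    .withPath("/path/to/data-a.parquet") // placeholder
    .withFileSizeInBytes(10)
    .withRecordCount(1)
    .build();

table.newFastAppend().appendFile(fileA).commit();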
Example #12
Source File: WapUtil.java From iceberg with Apache License 2.0
/**
 * Check if a given staged snapshot's associated wap-id was already published.
 * Does not fail for non-WAP workflows.
 *
 * @param current the current {@link TableMetadata metadata} for the target table
 * @param wapSnapshotId a snapshot id which could have been staged and is associated with a wap id
 * @return the WAP ID that will be published, if the snapshot has one
 */
public static String validateWapPublish(TableMetadata current, long wapSnapshotId) {
    Snapshot cherryPickSnapshot = current.snapshot(wapSnapshotId);
    String wapId = stagedWapId(cherryPickSnapshot);
    if (wapId != null && !wapId.isEmpty()) {
        if (WapUtil.isWapIdPublished(current, wapId)) {
            throw new DuplicateWAPCommitException(wapId);
        }
    }
    return wapId;
}
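For context, a sketch of the write-audit-publish flow this validation guards; the wap.id value, dataFile, and stagedSnapshotId are hypothetical.

// stage a snapshot tagged with a WAP id without making it the current snapshot
table.newAppend()
    .appendFile(dataFile)          // hypothetical DataFile
    .set("wap.id", "audit-job-42") // snapshot summary property checked by WapUtil
    .stageOnly()
    .commit();

// after the audit passes, publish the staged snapshot; validateWapPublish
// rejects the cherry-pick if the same wap.id was already published
table.manageSnapshots().cherrypick(stagedSnapshotId).commit();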
Example #13
Source File: HadoopCatalog.java From iceberg with Apache License 2.0
@Override
public boolean dropTable(TableIdentifier identifier, boolean purge) {
    if (!isValidIdentifier(identifier)) {
        throw new NoSuchTableException("Invalid identifier: %s", identifier);
    }

    Path tablePath = new Path(defaultWarehouseLocation(identifier));
    TableOperations ops = newTableOps(identifier);
    TableMetadata lastMetadata;
    if (purge && ops.current() != null) {
        lastMetadata = ops.current();
    } else {
        lastMetadata = null;
    }

    try {
        if (purge && lastMetadata != null) {
            // the data files and the metadata files may be stored in different locations,
            // so dropTableData must be called to force-delete the data files
            dropTableData(ops.io(), lastMetadata);
        }
        fs.delete(tablePath, true /* recursive */);
        return true;
    } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to delete file: %s", tablePath);
    }
}
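A caller-side sketch, assuming the two-argument HadoopCatalog constructor from the same Iceberg era and a placeholder warehouse path.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopCatalog;

HadoopCatalog catalog = new HadoopCatalog(new Configuration(), "hdfs:///warehouse");

// purge = true also removes data files that may live outside the table directory
catalog.dropTable(TableIdentifier.of("db", "example_table"), true /* purge */);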
Example #14
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
private static TableMetadata checkUUID(TableMetadata currentMetadata, TableMetadata newMetadata) {
    String newUUID = newMetadata.uuid();
    if (currentMetadata != null && currentMetadata.uuid() != null && newUUID != null) {
        Preconditions.checkState(newUUID.equals(currentMetadata.uuid()),
            "Table UUID does not match: current=%s != refreshed=%s", currentMetadata.uuid(), newUUID);
    }
    return newMetadata;
}
Example #15
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
@Override
public TableMetadata refresh() {
    int ver = version != null ? version : readVersionHint();
    try {
        Path metadataFile = getMetadataFile(ver);
        if (version == null && metadataFile == null && ver == 0) {
            // no v0 metadata means the table doesn't exist yet
            return null;
        } else if (metadataFile == null) {
            throw new ValidationException("Metadata file for version %d is missing", ver);
        }

        Path nextMetadataFile = getMetadataFile(ver + 1);
        while (nextMetadataFile != null) {
            ver += 1;
            metadataFile = nextMetadataFile;
            nextMetadataFile = getMetadataFile(ver + 1);
        }

        updateVersionAndMetadata(ver, metadataFile.toString());

        this.shouldRefresh = false;
        return currentMetadata;
    } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to refresh the table");
    }
}
Example #16
Source File: TestTables.java From iceberg with Apache License 2.0
@Override
public TableMetadata refresh() {
    synchronized (METADATA) {
        this.current = METADATA.get(tableName);
    }
    return current;
}
Example #17
Source File: HiveTableOperations.java From presto with Apache License 2.0
private static String metadataFileLocation(TableMetadata metadata, String filename) {
    String location = metadata.properties().get(WRITE_METADATA_LOCATION);
    if (location != null) {
        return format("%s/%s", location, filename);
    }
    return format("%s/%s/%s", metadata.location(), METADATA_FOLDER_NAME, filename);
}
Example #18
Source File: HiveTableOperations.java From presto with Apache License 2.0
@Override
public TableMetadata current() {
    if (shouldRefresh) {
        return refresh();
    }
    return currentMetadata;
}
Example #19
Source File: HiveTableOperations.java From presto with Apache License 2.0
private String writeNewMetadata(TableMetadata metadata, int newVersion) {
    String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion);
    OutputFile newMetadataLocation = fileIo.newOutputFile(newTableMetadataFilePath);

    // write the new metadata
    TableMetadataParser.write(metadata, newMetadataLocation);

    return newTableMetadataFilePath;
}
Example #20
Source File: HiveTableOperations.java From presto with Apache License 2.0
private void refreshFromMetadataLocation(String newLocation) {
    // use null-safe equality check because new tables have a null metadata location
    if (Objects.equals(currentMetadataLocation, newLocation)) {
        shouldRefresh = false;
        return;
    }

    AtomicReference<TableMetadata> newMetadata = new AtomicReference<>();
    Tasks.foreach(newLocation)
        .retry(20)
        .exponentialBackoff(100, 5000, 600000, 4.0)
        .suppressFailureWhenFinished()
        .run(metadataLocation -> newMetadata.set(
            TableMetadataParser.read(this, io().newInputFile(metadataLocation))));

    String newUUID = newMetadata.get().uuid();
    if (currentMetadata != null) {
        checkState(newUUID == null || newUUID.equals(currentMetadata.uuid()),
            "Table UUID does not match: current=%s != refreshed=%s", currentMetadata.uuid(), newUUID);
    }

    currentMetadata = newMetadata.get();
    currentMetadataLocation = newLocation;
    version = parseVersion(newLocation);
    shouldRefresh = false;
}
Example #21
Source File: TestHiveCommits.java From iceberg with Apache License 2.0
@Test
public void testSuppressUnlockExceptions() throws TException, InterruptedException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations();

    TableMetadata metadataV1 = ops.current();

    table.updateSchema()
        .addColumn("n", Types.IntegerType.get())
        .commit();

    ops.refresh();

    TableMetadata metadataV2 = ops.current();

    Assert.assertEquals(2, ops.current().schema().columns().size());

    HiveTableOperations spyOps = spy(ops);

    ArgumentCaptor<Long> lockId = ArgumentCaptor.forClass(Long.class);
    doThrow(new RuntimeException()).when(spyOps).doUnlock(lockId.capture());

    try {
        spyOps.commit(metadataV2, metadataV1);
    } finally {
        ops.doUnlock(lockId.getValue());
    }

    ops.refresh();

    // the commit must succeed
    Assert.assertEquals(1, ops.current().schema().columns().size());
}
Example #22
Source File: RemoveOrphanFilesAction.java From iceberg with Apache License 2.0
private Dataset<Row> buildValidMetadataFileDF() {
    String allManifestsMetadataTable = metadataTableName(MetadataTableType.ALL_MANIFESTS);
    Dataset<Row> manifestDF = spark.read().format("iceberg")
        .load(allManifestsMetadataTable)
        .selectExpr("path as file_path");

    List<String> otherMetadataFiles = Lists.newArrayList();

    for (Snapshot snapshot : table.snapshots()) {
        String manifestListLocation = snapshot.manifestListLocation();
        if (manifestListLocation != null) {
            otherMetadataFiles.add(manifestListLocation);
        }
    }

    otherMetadataFiles.add(ops.metadataFileLocation("version-hint.text"));

    TableMetadata metadata = ops.current();
    otherMetadataFiles.add(metadata.metadataFileLocation());
    for (TableMetadata.MetadataLogEntry previousMetadataFile : metadata.previousFiles()) {
        otherMetadataFiles.add(previousMetadataFile.file());
    }

    Dataset<Row> otherMetadataFileDF = spark
        .createDataset(otherMetadataFiles, Encoders.STRING())
        .toDF("file_path");

    return manifestDF.union(otherMetadataFileDF);
}
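This DataFrame of known-valid files is what the orphan scan is compared against. From the caller's side, the action was invoked through the Spark Actions entry point of that Iceberg era; a sketch, where the three-day cutoff is arbitrary and guards against deleting files from in-flight writes.

import java.util.concurrent.TimeUnit;
import org.apache.iceberg.actions.Actions;

Actions.forTable(table)
    .removeOrphanFiles()
    .olderThan(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(3))
    .execute();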
Example #23
Source File: TestTables.java From iceberg with Apache License 2.0
static TestTable create(File temp, String name, Schema schema, PartitionSpec spec) {
    TestTableOperations ops = new TestTableOperations(name);
    if (ops.current() != null) {
        throw new AlreadyExistsException("Table %s already exists at location: %s", name, temp);
    }
    ops.commit(null, TableMetadata.newTableMetadata(schema, spec, temp.toString(), ImmutableMap.of()));
    return new TestTable(ops, name);
}
Example #24
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
@Override
public TableOperations temp(TableMetadata uncommittedMetadata) {
    return new TableOperations() {
        @Override
        public TableMetadata current() {
            return uncommittedMetadata;
        }

        @Override
        public TableMetadata refresh() {
            throw new UnsupportedOperationException("Cannot call refresh on temporary table operations");
        }

        @Override
        public void commit(TableMetadata base, TableMetadata metadata) {
            throw new UnsupportedOperationException("Cannot call commit on temporary table operations");
        }

        @Override
        public String metadataFileLocation(String fileName) {
            return HadoopTableOperations.this.metadataFileLocation(fileName);
        }

        @Override
        public LocationProvider locationProvider() {
            return LocationProviders.locationsFor(uncommittedMetadata.location(), uncommittedMetadata.properties());
        }

        @Override
        public FileIO io() {
            return HadoopTableOperations.this.io();
        }

        @Override
        public EncryptionManager encryption() {
            return HadoopTableOperations.this.encryption();
        }

        @Override
        public long newSnapshotId() {
            return HadoopTableOperations.this.newSnapshotId();
        }
    };
}
Example #25
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
@Override
public void commit(TableMetadata base, TableMetadata metadata) {
    Pair<Integer, TableMetadata> current = versionAndMetadata();

    if (base != current.second()) {
        throw new CommitFailedException("Cannot commit changes based on stale table metadata");
    }

    if (base == metadata) {
        LOG.info("Nothing to commit.");
        return;
    }

    Preconditions.checkArgument(base == null || base.location().equals(metadata.location()),
        "Hadoop path-based tables cannot be relocated");
    Preconditions.checkArgument(
        !metadata.properties().containsKey(TableProperties.WRITE_METADATA_LOCATION),
        "Hadoop path-based tables cannot relocate metadata");

    String codecName = metadata.property(
        TableProperties.METADATA_COMPRESSION, TableProperties.METADATA_COMPRESSION_DEFAULT);
    TableMetadataParser.Codec codec = TableMetadataParser.Codec.fromName(codecName);
    String fileExtension = TableMetadataParser.getFileExtension(codec);
    Path tempMetadataFile = metadataPath(UUID.randomUUID().toString() + fileExtension);
    TableMetadataParser.write(metadata, io().newOutputFile(tempMetadataFile.toString()));

    int nextVersion = (current.first() != null ? current.first() : 0) + 1;
    Path finalMetadataFile = metadataFilePath(nextVersion, codec);
    FileSystem fs = getFileSystem(tempMetadataFile, conf);

    try {
        if (fs.exists(finalMetadataFile)) {
            throw new CommitFailedException(
                "Version %d already exists: %s", nextVersion, finalMetadataFile);
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e,
            "Failed to check if next version exists: " + finalMetadataFile);
    }

    // this rename operation is the atomic commit operation
    renameToFinal(fs, tempMetadataFile, finalMetadataFile);

    // update the best-effort version pointer
    writeVersionHint(nextVersion);

    deleteRemovedMetadataFiles(base, metadata);

    this.shouldRefresh = true;
}
Example #26
Source File: TestTables.java From iceberg with Apache License 2.0
static TableMetadata readMetadata(String tableName) {
    synchronized (METADATA) {
        return METADATA.get(tableName);
    }
}
Example #27
Source File: TestTables.java From iceberg with Apache License 2.0
@Override
public TableMetadata current() {
    return current;
}
Example #28
Source File: HiveTableOperations.java From presto with Apache License 2.0
private static String newTableMetadataFilePath(TableMetadata meta, int newVersion) {
    String codec = meta.property(METADATA_COMPRESSION, METADATA_COMPRESSION_DEFAULT);
    return metadataFileLocation(meta,
        format("%05d-%s%s", newVersion, randomUUID(), getFileExtension(codec)));
}
Example #29
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
private synchronized Pair<Integer, TableMetadata> versionAndMetadata() {
    return Pair.of(version, currentMetadata);
}
Example #30
Source File: IcebergMetadata.java From presto with Apache License 2.0
@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    Schema schema = toIcebergSchema(tableMetadata.getColumns());
    PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties()));

    Database database = metastore.getDatabase(schemaName)
        .orElseThrow(() -> new SchemaNotFoundException(schemaName));

    HdfsContext hdfsContext = new HdfsContext(session, schemaName, tableName);
    HiveIdentity identity = new HiveIdentity(session);
    String targetPath = getTableLocation(tableMetadata.getProperties());
    if (targetPath == null) {
        targetPath = getTableDefaultLocation(database, hdfsContext, hdfsEnvironment, schemaName, tableName).toString();
    }

    TableOperations operations = new HiveTableOperations(metastore, hdfsEnvironment, hdfsContext, identity, schemaName, tableName, session.getUser(), targetPath);
    if (operations.current() != null) {
        throw new TableAlreadyExistsException(schemaTableName);
    }

    ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builderWithExpectedSize(2);
    FileFormat fileFormat = getFileFormat(tableMetadata.getProperties());
    propertiesBuilder.put(DEFAULT_FILE_FORMAT, fileFormat.toString());
    if (tableMetadata.getComment().isPresent()) {
        propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get());
    }

    TableMetadata metadata = newTableMetadata(operations, schema, partitionSpec, targetPath, propertiesBuilder.build());
    transaction = createTableTransaction(operations, metadata);

    return new IcebergWritableTableHandle(
        schemaName,
        tableName,
        SchemaParser.toJson(metadata.schema()),
        PartitionSpecParser.toJson(metadata.spec()),
        getColumns(metadata.schema(), typeManager),
        targetPath,
        fileFormat);
}