org.apache.iceberg.TableMetadata Java Examples
The following examples show how to use org.apache.iceberg.TableMetadata.
Each example is taken from an open-source project; the source file, project, and license are noted above it.
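TableMetadata is Iceberg's immutable view of a table's state: the schema, partition spec, snapshot log, and properties that every TableOperations implementation below reads and commits. As orientation before the examples, here is a minimal sketch of building a fresh TableMetadata for a not-yet-committed table; the field IDs and the file:// location are placeholders.

import com.google.common.collect.ImmutableMap;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.types.Types;

Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.optional(2, "data", Types.StringType.get()));

PartitionSpec spec = PartitionSpec.builderFor(schema)
    .identity("data")
    .build();

// base metadata for a table that has not been committed yet
TableMetadata metadata = TableMetadata.newTableMetadata(
    schema, spec, "file:///tmp/warehouse/example_table", ImmutableMap.of());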
Example #1
Source File: HiveTableOperations.java From presto with Apache License 2.0
@Override
public TableMetadata refresh() {
    if (location.isPresent()) {
        refreshFromMetadataLocation(null);
        return currentMetadata;
    }

    Table table = getTable();

    if (!isIcebergTable(table)) {
        throw new UnknownTableTypeException(getSchemaTableName());
    }

    String metadataLocation = table.getParameters().get(METADATA_LOCATION);
    if (metadataLocation == null) {
        throw new PrestoException(ICEBERG_INVALID_METADATA,
                format("Table is missing [%s] property: %s", METADATA_LOCATION, getSchemaTableName()));
    }

    refreshFromMetadataLocation(metadataLocation);
    return currentMetadata;
}
Example #2
Source File: HiveTableOperations.java From presto with Apache License 2.0
@Override
public String metadataFileLocation(String filename) {
    TableMetadata metadata = current();
    String location;
    if (metadata != null) {
        String writeLocation = metadata.properties().get(WRITE_METADATA_LOCATION);
        if (writeLocation != null) {
            return format("%s/%s", writeLocation, filename);
        }
        location = metadata.location();
    } else {
        location = this.location.orElseThrow(() -> new IllegalStateException("Location not set"));
    }
    return format("%s/%s/%s", location, METADATA_FOLDER_NAME, filename);
}
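Both branches above honor a write-metadata-location table property. Assuming WRITE_METADATA_LOCATION here is the standard Iceberg TableProperties.WRITE_METADATA_LOCATION key (write.metadata.path), metadata files can be redirected like this; the table and S3 path are hypothetical.

// redirect new metadata files away from the default <table location>/metadata folder
table.updateProperties()
    .set(TableProperties.WRITE_METADATA_LOCATION, "s3://bucket/custom/metadata")
    .commit();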
Example #3
Source File: IcebergCatalog.java From dremio-oss with Apache License 2.0
public void beginCreateTable(BatchSchema writerSchema, List<String> partitionColumns) {
    Preconditions.checkState(transaction == null, "Unexpected state");
    IcebergTableOperations tableOperations = new IcebergTableOperations(fsPath, configuration);
    SchemaConverter schemaConverter = new SchemaConverter();
    Schema schema;
    try {
        schema = schemaConverter.toIceberg(writerSchema);
    } catch (Exception ex) {
        throw UserException.validationError(ex).buildSilently();
    }
    PartitionSpec partitionSpec = getIcebergPartitionSpec(writerSchema, partitionColumns);
    TableMetadata metadata = TableMetadata.newTableMetadata(tableOperations, schema, partitionSpec, fsPath.toString());
    transaction = createTableTransaction(tableOperations, metadata);
    table = transaction.table();
    beginInsert();
}
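Dremio's helper leaves the transaction open for its own insert path. In plain Iceberg of the same era, a create-table transaction built from a TableOperations and a starting TableMetadata is finalized in one atomic step; a sketch, assuming the Transactions.createTableTransaction(ops, metadata) factory as it existed around this Iceberg version and a hypothetical dataFile.

Transaction txn = Transactions.createTableTransaction(tableOperations, metadata);
txn.newAppend()
    .appendFile(dataFile) // hypothetical DataFile
    .commit();            // queued inside the transaction, not yet visible
txn.commitTransaction();  // create + append become visible in a single commit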
Example #4
Source File: TestHadoopCommits.java From iceberg with Apache License 2.0
@Test
public void testMergeAppend() throws Exception {
    testFastAppend(); // create 2 compatible manifest files that will be merged

    // merge all manifests for this test
    table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit();

    // third append
    table.newAppend()
        .appendFile(FILE_C)
        .commit();

    List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
    Assert.assertEquals("Should scan 3 files", 3, tasks.size());

    Assert.assertEquals("Should contain 3 Avro manifest files",
        3, listManifestFiles().size());

    TableMetadata metadata = readMetadataVersion(5);
    Assert.assertEquals("Current snapshot should contain 1 merged manifest",
        1, metadata.currentSnapshot().allManifests().size());
}
Example #5
Source File: HadoopTables.java From iceberg with Apache License 2.0
/**
 * Create a table using the FileSystem implementation resolved from location.
 *
 * @param schema iceberg schema used to create the table
 * @param spec partitioning spec, if null the table will be unpartitioned
 * @param properties a string map of table properties, initialized to empty if null
 * @param location a path URI (e.g. hdfs:///warehouse/my_table)
 * @return newly created table implementation
 */
@Override
public Table create(Schema schema, PartitionSpec spec, Map<String, String> properties,
                    String location) {
    Preconditions.checkNotNull(schema, "A table schema is required");

    TableOperations ops = newTableOps(location);
    if (ops.current() != null) {
        throw new AlreadyExistsException("Table already exists at location: " + location);
    }

    Map<String, String> tableProps = properties == null ? ImmutableMap.of() : properties;
    PartitionSpec partitionSpec = spec == null ? PartitionSpec.unpartitioned() : spec;

    TableMetadata metadata = TableMetadata.newTableMetadata(schema, partitionSpec, location, tableProps);
    ops.commit(null, metadata);

    return new BaseTable(ops, location);
}
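A caller-side sketch of this create path, assuming the schema from earlier and a placeholder HDFS location; passing null for the spec and properties exercises the defaulting logic above.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopTables;

HadoopTables tables = new HadoopTables(new Configuration());

// null spec => unpartitioned table; null properties => empty property map
Table table = tables.create(schema, null, null, "hdfs:///warehouse/my_table");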
Example #6
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
/**
 * Deletes the oldest metadata files if {@link TableProperties#METADATA_DELETE_AFTER_COMMIT_ENABLED} is true.
 *
 * @param base table metadata on which previous versions were based
 * @param metadata new table metadata with updated previous versions
 */
private void deleteRemovedMetadataFiles(TableMetadata base, TableMetadata metadata) {
    if (base == null) {
        return;
    }

    boolean deleteAfterCommit = metadata.propertyAsBoolean(
        TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED,
        TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT);

    Set<TableMetadata.MetadataLogEntry> removedPreviousMetadataFiles = Sets.newHashSet(base.previousFiles());
    removedPreviousMetadataFiles.removeAll(metadata.previousFiles());

    if (deleteAfterCommit) {
        Tasks.foreach(removedPreviousMetadataFiles)
            .noRetry().suppressFailureWhenFinished()
            .onFailure((previousMetadataFile, exc) ->
                LOG.warn("Delete failed for previous metadata file: {}", previousMetadataFile, exc))
            .run(previousMetadataFile -> io().deleteFile(previousMetadataFile.file()));
    }
}
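This cleanup only runs when the table opts in. Assuming the standard Iceberg property keys behind the constants above, enabling it looks like this; the retention count of 5 is arbitrary.

table.updateProperties()
    .set(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, "true") // delete old metadata after commits
    .set(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "5")          // keep at most 5 previous metadata files
    .commit();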
Example #7
Source File: TestTables.java From iceberg with Apache License 2.0
@Override
public void commit(TableMetadata base, TableMetadata metadata) {
    if (base != current) {
        throw new CommitFailedException("Cannot commit changes based on stale metadata");
    }
    synchronized (METADATA) {
        refresh();
        if (base == current) {
            if (failCommits > 0) {
                this.failCommits -= 1;
                throw new CommitFailedException("Injected failure");
            }
            METADATA.put(tableName, metadata);
            this.current = metadata;
        } else {
            throw new CommitFailedException(
                "Commit failed: table was updated at %d", base.lastUpdatedMillis());
        }
    }
}
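The stale-base check above is what drives Iceberg's optimistic concurrency: a caller that loses the race is expected to refresh and retry. A minimal sketch of that loop, where newMetadataFor is a hypothetical function that reapplies the caller's changes to a given base.

TableMetadata base = ops.refresh();
try {
    ops.commit(base, newMetadataFor(base));
} catch (CommitFailedException e) {
    // another writer committed first: re-read the table and reapply the changes
    base = ops.refresh();
    ops.commit(base, newMetadataFor(base));
}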
Example #8
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
@Override
public TableMetadata current() {
    if (shouldRefresh) {
        return refresh();
    }
    return currentMetadata;
}
Example #9
Source File: IcebergTableOps.java From metacat with Apache License 2.0
@Override
public void commit(final TableMetadata base, final TableMetadata metadata) {
    if (!base.equals(metadata)) {
        location = writeNewMetadata(metadata, currentVersion() + 1);
        tableMetadata = null;
        this.requestRefresh();
    }
}
Example #10
Source File: IcebergTableOps.java From metacat with Apache License 2.0
@Override
public TableMetadata current() {
    if (tableMetadata == null) {
        tableMetadata = icebergTableOpsProxy.getMetadata(this, config.isIcebergCacheEnabled());
    }
    return tableMetadata;
}
Example #11
Source File: TestHadoopCommits.java From iceberg with Apache License 2.0
@Test
public void testFastAppend() throws Exception {
    // first append
    table.newFastAppend()
        .appendFile(FILE_A)
        .commit();

    Assert.assertTrue("Should create v2 for the update",
        version(2).exists() && version(2).isFile());
    Assert.assertEquals("Should write the current version to the hint file",
        2, readVersionHint());

    List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
    Assert.assertEquals("Should scan 1 file", 1, tasks.size());

    List<File> manifests = listManifestFiles();
    Assert.assertEquals("Should contain only one Avro manifest file", 1, manifests.size());

    // second append
    table.newFastAppend()
        .appendFile(FILE_B)
        .commit();

    Assert.assertTrue("Should create v3 for the update",
        version(3).exists() && version(3).isFile());
    Assert.assertEquals("Should write the current version to the hint file",
        3, readVersionHint());

    tasks = Lists.newArrayList(table.newScan().planFiles());
    Assert.assertEquals("Should scan 2 files", 2, tasks.size());

    Assert.assertEquals("Should contain 2 Avro manifest files",
        2, listManifestFiles().size());

    TableMetadata metadata = readMetadataVersion(3);
    Assert.assertEquals("Current snapshot should contain 2 manifests",
        2, metadata.currentSnapshot().allManifests().size());
}
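FILE_A and FILE_B are test fixtures; a sketch of how such a DataFile can be built with the DataFiles builder, assuming an unpartitioned spec and placeholder path and stats.

import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;

DataFile fileA = DataFiles.builder(table.spec())
    .withPath("/path/to/data-a.parquet") // placeholder
    .withFileSizeInBytes(10)
    .withRecordCount(1)
    .build();

table.newFastAppend().appendFile(fileA).commit();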
Example #12
Source File: WapUtil.java From iceberg with Apache License 2.0
/**
 * Check if a given staged snapshot's associated wap-id was already published.
 * Does not fail for non-WAP workflows.
 *
 * @param current the current {@link TableMetadata metadata} for the target table
 * @param wapSnapshotId a snapshot id which could have been staged and is associated with a wap id
 * @return the WAP ID that will be published, if the snapshot has one
 */
public static String validateWapPublish(TableMetadata current, long wapSnapshotId) {
    Snapshot cherryPickSnapshot = current.snapshot(wapSnapshotId);
    String wapId = stagedWapId(cherryPickSnapshot);
    if (wapId != null && !wapId.isEmpty()) {
        if (WapUtil.isWapIdPublished(current, wapId)) {
            throw new DuplicateWAPCommitException(wapId);
        }
    }
    return wapId;
}
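For context, a sketch of the write-audit-publish flow this validation guards; the wap.id value, dataFile, and stagedSnapshotId are hypothetical.

// stage a snapshot tagged with a WAP id without making it the current snapshot
table.newAppend()
    .appendFile(dataFile)          // hypothetical DataFile
    .set("wap.id", "audit-job-42") // snapshot summary property checked by WapUtil
    .stageOnly()
    .commit();

// after the audit passes, publish the staged snapshot; validateWapPublish
// rejects the cherry-pick if the same wap.id was already published
table.manageSnapshots().cherrypick(stagedSnapshotId).commit();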
Example #13
Source File: HadoopCatalog.java From iceberg with Apache License 2.0
@Override
public boolean dropTable(TableIdentifier identifier, boolean purge) {
    if (!isValidIdentifier(identifier)) {
        throw new NoSuchTableException("Invalid identifier: %s", identifier);
    }

    Path tablePath = new Path(defaultWarehouseLocation(identifier));
    TableOperations ops = newTableOps(identifier);
    TableMetadata lastMetadata;
    if (purge && ops.current() != null) {
        lastMetadata = ops.current();
    } else {
        lastMetadata = null;
    }

    try {
        if (purge && lastMetadata != null) {
            // the data files and the metadata files may be stored in different locations,
            // so dropTableData must be called to force-delete the data files
            dropTableData(ops.io(), lastMetadata);
        }
        fs.delete(tablePath, true /* recursive */);
        return true;
    } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to delete file: %s", tablePath);
    }
}
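A caller-side sketch, assuming the two-argument HadoopCatalog constructor from the same Iceberg era and a placeholder warehouse path.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopCatalog;

HadoopCatalog catalog = new HadoopCatalog(new Configuration(), "hdfs:///warehouse");

// purge = true also removes data files that may live outside the table directory
catalog.dropTable(TableIdentifier.of("db", "example_table"), true /* purge */);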
Example #14
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
private static TableMetadata checkUUID(TableMetadata currentMetadata, TableMetadata newMetadata) {
    String newUUID = newMetadata.uuid();
    if (currentMetadata != null && currentMetadata.uuid() != null && newUUID != null) {
        Preconditions.checkState(newUUID.equals(currentMetadata.uuid()),
            "Table UUID does not match: current=%s != refreshed=%s", currentMetadata.uuid(), newUUID);
    }
    return newMetadata;
}
Example #15
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
@Override
public TableMetadata refresh() {
    int ver = version != null ? version : readVersionHint();
    try {
        Path metadataFile = getMetadataFile(ver);
        if (version == null && metadataFile == null && ver == 0) {
            // no v0 metadata means the table doesn't exist yet
            return null;
        } else if (metadataFile == null) {
            throw new ValidationException("Metadata file for version %d is missing", ver);
        }

        Path nextMetadataFile = getMetadataFile(ver + 1);
        while (nextMetadataFile != null) {
            ver += 1;
            metadataFile = nextMetadataFile;
            nextMetadataFile = getMetadataFile(ver + 1);
        }

        updateVersionAndMetadata(ver, metadataFile.toString());

        this.shouldRefresh = false;
        return currentMetadata;
    } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to refresh the table");
    }
}
Example #16
Source File: TestTables.java From iceberg with Apache License 2.0
@Override
public TableMetadata refresh() {
    synchronized (METADATA) {
        this.current = METADATA.get(tableName);
    }
    return current;
}
Example #17
Source File: HiveTableOperations.java From presto with Apache License 2.0
private static String metadataFileLocation(TableMetadata metadata, String filename) {
    String location = metadata.properties().get(WRITE_METADATA_LOCATION);
    if (location != null) {
        return format("%s/%s", location, filename);
    }
    return format("%s/%s/%s", metadata.location(), METADATA_FOLDER_NAME, filename);
}
Example #18
Source File: HiveTableOperations.java From presto with Apache License 2.0
@Override
public TableMetadata current() {
    if (shouldRefresh) {
        return refresh();
    }
    return currentMetadata;
}
Example #19
Source File: HiveTableOperations.java From presto with Apache License 2.0
private String writeNewMetadata(TableMetadata metadata, int newVersion) {
    String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion);
    OutputFile newMetadataLocation = fileIo.newOutputFile(newTableMetadataFilePath);

    // write the new metadata
    TableMetadataParser.write(metadata, newMetadataLocation);

    return newTableMetadataFilePath;
}
Example #20
Source File: HiveTableOperations.java From presto with Apache License 2.0
private void refreshFromMetadataLocation(String newLocation) {
    // use null-safe equality check because new tables have a null metadata location
    if (Objects.equals(currentMetadataLocation, newLocation)) {
        shouldRefresh = false;
        return;
    }

    AtomicReference<TableMetadata> newMetadata = new AtomicReference<>();
    Tasks.foreach(newLocation)
        .retry(20)
        .exponentialBackoff(100, 5000, 600000, 4.0)
        .suppressFailureWhenFinished()
        .run(metadataLocation -> newMetadata.set(
            TableMetadataParser.read(this, io().newInputFile(metadataLocation))));

    String newUUID = newMetadata.get().uuid();
    if (currentMetadata != null) {
        checkState(newUUID == null || newUUID.equals(currentMetadata.uuid()),
            "Table UUID does not match: current=%s != refreshed=%s", currentMetadata.uuid(), newUUID);
    }

    currentMetadata = newMetadata.get();
    currentMetadataLocation = newLocation;
    version = parseVersion(newLocation);
    shouldRefresh = false;
}
Example #21
Source File: TestHiveCommits.java From iceberg with Apache License 2.0
@Test
public void testSuppressUnlockExceptions() throws TException, InterruptedException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations();

    TableMetadata metadataV1 = ops.current();

    table.updateSchema()
        .addColumn("n", Types.IntegerType.get())
        .commit();

    ops.refresh();

    TableMetadata metadataV2 = ops.current();

    Assert.assertEquals(2, ops.current().schema().columns().size());

    HiveTableOperations spyOps = spy(ops);

    ArgumentCaptor<Long> lockId = ArgumentCaptor.forClass(Long.class);
    doThrow(new RuntimeException()).when(spyOps).doUnlock(lockId.capture());

    try {
        spyOps.commit(metadataV2, metadataV1);
    } finally {
        ops.doUnlock(lockId.getValue());
    }

    ops.refresh();

    // the commit must succeed
    Assert.assertEquals(1, ops.current().schema().columns().size());
}
Example #22
Source File: RemoveOrphanFilesAction.java From iceberg with Apache License 2.0
private Dataset<Row> buildValidMetadataFileDF() {
    String allManifestsMetadataTable = metadataTableName(MetadataTableType.ALL_MANIFESTS);
    Dataset<Row> manifestDF = spark.read().format("iceberg")
        .load(allManifestsMetadataTable)
        .selectExpr("path as file_path");

    List<String> otherMetadataFiles = Lists.newArrayList();

    for (Snapshot snapshot : table.snapshots()) {
        String manifestListLocation = snapshot.manifestListLocation();
        if (manifestListLocation != null) {
            otherMetadataFiles.add(manifestListLocation);
        }
    }

    otherMetadataFiles.add(ops.metadataFileLocation("version-hint.text"));

    TableMetadata metadata = ops.current();
    otherMetadataFiles.add(metadata.metadataFileLocation());
    for (TableMetadata.MetadataLogEntry previousMetadataFile : metadata.previousFiles()) {
        otherMetadataFiles.add(previousMetadataFile.file());
    }

    Dataset<Row> otherMetadataFileDF = spark
        .createDataset(otherMetadataFiles, Encoders.STRING())
        .toDF("file_path");

    return manifestDF.union(otherMetadataFileDF);
}
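This DataFrame of known-valid files is what the orphan scan is compared against. From the caller's side, the action was invoked through the Spark Actions entry point of that Iceberg era; a sketch, where the three-day cutoff is arbitrary and guards against deleting files from in-flight writes.

import java.util.concurrent.TimeUnit;
import org.apache.iceberg.actions.Actions;

Actions.forTable(table)
    .removeOrphanFiles()
    .olderThan(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(3))
    .execute();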
Example #23
Source File: TestTables.java From iceberg with Apache License 2.0
static TestTable create(File temp, String name, Schema schema, PartitionSpec spec) {
    TestTableOperations ops = new TestTableOperations(name);
    if (ops.current() != null) {
        throw new AlreadyExistsException("Table %s already exists at location: %s", name, temp);
    }
    ops.commit(null, TableMetadata.newTableMetadata(schema, spec, temp.toString(), ImmutableMap.of()));
    return new TestTable(ops, name);
}
Example #24
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
@Override
public TableOperations temp(TableMetadata uncommittedMetadata) {
    return new TableOperations() {
        @Override
        public TableMetadata current() {
            return uncommittedMetadata;
        }

        @Override
        public TableMetadata refresh() {
            throw new UnsupportedOperationException("Cannot call refresh on temporary table operations");
        }

        @Override
        public void commit(TableMetadata base, TableMetadata metadata) {
            throw new UnsupportedOperationException("Cannot call commit on temporary table operations");
        }

        @Override
        public String metadataFileLocation(String fileName) {
            return HadoopTableOperations.this.metadataFileLocation(fileName);
        }

        @Override
        public LocationProvider locationProvider() {
            return LocationProviders.locationsFor(uncommittedMetadata.location(), uncommittedMetadata.properties());
        }

        @Override
        public FileIO io() {
            return HadoopTableOperations.this.io();
        }

        @Override
        public EncryptionManager encryption() {
            return HadoopTableOperations.this.encryption();
        }

        @Override
        public long newSnapshotId() {
            return HadoopTableOperations.this.newSnapshotId();
        }
    };
}
Example #25
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
@Override
public void commit(TableMetadata base, TableMetadata metadata) {
    Pair<Integer, TableMetadata> current = versionAndMetadata();

    if (base != current.second()) {
        throw new CommitFailedException("Cannot commit changes based on stale table metadata");
    }

    if (base == metadata) {
        LOG.info("Nothing to commit.");
        return;
    }

    Preconditions.checkArgument(base == null || base.location().equals(metadata.location()),
        "Hadoop path-based tables cannot be relocated");
    Preconditions.checkArgument(
        !metadata.properties().containsKey(TableProperties.WRITE_METADATA_LOCATION),
        "Hadoop path-based tables cannot relocate metadata");

    String codecName = metadata.property(
        TableProperties.METADATA_COMPRESSION, TableProperties.METADATA_COMPRESSION_DEFAULT);
    TableMetadataParser.Codec codec = TableMetadataParser.Codec.fromName(codecName);
    String fileExtension = TableMetadataParser.getFileExtension(codec);
    Path tempMetadataFile = metadataPath(UUID.randomUUID().toString() + fileExtension);
    TableMetadataParser.write(metadata, io().newOutputFile(tempMetadataFile.toString()));

    int nextVersion = (current.first() != null ? current.first() : 0) + 1;
    Path finalMetadataFile = metadataFilePath(nextVersion, codec);
    FileSystem fs = getFileSystem(tempMetadataFile, conf);

    try {
        if (fs.exists(finalMetadataFile)) {
            throw new CommitFailedException(
                "Version %d already exists: %s", nextVersion, finalMetadataFile);
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e,
            "Failed to check if next version exists: " + finalMetadataFile);
    }

    // this rename operation is the atomic commit operation
    renameToFinal(fs, tempMetadataFile, finalMetadataFile);

    // update the best-effort version pointer
    writeVersionHint(nextVersion);

    deleteRemovedMetadataFiles(base, metadata);

    this.shouldRefresh = true;
}
Example #26
Source File: TestTables.java From iceberg with Apache License 2.0
static TableMetadata readMetadata(String tableName) {
    synchronized (METADATA) {
        return METADATA.get(tableName);
    }
}
Example #27
Source File: TestTables.java From iceberg with Apache License 2.0
@Override
public TableMetadata current() {
    return current;
}
Example #28
Source File: HiveTableOperations.java From presto with Apache License 2.0
private static String newTableMetadataFilePath(TableMetadata meta, int newVersion) {
    String codec = meta.property(METADATA_COMPRESSION, METADATA_COMPRESSION_DEFAULT);
    return metadataFileLocation(meta,
        format("%05d-%s%s", newVersion, randomUUID(), getFileExtension(codec)));
}
Example #29
Source File: HadoopTableOperations.java From iceberg with Apache License 2.0
private synchronized Pair<Integer, TableMetadata> versionAndMetadata() {
    return Pair.of(version, currentMetadata);
}
Example #30
Source File: IcebergMetadata.java From presto with Apache License 2.0
@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    Schema schema = toIcebergSchema(tableMetadata.getColumns());
    PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties()));

    Database database = metastore.getDatabase(schemaName)
        .orElseThrow(() -> new SchemaNotFoundException(schemaName));

    HdfsContext hdfsContext = new HdfsContext(session, schemaName, tableName);
    HiveIdentity identity = new HiveIdentity(session);
    String targetPath = getTableLocation(tableMetadata.getProperties());
    if (targetPath == null) {
        targetPath = getTableDefaultLocation(database, hdfsContext, hdfsEnvironment, schemaName, tableName).toString();
    }

    TableOperations operations = new HiveTableOperations(metastore, hdfsEnvironment, hdfsContext, identity, schemaName, tableName, session.getUser(), targetPath);
    if (operations.current() != null) {
        throw new TableAlreadyExistsException(schemaTableName);
    }

    ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builderWithExpectedSize(2);
    FileFormat fileFormat = getFileFormat(tableMetadata.getProperties());
    propertiesBuilder.put(DEFAULT_FILE_FORMAT, fileFormat.toString());
    if (tableMetadata.getComment().isPresent()) {
        propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get());
    }

    TableMetadata metadata = newTableMetadata(operations, schema, partitionSpec, targetPath, propertiesBuilder.build());
    transaction = createTableTransaction(operations, metadata);

    return new IcebergWritableTableHandle(
        schemaName,
        tableName,
        SchemaParser.toJson(metadata.schema()),
        PartitionSpecParser.toJson(metadata.spec()),
        getColumns(metadata.schema(), typeManager),
        targetPath,
        fileFormat);
}