org.apache.iceberg.types.TypeUtil Java Examples
The following examples show how to use org.apache.iceberg.types.TypeUtil.
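Before the per-project examples, here is a minimal, self-contained sketch of the TypeUtil calls that recur below: select, getProjectedIds, and assignFreshIds. The two-column schema and the class name TypeUtilSketch are made up for illustration and do not come from any of the projects listed.

import java.util.Collections;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;

public class TypeUtilSketch {
  public static void main(String[] args) {
    // A toy schema with explicit field ids (1 = id, 2 = data).
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));

    // Project a subset of columns by field id.
    Schema idOnly = TypeUtil.select(schema, Collections.singleton(1));

    // Collect the field ids covered by a projection.
    Set<Integer> projectedIds = TypeUtil.getProjectedIds(idOnly);

    // Reassign all field ids from a counter, as several examples below do when creating tables.
    AtomicInteger nextId = new AtomicInteger(0);
    Schema fresh = TypeUtil.assignFreshIds(schema, nextId::incrementAndGet);

    System.out.println(projectedIds);
    System.out.println(fresh.asStruct());
  }
}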
Example #1
Source File: IcebergCatalog.java From dremio-oss with Apache License 2.0 | 6 votes |
public void changeColumn(String columnToChange, Types.NestedField newDef) {
  IcebergTableOperations tableOperations = new IcebergTableOperations(fsPath, configuration);
  table = new BaseTable(tableOperations, fsPath.getName());

  Types.NestedField columnToChangeInIceberg = table.schema().caseInsensitiveFindField(columnToChange);
  if (!table.spec().getFieldsBySourceId(columnToChangeInIceberg.fieldId()).isEmpty()) {
    // column is part of partitionspec
    throw UserException.unsupportedError()
        .message("[%s] is a partition column. Partition spec change is not supported.",
            columnToChangeInIceberg.name())
        .buildSilently();
  }

  if (!TypeUtil.isPromotionAllowed(columnToChangeInIceberg.type(), newDef.type().asPrimitiveType())) {
    throw UserException.validationError()
        .message("Cannot change data type of column [%s] from %s to %s",
            columnToChangeInIceberg.name(),
            sqlTypeNameWithPrecisionAndScale(columnToChangeInIceberg.type()),
            sqlTypeNameWithPrecisionAndScale(newDef.type()))
        .buildSilently();
  }

  table.updateSchema()
      .renameColumn(columnToChangeInIceberg.name(), newDef.name())
      .updateColumn(columnToChangeInIceberg.name(), newDef.type().asPrimitiveType())
      .commit();
}
Example #2
Source File: IcebergSource.java From iceberg with Apache License 2.0 | 6 votes |
@Override
public StreamWriter createStreamWriter(String runId, StructType dsStruct,
                                       OutputMode mode, DataSourceOptions options) {
  Preconditions.checkArgument(
      mode == OutputMode.Append() || mode == OutputMode.Complete(),
      "Output mode %s is not supported", mode);

  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);

  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema, checkNullability(options), checkOrdering(options));
  SparkUtil.validatePartitionTransforms(table.spec());

  // Spark 2.4.x passes runId to createStreamWriter instead of real queryId,
  // so we fetch it directly from sparkContext to make writes idempotent
  String queryId = lazySparkSession().sparkContext().getLocalProperty(StreamExecution.QUERY_ID_KEY());
  String appId = lazySparkSession().sparkContext().applicationId();

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new StreamingWriter(table, io, encryptionManager, options, queryId, mode, appId, writeSchema, dsStruct);
}
Example #3
Source File: TestIcebergInputFormat.java From iceberg with Apache License 2.0 | 6 votes |
@Test
public void testProjection() throws Exception {
  File location = temp.newFolder(format.name());
  Assert.assertTrue(location.delete());

  Schema projectedSchema = TypeUtil.select(SCHEMA, ImmutableSet.of(1));
  Table table = tables.create(SCHEMA, SPEC,
      ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
      location.toString());
  List<Record> inputRecords = RandomGenericData.generate(table.schema(), 1, 0L);
  DataFile dataFile = writeFile(table, Row.of("2020-03-20", 0), format, inputRecords);
  table.newAppend()
      .appendFile(dataFile)
      .commit();

  Job job = Job.getInstance(conf);
  IcebergInputFormat.ConfigBuilder configBuilder = IcebergInputFormat.configure(job);
  configBuilder
      .readFrom(location.toString())
      .project(projectedSchema);
  List<Record> outputRecords = readRecords(job.getConfiguration());
  Assert.assertEquals(inputRecords.size(), outputRecords.size());
  Assert.assertEquals(projectedSchema.asStruct(), outputRecords.get(0).struct());
}
Example #4
Source File: IcebergSource.java From iceberg with Apache License 2.0 | 6 votes |
@Override
public Optional<DataSourceWriter> createWriter(String jobId, StructType dsStruct, SaveMode mode,
                                               DataSourceOptions options) {
  Preconditions.checkArgument(mode == SaveMode.Append || mode == SaveMode.Overwrite,
      "Save mode %s is not supported", mode);
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);

  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema, checkNullability(options), checkOrdering(options));
  SparkUtil.validatePartitionTransforms(table.spec());

  String appId = lazySparkSession().sparkContext().applicationId();
  String wapId = lazySparkSession().conf().get("spark.wap.id", null);
  boolean replacePartitions = mode == SaveMode.Overwrite;

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return Optional.of(new Writer(
      table, io, encryptionManager, options, replacePartitions, appId, wapId, writeSchema, dsStruct));
}
Example #5
Source File: Schema.java From iceberg with Apache License 2.0 | 6 votes |
private Schema internalSelect(Collection<String> names, boolean caseSensitive) {
  if (names.contains(ALL_COLUMNS)) {
    return this;
  }

  Set<Integer> selected = Sets.newHashSet();
  for (String name : names) {
    Integer id;
    if (caseSensitive) {
      id = lazyNameToId().get(name);
    } else {
      id = lazyLowerCaseNameToId().get(name.toLowerCase(Locale.ROOT));
    }

    if (id != null) {
      selected.add(id);
    }
  }

  return TypeUtil.select(this, selected);
}
Example #6
Source File: TestParquetVectorizedReads.java From iceberg with Apache License 2.0 | 6 votes |
private void writeAndValidate(
    Schema schema, int numRecords, long seed, float nullPercentage,
    boolean setAndCheckArrowValidityVector, boolean reuseContainers) throws IOException {
  // Write test data
  Assume.assumeTrue("Parquet Avro cannot write non-string map keys", null == TypeUtil.find(
      schema,
      type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get()));

  Iterable<GenericData.Record> expected = generateData(schema, numRecords, seed, nullPercentage);

  // write a test parquet file using iceberg writer
  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  try (FileAppender<GenericData.Record> writer = getParquetWriter(schema, testFile)) {
    writer.addAll(expected);
  }
  assertRecordsMatch(schema, numRecords, expected, testFile, setAndCheckArrowValidityVector, reuseContainers);
}
Example #7
Source File: TestIcebergInputFormat.java From iceberg with Apache License 2.0 | 6 votes |
private void validateIdentityPartitionProjections(
    String tablePath, Schema projectedSchema, List<Record> inputRecords) throws Exception {
  Job job = Job.getInstance(conf);
  IcebergInputFormat.ConfigBuilder configBuilder = IcebergInputFormat.configure(job);
  configBuilder
      .readFrom(tablePath)
      .project(projectedSchema);
  List<Record> actualRecords = readRecords(job.getConfiguration());

  Set<String> fieldNames = TypeUtil.indexByName(projectedSchema.asStruct()).keySet();
  for (int pos = 0; pos < inputRecords.size(); pos++) {
    Record inputRecord = inputRecords.get(pos);
    Record actualRecord = actualRecords.get(pos);
    Assert.assertEquals("Projected schema should match", projectedSchema.asStruct(), actualRecord.struct());

    for (String name : fieldNames) {
      Assert.assertEquals(
          "Projected field " + name + " should match",
          inputRecord.getField(name), actualRecord.getField(name));
    }
  }
}
Example #8
Source File: RowDataReader.java From iceberg with Apache License 2.0 | 6 votes |
@Override
CloseableIterator<InternalRow> open(FileScanTask task) {
  DataFile file = task.file();

  // update the current file for Spark's filename() function
  InputFileBlockHolder.set(file.path().toString(), task.start(), task.length());

  // schema of rows returned by readers
  PartitionSpec spec = task.spec();
  Set<Integer> idColumns = spec.identitySourceIds();
  Schema partitionSchema = TypeUtil.select(expectedSchema, idColumns);
  boolean projectsIdentityPartitionColumns = !partitionSchema.columns().isEmpty();

  if (projectsIdentityPartitionColumns) {
    return open(task, expectedSchema, PartitionUtil.constantsMap(task, RowDataReader::convertConstant))
        .iterator();
  }
  // return the base iterator
  return open(task, expectedSchema, ImmutableMap.of()).iterator();
}
Example #9
Source File: RandomData.java From iceberg with Apache License 2.0 | 6 votes |
public static Iterable<InternalRow> generateSpark(Schema schema, int numRecords, long seed) {
  return () -> new Iterator<InternalRow>() {
    private SparkRandomDataGenerator generator = new SparkRandomDataGenerator(seed);
    private int count = 0;

    @Override
    public boolean hasNext() {
      return count < numRecords;
    }

    @Override
    public InternalRow next() {
      if (count >= numRecords) {
        throw new NoSuchElementException();
      }
      count += 1;
      return (InternalRow) TypeUtil.visit(schema, generator);
    }
  };
}
Example #10
Source File: RandomData.java From iceberg with Apache License 2.0 | 6 votes |
private static Iterable<Record> newIterable(Supplier<RandomDataGenerator> newGenerator,
                                            Schema schema, int numRecords) {
  return () -> new Iterator<Record>() {
    private int count = 0;
    private RandomDataGenerator generator = newGenerator.get();

    @Override
    public boolean hasNext() {
      return count < numRecords;
    }

    @Override
    public Record next() {
      if (count >= numRecords) {
        throw new NoSuchElementException();
      }
      count += 1;
      return (Record) TypeUtil.visit(schema, generator);
    }
  };
}
Example #11
Source File: IcebergMetadata.java From presto with Apache License 2.0 | 6 votes |
private static Schema toIcebergSchema(List<ColumnMetadata> columns) {
  List<NestedField> icebergColumns = new ArrayList<>();
  for (ColumnMetadata column : columns) {
    if (!column.isHidden()) {
      int index = icebergColumns.size();
      Type type = toIcebergType(column.getType());
      NestedField field = column.isNullable()
          ? NestedField.optional(index, column.getName(), type, column.getComment())
          : NestedField.required(index, column.getName(), type, column.getComment());
      icebergColumns.add(field);
    }
  }
  Schema schema = new Schema(icebergColumns);
  AtomicInteger nextFieldId = new AtomicInteger(1);
  return TypeUtil.assignFreshIds(schema, nextFieldId::getAndIncrement);
}
Example #12
Source File: RandomData.java From iceberg with Apache License 2.0 | 6 votes |
private static Iterable<Row> generateData(Schema schema, int numRecords,
                                          Supplier<RandomRowGenerator> supplier) {
  return () -> new Iterator<Row>() {
    private final RandomRowGenerator generator = supplier.get();
    private int count = 0;

    @Override
    public boolean hasNext() {
      return count < numRecords;
    }

    @Override
    public Row next() {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      ++count;
      return (Row) TypeUtil.visit(schema, generator);
    }
  };
}
Example #13
Source File: SparkWriteBuilder.java From iceberg with Apache License 2.0 | 6 votes |
@Override
public BatchWrite buildForBatch() {
  // Validate
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsSchema);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema,
      checkNullability(spark, options), checkOrdering(spark, options));
  SparkUtil.validatePartitionTransforms(table.spec());

  // Get application id
  String appId = spark.sparkContext().applicationId();

  // Get write-audit-publish id
  String wapId = spark.conf().get("spark.wap.id", null);

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new SparkBatchWrite(
      table, io, encryptionManager, options, overwriteDynamic, overwriteByFilter, overwriteExpr,
      appId, wapId, writeSchema, dsSchema);
}
Example #14
Source File: SparkWriteBuilder.java From iceberg with Apache License 2.0 | 6 votes |
@Override
public StreamingWrite buildForStreaming() {
  // Validate
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsSchema);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema,
      checkNullability(spark, options), checkOrdering(spark, options));
  SparkUtil.validatePartitionTransforms(table.spec());

  // Change to streaming write if it is just append
  Preconditions.checkState(!overwriteDynamic,
      "Unsupported streaming operation: dynamic partition overwrite");
  Preconditions.checkState(!overwriteByFilter || overwriteExpr == Expressions.alwaysTrue(),
      "Unsupported streaming operation: overwrite by filter: %s", overwriteExpr);

  // Get application id
  String appId = spark.sparkContext().applicationId();

  // Get write-audit-publish id
  String wapId = spark.conf().get("spark.wap.id", null);

  Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table));
  Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption());

  return new SparkStreamingWrite(
      table, io, encryptionManager, options, overwriteByFilter, writeQueryId, appId, wapId, writeSchema, dsSchema);
}
Example #15
Source File: BaseTableScan.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * To be able to make refinements {@link #select(Collection)} and {@link #caseSensitive(boolean)} in any order,
 * we resolve the schema to be projected lazily here.
 *
 * @return the Schema to project
 */
private Schema lazyColumnProjection() {
  Collection<String> selectedColumns = context.selectedColumns();
  if (selectedColumns != null) {
    Set<Integer> requiredFieldIds = Sets.newHashSet();

    // all of the filter columns are required
    requiredFieldIds.addAll(
        Binder.boundReferences(table.schema().asStruct(),
            Collections.singletonList(context.rowFilter()), context.caseSensitive()));

    // all of the projection columns are required
    Set<Integer> selectedIds;
    if (context.caseSensitive()) {
      selectedIds = TypeUtil.getProjectedIds(table.schema().select(selectedColumns));
    } else {
      selectedIds = TypeUtil.getProjectedIds(table.schema().caseInsensitiveSelect(selectedColumns));
    }
    requiredFieldIds.addAll(selectedIds);

    return TypeUtil.select(table.schema(), requiredFieldIds);
  }

  return schema;
}
Example #16
Source File: TestCreateTransaction.java From iceberg with Apache License 2.0 | 6 votes |
@Test
public void testCreateTransaction() throws IOException {
  File tableDir = temp.newFolder();
  Assert.assertTrue(tableDir.delete());

  Transaction txn = TestTables.beginCreate(tableDir, "test_create", SCHEMA, unpartitioned());

  Assert.assertNull("Starting a create transaction should not commit metadata",
      TestTables.readMetadata("test_create"));
  Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_create"));

  txn.commitTransaction();

  TableMetadata meta = TestTables.readMetadata("test_create");
  Assert.assertNotNull("Table metadata should be created after transaction commits", meta);
  Assert.assertEquals("Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_create"));
  Assert.assertEquals("Should have 0 manifest files", 0, listManifestFiles(tableDir).size());

  Assert.assertEquals("Table schema should match with reassigned IDs",
      TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
  Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec());
  Assert.assertEquals("Table should not have any snapshots", 0, meta.snapshots().size());
}
Example #17
Source File: TestSchemaUpdate.java From iceberg with Apache License 2.0 | 6 votes |
@Test
public void testDeleteFields() {
  // use schema projection to test column deletes
  List<String> columns = Lists.newArrayList("id", "data", "preferences", "preferences.feature1",
      "preferences.feature2", "locations", "locations.lat", "locations.long", "points",
      "points.x", "points.y", "doubles", "properties");
  for (String name : columns) {
    Set<Integer> selected = Sets.newHashSet(ALL_IDS);
    // remove the id and any nested fields from the projection
    Types.NestedField nested = SCHEMA.findField(name);
    selected.remove(nested.fieldId());
    selected.removeAll(TypeUtil.getProjectedIds(nested.type()));

    Schema del = new SchemaUpdate(SCHEMA, 19).deleteColumn(name).apply();

    Assert.assertEquals("Should match projection with '" + name + "' removed",
        TypeUtil.select(SCHEMA, selected).asStruct(), del.asStruct());
  }
}
Example #18
Source File: SchemaUpdate.java From iceberg with Apache License 2.0 | 5 votes |
private static Schema applyChanges(Schema schema, List<Integer> deletes,
                                   Map<Integer, Types.NestedField> updates,
                                   Multimap<Integer, Types.NestedField> adds,
                                   Multimap<Integer, Move> moves) {
  Types.StructType struct = TypeUtil
      .visit(schema, new ApplyChanges(deletes, updates, adds, moves))
      .asNestedType().asStructType();
  return new Schema(struct.fields());
}
Example #19
Source File: SparkParquetWriters.java From iceberg with Apache License 2.0 | 5 votes |
private FixedDecimalWriter(ColumnDescriptor desc, int precision, int scale) {
  super(desc);
  this.precision = precision;
  this.scale = scale;
  this.length = TypeUtil.decimalRequiredBytes(precision);
  this.bytes = ThreadLocal.withInitial(() -> new byte[length]);
}
Example #20
Source File: AvroDataTest.java From iceberg with Apache License 2.0 | 5 votes |
@Test
public void testArrayOfStructs() throws IOException {
  Schema schema = TypeUtil.assignIncreasingFreshIds(new Schema(
      required(0, "id", LongType.get()),
      optional(1, "data", ListType.ofOptional(2, SUPPORTED_PRIMITIVES))));

  writeAndValidate(schema);
}
Example #21
Source File: AvroDataTest.java From iceberg with Apache License 2.0 | 5 votes |
@Test
public void testMapOfStructs() throws IOException {
  Schema schema = TypeUtil.assignIncreasingFreshIds(new Schema(
      required(0, "id", LongType.get()),
      optional(1, "data", MapType.ofOptional(2, 3, Types.StringType.get(), SUPPORTED_PRIMITIVES))));

  writeAndValidate(schema);
}
Example #22
Source File: AvroDataTest.java From iceberg with Apache License 2.0 | 5 votes |
@Test
public void testMixedTypes() throws IOException {
  StructType structType = StructType.of(
      required(0, "id", LongType.get()),
      optional(1, "list_of_maps",
          ListType.ofOptional(2, MapType.ofOptional(3, 4,
              Types.StringType.get(), SUPPORTED_PRIMITIVES))),
      optional(5, "map_of_lists",
          MapType.ofOptional(6, 7, Types.StringType.get(),
              ListType.ofOptional(8, SUPPORTED_PRIMITIVES))),
      required(9, "list_of_lists",
          ListType.ofOptional(10, ListType.ofOptional(11, SUPPORTED_PRIMITIVES))),
      required(12, "map_of_maps",
          MapType.ofOptional(13, 14, Types.StringType.get(),
              MapType.ofOptional(15, 16, Types.StringType.get(), SUPPORTED_PRIMITIVES))),
      required(17, "list_of_struct_of_nested_types", ListType.ofOptional(19, StructType.of(
          Types.NestedField.required(20, "m1", MapType.ofOptional(21, 22,
              Types.StringType.get(), SUPPORTED_PRIMITIVES)),
          Types.NestedField.optional(23, "l1", ListType.ofRequired(24, SUPPORTED_PRIMITIVES)),
          Types.NestedField.required(25, "l2", ListType.ofRequired(26, SUPPORTED_PRIMITIVES)),
          Types.NestedField.optional(27, "m2", MapType.ofOptional(28, 29,
              Types.StringType.get(), SUPPORTED_PRIMITIVES))
      )))
  );

  Schema schema = new Schema(TypeUtil.assignFreshIds(structType, new AtomicInteger(0)::incrementAndGet)
      .asStructType().fields());

  writeAndValidate(schema);
}
Example #23
Source File: TestParquetVectorizedReads.java From iceberg with Apache License 2.0 | 5 votes |
@Test
@Override
public void testNestedStruct() {
  AssertHelpers.assertThrows(
      "Vectorized reads are not supported yet for struct fields",
      UnsupportedOperationException.class,
      "Vectorized reads are not supported yet for struct fields",
      () -> VectorizedSparkParquetReaders.buildReader(
          TypeUtil.assignIncreasingFreshIds(new Schema(required(1, "struct", SUPPORTED_PRIMITIVES))),
          new MessageType("struct", new GroupType(Type.Repetition.OPTIONAL, "struct").withId(1)),
          false));
}
Example #24
Source File: TestParquetVectorizedReads.java From iceberg with Apache License 2.0 | 5 votes |
@Test
public void testMostlyNullsForOptionalFields() throws IOException {
  writeAndValidate(
      TypeUtil.assignIncreasingFreshIds(new Schema(SUPPORTED_PRIMITIVES.fields())),
      getNumRows(),
      0L,
      0.99f,
      false,
      true);
}
Example #25
Source File: SchemaUpdate.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * For testing only.
 */
SchemaUpdate(Schema schema, int lastColumnId) {
  this.ops = null;
  this.base = null;
  this.schema = schema;
  this.lastColumnId = lastColumnId;
  this.idToParent = Maps.newHashMap(TypeUtil.indexParents(schema.asStruct()));
}
Example #26
Source File: SchemaUpdate.java From iceberg with Apache License 2.0 | 5 votes |
SchemaUpdate(TableOperations ops) {
  this.ops = ops;
  this.base = ops.current();
  this.schema = base.schema();
  this.lastColumnId = base.lastColumnId();
  this.idToParent = Maps.newHashMap(TypeUtil.indexParents(schema.asStruct()));
}
Example #27
Source File: ProjectionDatumReader.java From iceberg with Apache License 2.0 | 5 votes |
@Override
public void setSchema(Schema newFileSchema) {
  this.fileSchema = newFileSchema;
  if (nameMapping == null && !AvroSchemaUtil.hasIds(fileSchema)) {
    nameMapping = MappingUtil.create(expectedSchema);
  }
  Set<Integer> projectedIds = TypeUtil.getProjectedIds(expectedSchema);
  Schema prunedSchema = AvroSchemaUtil.pruneColumns(newFileSchema, projectedIds, nameMapping);
  this.readSchema = AvroSchemaUtil.buildAvroProjection(prunedSchema, expectedSchema, renames);
  this.wrapped = newDatumReader();
}
Example #28
Source File: TableMetadata.java From iceberg with Apache License 2.0 | 5 votes |
static TableMetadata newTableMetadata(Schema schema,
                                      PartitionSpec spec,
                                      String location,
                                      Map<String, String> properties,
                                      int formatVersion) {
  // reassign all column ids to ensure consistency
  AtomicInteger lastColumnId = new AtomicInteger(0);
  Schema freshSchema = TypeUtil.assignFreshIds(schema, lastColumnId::incrementAndGet);

  // rebuild the partition spec using the new column ids
  PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(freshSchema)
      .withSpecId(INITIAL_SPEC_ID);
  for (PartitionField field : spec.fields()) {
    // look up the name of the source field in the old schema to get the new schema's id
    String sourceName = schema.findColumnName(field.sourceId());
    // reassign all partition fields with fresh partition field Ids to ensure consistency
    specBuilder.add(
        freshSchema.findField(sourceName).fieldId(),
        field.name(),
        field.transform().toString());
  }
  PartitionSpec freshSpec = specBuilder.build();

  return new TableMetadata(null, formatVersion, UUID.randomUUID().toString(), location,
      INITIAL_SEQUENCE_NUMBER, System.currentTimeMillis(), lastColumnId.get(), freshSchema,
      INITIAL_SPEC_ID, ImmutableList.of(freshSpec), ImmutableMap.copyOf(properties), -1,
      ImmutableList.of(), ImmutableList.of(), ImmutableList.of());
}
Example #29
Source File: ParquetValueWriters.java From iceberg with Apache License 2.0 | 5 votes |
private FixedDecimalWriter(ColumnDescriptor desc, int precision, int scale) {
  super(desc);
  this.precision = precision;
  this.scale = scale;
  this.length = TypeUtil.decimalRequiredBytes(precision);
  this.bytes = ThreadLocal.withInitial(() -> new byte[length]);
}
Example #30
Source File: TestCreateTransaction.java From iceberg with Apache License 2.0 | 5 votes |
@Test
public void testCreateAndAppendWithTransaction() throws IOException {
  File tableDir = temp.newFolder();
  Assert.assertTrue(tableDir.delete());

  Transaction txn = TestTables.beginCreate(tableDir, "test_append", SCHEMA, unpartitioned());

  Assert.assertNull("Starting a create transaction should not commit metadata",
      TestTables.readMetadata("test_append"));
  Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append"));

  txn.newAppend()
      .appendFile(FILE_A)
      .appendFile(FILE_B)
      .commit();

  Assert.assertNull("Appending in a transaction should not commit metadata",
      TestTables.readMetadata("test_append"));
  Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append"));

  txn.commitTransaction();

  TableMetadata meta = TestTables.readMetadata("test_append");
  Assert.assertNotNull("Table metadata should be created after transaction commits", meta);
  Assert.assertEquals("Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_append"));
  Assert.assertEquals("Should have 1 manifest file", 1, listManifestFiles(tableDir).size());

  Assert.assertEquals("Table schema should match with reassigned IDs",
      TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), meta.schema().asStruct());
  Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec());
  Assert.assertEquals("Table should have one snapshot", 1, meta.snapshots().size());
  validateSnapshot(null, meta.currentSnapshot(), FILE_A, FILE_B);
}