org.apache.iceberg.types.Types Java Examples

Source File: UnboundTransform.java From iceberg with Apache License 2.0

8 votes

/**
 * Binds this unbound transform to the given struct.
 * <p>
 * The reference is bound first. The transform is then re-created for the bound reference's type
 * (via its string form) and validated with {@code canTransform} before the bound transform is built.
 *
 * @param struct the struct type the reference is bound against
 * @param caseSensitive passed through to the reference's {@code bind} call
 * @return a bound transform wrapping the bound reference and the type-specific transform
 * @throws ValidationException if the transform cannot be created for, or applied to, the bound type
 */
@SuppressWarnings("unchecked")
@Override
public BoundTransform<S, T> bind(Types.StructType struct, boolean caseSensitive) {
  BoundReference<S> boundRef = ref.bind(struct, caseSensitive);

  Transform<S, T> typeTransform;
  try {
    // TODO: Avoid using toString/fromString
    // round-trip through the string form to obtain a transform for the bound type
    typeTransform = (Transform<S, T>) Transforms.fromString(boundRef.type(), transform.toString());
    ValidationException.check(typeTransform.canTransform(boundRef.type()),
        "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name());
  } catch (IllegalArgumentException e) {
    // report an IllegalArgumentException raised in the try block with the same message as the check above
    throw new ValidationException(
        "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name());
  }

  return new BoundTransform<>(boundRef, typeTransform);
}

Source File: AvroSchemaWithTypeVisitor.java From iceberg with Apache License 2.0

6 votes

/**
 * Visits an Avro schema together with the matching Iceberg type, dispatching on the Avro schema type.
 * <p>
 * The Iceberg type may be null; in that case null is passed down in place of the narrowed type.
 *
 * @param iType the Iceberg type paired with {@code schema}, or null
 * @param schema the Avro schema to visit
 * @param visitor the visitor that receives the callbacks
 * @return the visitor's result for this schema
 */
public static <T> T visit(Type iType, Schema schema, AvroSchemaWithTypeVisitor<T> visitor) {
  boolean typed = iType != null;

  switch (schema.getType()) {
    case RECORD: {
      Types.StructType struct = typed ? iType.asStructType() : null;
      return visitRecord(struct, schema, visitor);
    }

    case UNION: {
      return visitUnion(iType, schema, visitor);
    }

    case ARRAY: {
      return visitArray(iType, schema, visitor);
    }

    case MAP: {
      Types.MapType mapType = typed ? iType.asMapType() : null;
      // only the value schema is visited for Avro maps
      T valueResult = visit(mapType == null ? null : mapType.valueType(), schema.getValueType(), visitor);
      return visitor.map(mapType, schema, valueResult);
    }

    default: {
      return visitor.primitive(typed ? iType.asPrimitiveType() : null, schema);
    }
  }
}

Source File: ArrowSchemaUtilTest.java From iceberg with Apache License 2.0

6 votes

/**
 * Builds an Iceberg schema with one field per listed type, converts it to an Arrow schema with
 * {@code ArrowSchemaUtil.convert}, and checks the result with {@code validate}.
 */
@Test
public void convertPrimitive() {
  // note: besides primitives, the schema also contains a list field and a map field;
  // field id 11 is not used (ids jump from 10 to 12)
  Schema iceberg = new Schema(
      Types.NestedField.optional(0, INTEGER_FIELD, IntegerType.get()),
      Types.NestedField.optional(1, BOOLEAN_FIELD, BooleanType.get()),
      Types.NestedField.required(2, DOUBLE_FIELD, DoubleType.get()),
      Types.NestedField.required(3, STRING_FIELD, StringType.get()),
      Types.NestedField.optional(4, DATE_FIELD, DateType.get()),
      Types.NestedField.optional(5, TIMESTAMP_FIELD, TimestampType.withZone()),
      Types.NestedField.optional(6, LONG_FIELD, LongType.get()),
      Types.NestedField.optional(7, FLOAT_FIELD, FloatType.get()),
      Types.NestedField.optional(8, TIME_FIELD, TimeType.get()),
      Types.NestedField.optional(9, BINARY_FIELD, Types.BinaryType.get()),
      Types.NestedField.optional(10, DECIMAL_FIELD, Types.DecimalType.of(1, 1)),
      Types.NestedField.optional(12, LIST_FIELD, Types.ListType.ofOptional(13, Types.IntegerType.get())),
      Types.NestedField.required(14, MAP_FIELD, Types.MapType.ofOptional(15, 16,
          StringType.get(), IntegerType.get())),
      Types.NestedField.optional(17, FIXED_WIDTH_BINARY_FIELD, Types.FixedType.ofLength(10)));

  // fully qualified because the simple name Schema refers to the Iceberg schema here
  org.apache.arrow.vector.types.pojo.Schema arrow = ArrowSchemaUtil.convert(iceberg);

  validate(iceberg, arrow);
}

Source File: TestReadProjection.java From iceberg with Apache License 2.0

6 votes

/**
 * Writes a two-column record, reads it back with a projection built by {@code select()} with no
 * column names, and checks that the result is non-null but has no value at ordinal 0.
 */
@Test
public void testEmptyProjection() throws Exception {
  Schema fullSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get())
  );

  Record written = GenericRecord.create(fullSchema);
  written.setField("id", 34L);
  written.setField("data", "test");

  // projection produced by selecting no columns
  Schema noColumns = fullSchema.select();
  Record result = writeAndRead("empty_projection", fullSchema, noColumns, written);

  Assert.assertNotNull("Should read a non-null record", result);
  try {
    result.get(0);
    Assert.fail("Should not retrieve value with ordinal 0");
  } catch (ArrayIndexOutOfBoundsException e) {
    // this is expected because there are no values
  }
}

Source File: PartitionTable.java From presto with Apache License 2.0

6 votes

/**
 * Converts a value into the representation returned for the given Iceberg type.
 * <p>
 * Strings are rendered with {@code toString()}, binary values are copied into a byte array,
 * timestamps are converted from microseconds to milliseconds (and packed with the UTC zone key
 * when the type adjusts to UTC), and floats are returned as their raw int bits. Any other value
 * is returned unchanged.
 *
 * @param value the value to convert; may be null
 * @param type the Iceberg type describing {@code value}
 * @return the converted value, or null when {@code value} is null
 */
private static Object convert(Object value, Type type)
{
    if (value == null) {
        return null;
    }
    if (type instanceof Types.StringType) {
        return value.toString();
    }
    if (type instanceof Types.BinaryType) {
        // TODO the client sees the bytearray's tostring ouput instead of seeing actual bytes, needs to be fixed.
        // Copy only the readable window of the buffer. ByteBuffer.array() would return the whole
        // backing array (ignoring position, limit and array offset) and throws for read-only or
        // direct buffers. duplicate() leaves the caller's position untouched.
        ByteBuffer buffer = ((ByteBuffer) value).duplicate();
        byte[] bytes = new byte[buffer.remaining()];
        buffer.get(bytes);
        return bytes;
    }
    if (type instanceof Types.TimestampType) {
        // the incoming value is in microseconds
        long utcMillis = TimeUnit.MICROSECONDS.toMillis((Long) value);
        Types.TimestampType timestampType = (Types.TimestampType) type;
        if (timestampType.shouldAdjustToUTC()) {
            return packDateTimeWithZone(utcMillis, TimeZoneKey.UTC_KEY);
        }
        return utcMillis;
    }
    if (type instanceof Types.FloatType) {
        return Float.floatToIntBits((Float) value);
    }
    return value;
}

Source File: TestPredicateBinding.java From iceberg with Apache License 2.0

6 votes

/**
 * Binds an IS_NULL predicate against an optional field and against a required field, and checks
 * the bound result in each case.
 */
@Test
@SuppressWarnings("unchecked")
public void testIsNull() {
  StructType optionalStruct = StructType.of(optional(19, "s", Types.StringType.get()));

  UnboundPredicate<?> isNull = new UnboundPredicate<>(IS_NULL, ref("s"));

  // optional field: the predicate stays a unary IS_NULL on field 19
  Expression boundExpr = isNull.bind(optionalStruct);
  BoundPredicate<?> predicate = assertAndUnwrap(boundExpr);
  Assert.assertEquals("Should use the same operation", IS_NULL, predicate.op());
  Assert.assertEquals("Should use the correct field", 19, predicate.ref().fieldId());
  Assert.assertTrue("Should be a unary predicate", predicate.isUnaryPredicate());

  // required field: binding is expected to collapse to alwaysFalse
  StructType requiredStruct = StructType.of(required(20, "s", Types.StringType.get()));
  Assert.assertEquals("IsNull inclusive a required field should be alwaysFalse",
      Expressions.alwaysFalse(), isNull.bind(requiredStruct));
}

Source File: Literals.java From iceberg with Apache License 2.0

6 votes

/**
 * Converts this literal to a literal of the requested type.
 *
 * @param type the target type
 * @return the converted literal, or null when the target type id is not one of the handled cases
 */
@Override
@SuppressWarnings("unchecked")
public <T> Literal<T> to(Type type) {
  switch (type.typeId()) {
    case INTEGER: {
      // no conversion: this literal is returned as-is
      return (Literal<T>) this;
    }
    case LONG: {
      return (Literal<T>) new LongLiteral(value().longValue());
    }
    case FLOAT: {
      return (Literal<T>) new FloatLiteral(value().floatValue());
    }
    case DOUBLE: {
      return (Literal<T>) new DoubleLiteral(value().doubleValue());
    }
    case DATE: {
      return (Literal<T>) new DateLiteral(value());
    }
    case DECIMAL: {
      Types.DecimalType decimalType = (Types.DecimalType) type;
      // rounding mode isn't necessary, but pass one to avoid warnings
      BigDecimal scaled = BigDecimal.valueOf(value())
          .setScale(decimalType.scale(), RoundingMode.HALF_UP);
      return (Literal<T>) new DecimalLiteral(scaled);
    }
    default: {
      return null;
    }
  }
}

Source File: TestFilteredScan.java From iceberg with Apache License 2.0

6 votes

/**
 * Creates (or reuses) a local Spark session with two threads and registers the UDFs
 * that the partition tests call by name.
 */
@BeforeClass
public static void startSpark() {
  TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate();

  // define UDFs used by partition tests
  // "bucket4": applies a 4-bucket transform for long values
  Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4);
  spark.udf().register("bucket4", (UDF1<Long, Integer>) bucket4::apply, IntegerType$.MODULE$);

  // "ts_day": converts the Java timestamp with fromJavaTimestamp, then applies the day transform
  Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone());
  spark.udf().register("ts_day",
      (UDF1<Timestamp, Integer>) timestamp -> day.apply((Long) fromJavaTimestamp(timestamp)),
      IntegerType$.MODULE$);

  // "ts_hour": same conversion, followed by the hour transform
  Transform<Long, Integer> hour = Transforms.hour(Types.TimestampType.withZone());
  spark.udf().register("ts_hour",
      (UDF1<Timestamp, Integer>) timestamp -> hour.apply((Long) fromJavaTimestamp(timestamp)),
      IntegerType$.MODULE$);

  // identity UDFs for the string and long columns
  spark.udf().register("data_ident", (UDF1<String, String>) data -> data, StringType$.MODULE$);
  spark.udf().register("id_ident", (UDF1<Long, Long>) id -> id, LongType$.MODULE$);
}

Source File: TestTruncatesProjection.java From iceberg with Apache License 2.0

6 votes

/**
 * Checks the inclusive projection of predicates on a binary column partitioned by
 * {@code truncate("value", 5)}.
 */
@Test
public void testBinaryInclusive() throws Exception {
  // predicate literal: UTF-8 bytes of "abcdefg"
  ByteBuffer value = ByteBuffer.wrap("abcdefg".getBytes("UTF-8"));
  Schema schema = new Schema(optional(1, "value", Types.BinaryType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();
  // expected projected literal: base64 of the first five bytes, "abcde"
  String expectedValue = TransformUtil.base64encode(ByteBuffer.wrap("abcde".getBytes("UTF-8")));

  // lessThan and greaterThan are expected to project to their inclusive (LT_EQ / GT_EQ) forms
  assertProjectionInclusive(spec, lessThan("value", value), Expression.Operation.LT_EQ, expectedValue);
  assertProjectionInclusive(spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, expectedValue);
  assertProjectionInclusive(spec, greaterThan("value", value), Expression.Operation.GT_EQ, expectedValue);
  assertProjectionInclusive(spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, expectedValue);
  assertProjectionInclusive(spec, equal("value", value), Expression.Operation.EQ, expectedValue);
  assertProjectionInclusiveValue(spec, notEqual("value", value), Expression.Operation.TRUE);

  // second literal shares the "abcde" prefix, so both IN values have the same expected projection
  ByteBuffer anotherValue = ByteBuffer.wrap("abcdehij".getBytes("UTF-8"));
  assertProjectionInclusive(spec, in("value", value, anotherValue),
      Expression.Operation.IN, String.format("[%s, %s]", expectedValue, expectedValue));
  assertProjectionInclusiveValue(spec, notIn("value", value, anotherValue), Expression.Operation.TRUE);
}

Source File: ParquetSchemaUtil.java From iceberg with Apache License 2.0

6 votes

/**
 * Prunes columns from a Parquet file schema that was written without field ids.
 * <p>
 * Files that were written without field ids are read assuming that schema evolution preserved
 * column order. Deleting columns was not allowed.
 * <p>
 * Each top-level column of the file is given an id equal to its 1-based position, and is kept only
 * when that id appears among the expected schema's columns. The order of columns in the resulting
 * Parquet schema matches the Parquet file.
 *
 * @param fileSchema schema from a Parquet file that does not have field ids.
 * @param expectedSchema expected schema
 * @return a parquet schema pruned using the expected schema
 */
public static MessageType pruneColumnsFallback(MessageType fileSchema, Schema expectedSchema) {
  Set<Integer> expectedIds = Sets.newHashSet();
  expectedSchema.columns().forEach(column -> expectedIds.add(column.fieldId()));

  MessageTypeBuilder pruned = org.apache.parquet.schema.Types.buildMessage();

  // positions are counted from 1 and double as the assigned field ids
  int position = 0;
  for (Type fileColumn : fileSchema.getFields()) {
    position += 1;
    if (expectedIds.contains(position)) {
      pruned.addField(fileColumn.withId(position));
    }
  }

  return pruned.named(fileSchema.getName());
}

Source File: TestHelpers.java From iceberg with Apache License 2.0

6 votes

/**
 * Asserts that every entry of the expected map has a counterpart in the actual map.
 * <p>
 * For each expected key, an actual key that compares equal under the map's key type is located,
 * and the corresponding values are then compared under the map's value type.
 *
 * @param map the Iceberg map type describing keys and values
 * @param expected the expected entries
 * @param actual the entries to check
 */
private static void assertEqualsSafe(Types.MapType map,
                                     Map<?, ?> expected, Map<?, ?> actual) {
  Type keyType = map.keyType();
  Type valueType = map.valueType();

  for (Object expectedKey : expected.keySet()) {
    Object actualKey = findMatchingKey(keyType, expectedKey, actual);

    Assert.assertNotNull("Should have a matching key", actualKey);
    assertEqualsSafe(valueType, expected.get(expectedKey), actual.get(actualKey));
  }
}

/**
 * Returns the last key of {@code actual} that passes the key comparison, or null if none does.
 */
private static Object findMatchingKey(Type keyType, Object expectedKey, Map<?, ?> actual) {
  Object found = null;
  for (Object candidate : actual.keySet()) {
    try {
      assertEqualsSafe(keyType, expectedKey, candidate);
      found = candidate;
    } catch (AssertionError e) {
      // failed
    }
  }
  return found;
}

Source File: TestTruncatesProjection.java From iceberg with Apache License 2.0

6 votes

/**
 * Checks the strict projection of predicates on a long column partitioned by
 * {@code truncate("value", 10)}, using the literal 99.
 */
@Test
public void testLongStrictUpperBound() {
  Long value = 99L;
  Schema schema = new Schema(optional(1, "value", Types.LongType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();

  // each call pairs a predicate with the operation and literal its strict projection should have
  assertProjectionStrict(spec, lessThan("value", value), Expression.Operation.LT, "90");
  assertProjectionStrict(spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100");
  assertProjectionStrict(spec, greaterThan("value", value), Expression.Operation.GT, "90");
  assertProjectionStrict(spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "90");
  assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "90");
  assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);

  // set predicates over 98, 99 and 100
  assertProjectionStrict(spec, notIn("value", value - 1, value, value + 1),
      Expression.Operation.NOT_IN, "[90, 90, 100]");
  assertProjectionStrictValue(spec, in("value", value, value - 1), Expression.Operation.FALSE);
}

Source File: TestBucketingProjection.java From iceberg with Apache License 2.0

6 votes

/**
 * Checks the strict projection of predicates on a UUID column partitioned by
 * {@code bucket("value", 10)}.
 */
@Test
public void testBucketUUIDStrict() {
  UUID value = new UUID(123L, 456L);
  Schema schema = new Schema(optional(1, "value", Types.UUIDType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("value", 10).build();

  // the bucket number of the value (i.e. UUID(123L, 456L)) is 4
  assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "4");
  // every other comparison is expected to project to FALSE
  assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, lessThan("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, lessThanOrEqual("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, greaterThan("value", value), Expression.Operation.FALSE);
  assertProjectionStrictValue(spec, greaterThanOrEqual("value", value), Expression.Operation.FALSE);

  // second UUID with the two halves swapped; the expected NOT_IN projection lists buckets 4 and 6
  UUID anotherValue = new UUID(456L, 123L);
  assertProjectionStrict(spec, notIn("value", value, anotherValue),
      Expression.Operation.NOT_IN, "[4, 6]");
  assertProjectionStrictValue(spec, in("value", value, anotherValue), Expression.Operation.FALSE);
}

Source File: OrcSchemaWithTypeVisitor.java From iceberg with Apache License 2.0

6 votes

/**
 * Visits an ORC schema together with the matching Iceberg type, dispatching on the ORC category.
 * <p>
 * The Iceberg type may be null; in that case null is passed down in place of the narrowed type
 * for every category, including the element type of a list.
 *
 * @param iType the Iceberg type paired with {@code schema}, or null
 * @param schema the ORC type description to visit
 * @param visitor the visitor that receives the callbacks
 * @return the visitor's result for this schema
 * @throws UnsupportedOperationException if the ORC schema is a union
 */
public static <T> T visit(Type iType, TypeDescription schema, OrcSchemaWithTypeVisitor<T> visitor) {
  switch (schema.getCategory()) {
    case STRUCT:
      return visitRecord(iType != null ? iType.asStructType() : null, schema, visitor);

    case UNION:
      throw new UnsupportedOperationException("Cannot handle " + schema);

    case LIST:
      Types.ListType list = iType != null ? iType.asListType() : null;
      // guard the element lookup so a null Iceberg type does not throw, matching the MAP case
      return visitor.list(
          list, schema,
          visit(list != null ? list.elementType() : null, schema.getChildren().get(0), visitor));

    case MAP:
      Types.MapType map = iType != null ? iType.asMapType() : null;
      // child 0 is visited with the key type and child 1 with the value type
      return visitor.map(
          map, schema,
          visit(map != null ? map.keyType() : null, schema.getChildren().get(0), visitor),
          visit(map != null ? map.valueType() : null, schema.getChildren().get(1), visitor));

    default:
      return visitor.primitive(iType != null ? iType.asPrimitiveType() : null, schema);
  }
}

Source File: GenericManifestFile.java From iceberg with Apache License 2.0

6 votes

/**
 * Used by Avro reflection to instantiate this class when reading manifest files.
 * <p>
 * Builds the mapping from each position in the given (projected) Avro schema to the position of
 * the field with the same id in the full manifest file schema.
 *
 * @param avroSchema the Avro schema of the projection being read
 * @throws IllegalArgumentException if a projected field id is not present in the full schema
 */
public GenericManifestFile(org.apache.avro.Schema avroSchema) {
  this.avroSchema = avroSchema;

  List<Types.NestedField> projected = AvroSchemaUtil.convert(avroSchema).asStructType().fields();
  List<Types.NestedField> complete = ManifestFile.schema().asStruct().fields();

  int[] positions = new int[projected.size()];
  for (int projectedPos = 0; projectedPos < positions.length; projectedPos += 1) {
    int fullPos = -1;
    // scan the whole schema; a later field with the same id overrides an earlier one
    for (int candidate = 0; candidate < complete.size(); candidate += 1) {
      if (projected.get(projectedPos).fieldId() == complete.get(candidate).fieldId()) {
        fullPos = candidate;
      }
    }

    if (fullPos < 0) {
      throw new IllegalArgumentException("Cannot find projected field: " + projected.get(projectedPos));
    }
    positions[projectedPos] = fullPos;
  }

  this.fromProjectionPos = positions;
}

Source File: TestSchemaUpdate.java From iceberg with Apache License 2.0

6 votes

/**
 * Moves the nested field "struct.data" before "struct.count" and checks the resulting field order.
 */
@Test
public void testMoveNestedFieldBeforeFirst() {
  // starting layout: count, then data
  Types.StructType countThenData = Types.StructType.of(
      required(3, "count", Types.LongType.get()),
      required(4, "data", Types.StringType.get()));
  Schema schema = new Schema(
      required(1, "id", Types.LongType.get()),
      required(2, "struct", countThenData));

  // expected layout: data, then count
  Types.StructType dataThenCount = Types.StructType.of(
      required(4, "data", Types.StringType.get()),
      required(3, "count", Types.LongType.get()));
  Schema expected = new Schema(
      required(1, "id", Types.LongType.get()),
      required(2, "struct", dataThenCount));

  Schema actual = new SchemaUpdate(schema, 4)
      .moveBefore("struct.data", "struct.count")
      .apply();

  Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct());
}

Source File: TestPredicateBinding.java From iceberg with Apache License 2.0

5 votes

/**
 * Binds a NOT_IN predicate with three double literals to a decimal(9, 2) field and checks that
 * the bound predicate is a NOT_EQ against the single decimal 12.40.
 */
@Test
public void testNotInPredicateBindingConversionDedupToNotEq() {
  StructType decimalStruct = StructType.of(required(15, "d", Types.DecimalType.of(9, 2)));
  UnboundPredicate<Double> notInPredicate = Expressions.notIn("d", 12.40, 12.401, 12.402);
  Assert.assertEquals("Should create a NOT_IN unbound predicate", NOT_IN, notInPredicate.op());

  Expression boundExpr = notInPredicate.bind(decimalStruct);
  BoundPredicate<BigDecimal> predicate = assertAndUnwrap(boundExpr);
  Assert.assertTrue("Should be a literal predicate", predicate.isLiteralPredicate());

  BigDecimal literalValue = predicate.asLiteralPredicate().literal().value();
  Assert.assertEquals("Should convert literal set values to a single decimal",
      new BigDecimal("12.40"), literalValue);
  Assert.assertEquals("Should reference correct field ID", 15, predicate.ref().fieldId());
  Assert.assertEquals("Should change the NOT_IN operation to NOT_EQ", NOT_EQ, predicate.op());
}

Source File: TestHelpers.java From iceberg with Apache License 2.0

5 votes

/**
 * Compares a record and a row field by field, using the struct's field types and positions.
 *
 * @param struct the struct type whose fields drive the comparison
 * @param rec the expected record
 * @param row the actual row
 */
public static void assertEqualsSafe(Types.StructType struct, Record rec, Row row) {
  int pos = 0;
  for (Types.NestedField field : struct.fields()) {
    // the value at the same ordinal is read from both sides
    assertEqualsSafe(field.type(), rec.get(pos), row.get(pos));
    pos += 1;
  }
}

Source File: TestReadProjection.java From iceberg with Apache License 2.0

5 votes

/**
 * Writes a record with an id and a string-to-string map, then reads it back with four different
 * projections: id only, map key only, map value only, and the whole map.
 */
@Test
public void testMapProjection() throws IOException {
  Schema writeSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(5, "properties",
          Types.MapType.ofOptional(6, 7, Types.StringType.get(), Types.StringType.get()))
  );

  Map<String, String> properties = ImmutableMap.of("a", "A", "b", "B");

  Record record = GenericRecord.create(writeSchema.asStruct());
  record.setField("id", 34L);
  record.setField("properties", properties);

  // projection 1: only the id column
  Schema idOnly = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get())
  );

  Record projected = writeAndRead("id_only", writeSchema, idOnly, record);
  Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id"));
  Assert.assertNull("Should not project properties map", projected.getField("properties"));

  // projection 2: selecting only the map key is expected to return the entire map
  Schema keyOnly = writeSchema.select("properties.key");
  projected = writeAndRead("key_only", writeSchema, keyOnly, record);
  Assert.assertNull("Should not project id", projected.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) projected.getField("properties")));

  // projection 3: selecting only the map value is expected to return the entire map as well
  Schema valueOnly = writeSchema.select("properties.value");
  projected = writeAndRead("value_only", writeSchema, valueOnly, record);
  Assert.assertNull("Should not project id", projected.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) projected.getField("properties")));

  // projection 4: the map column itself
  Schema mapOnly = writeSchema.select("properties");
  projected = writeAndRead("map_only", writeSchema, mapOnly, record);
  Assert.assertNull("Should not project id", projected.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) projected.getField("properties")));
}

Source File: TestBuildOrcProjection.java From iceberg with Apache License 2.0

5 votes

/**
 * Builds an ORC projection for a schema whose nested struct was renamed and reordered, and checks
 * the resulting ORC type against the original column names.
 */
@Test
public void testProjectionNested() {
  Types.StructType nestedStructType = Types.StructType.of(
      optional(2, "b", Types.StringType.get()),
      optional(3, "c", Types.DateType.get())
  );
  Schema originalSchema = new Schema(
      optional(1, "a", nestedStructType)
  );

  // Original mapping (stored in ORC)
  TypeDescription orcSchema = ORCSchemaUtil.convert(originalSchema);

  // Evolve schema
  // same field ids as before, but with new names (aa, bb, cc) and the nested fields swapped
  Types.StructType newNestedStructType = Types.StructType.of(
      optional(3, "cc", Types.DateType.get()),
      optional(2, "bb", Types.StringType.get())
  );
  Schema evolveSchema = new Schema(
      optional(1, "aa", newNestedStructType)
  );

  TypeDescription newOrcSchema = ORCSchemaUtil.buildOrcProjection(evolveSchema, orcSchema);
  assertEquals(1, newOrcSchema.getChildren().size());
  // subtypes are looked up by the original names (a, b, c), not the evolved ones
  assertEquals(TypeDescription.Category.STRUCT, newOrcSchema.findSubtype("a").getCategory());
  TypeDescription nestedCol = newOrcSchema.findSubtype("a");
  // getId() here is the ORC TypeDescription id: "c" is expected at 2 and "b" at 3
  assertEquals(2, nestedCol.findSubtype("c").getId());
  assertEquals(TypeDescription.Category.DATE, nestedCol.findSubtype("c").getCategory());
  assertEquals(3, nestedCol.findSubtype("b").getId());
  assertEquals(TypeDescription.Category.STRING, nestedCol.findSubtype("b").getCategory());
}

Source File: SchemaEvolutionTest.java From iceberg with Apache License 2.0

5 votes

/**
 * Creates a table with a single float column in a fresh temp directory, updates that column to
 * double, and logs the resulting schema.
 */
@Test
public void floatToDouble() throws IOException {
  // Set up a new table to test this conversion
  Schema floatSchema = new Schema(optional(1, "float", Types.FloatType.get()));
  File tableDir = Files.createTempDirectory("temp").toFile();
  HadoopTables hadoopTables = new HadoopTables(spark.sparkContext().hadoopConfiguration());
  Table table = hadoopTables.create(floatSchema, tableDir.toString());

  table.updateSchema()
      .updateColumn("float", Types.DoubleType.get())
      .commit();

  String updatedSchema = table.schema().toString();
  log.info("Promote float type to double type:\n" + updatedSchema);
}

Source File: FlinkTypeToType.java From iceberg with Apache License 2.0

5 votes

/**
 * Maps a Flink atomic data type to the corresponding Iceberg type, based on its logical type.
 *
 * @param type the Flink atomic data type
 * @return the Iceberg type for the logical type
 * @throws UnsupportedOperationException if the logical type is not one of the handled kinds
 */
@SuppressWarnings("checkstyle:CyclomaticComplexity")
@Override
public Type atomic(AtomicDataType type) {
  LogicalType logical = type.getLogicalType();

  // character types
  if (logical instanceof VarCharType || logical instanceof CharType) {
    return Types.StringType.get();
  }
  if (logical instanceof BooleanType) {
    return Types.BooleanType.get();
  }

  // int, smallint and tinyint all become Iceberg integers
  if (logical instanceof IntType || logical instanceof SmallIntType || logical instanceof TinyIntType) {
    return Types.IntegerType.get();
  }
  if (logical instanceof BigIntType) {
    return Types.LongType.get();
  }

  // variable-length binary vs. fixed-length binary
  if (logical instanceof VarBinaryType) {
    return Types.BinaryType.get();
  }
  if (logical instanceof BinaryType) {
    return Types.FixedType.ofLength(((BinaryType) logical).getLength());
  }

  if (logical instanceof FloatType) {
    return Types.FloatType.get();
  }
  if (logical instanceof DoubleType) {
    return Types.DoubleType.get();
  }

  // date and time types
  if (logical instanceof DateType) {
    return Types.DateType.get();
  }
  if (logical instanceof TimeType) {
    return Types.TimeType.get();
  }
  if (logical instanceof TimestampType) {
    return Types.TimestampType.withoutZone();
  }
  if (logical instanceof LocalZonedTimestampType) {
    return Types.TimestampType.withZone();
  }

  if (logical instanceof DecimalType) {
    DecimalType decimal = (DecimalType) logical;
    return Types.DecimalType.of(decimal.getPrecision(), decimal.getScale());
  }

  throw new UnsupportedOperationException("Not a supported type: " + type.toString());
}

Source File: TestLocalScan.java From iceberg with Apache License 2.0

5 votes

/**
 * Writes 100 generated records with timestamp, date and time columns, then for each record reads
 * the table back filtered on the string form of all four values and checks that a row is returned.
 */
@Test
public void testFilterWithDateAndTimestamp() throws IOException {
  // TODO: Add multiple timestamp tests - there's an issue with ORC caching TZ in ThreadLocal, so it's not possible
  //   to change TZ and test with ORC as they will produce incompatible values.
  Schema schema = new Schema(
      required(1, "timestamp_with_zone", Types.TimestampType.withZone()),
      required(2, "timestamp_without_zone", Types.TimestampType.withoutZone()),
      required(3, "date", Types.DateType.get()),
      required(4, "time", Types.TimeType.get())
  );

  // the folder is deleted right after creation, leaving only its path for TABLES.create
  File tableLocation = temp.newFolder("complex_filter_table");
  Assert.assertTrue(tableLocation.delete());

  Table table = TABLES.create(
      schema, PartitionSpec.unpartitioned(),
      ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
      tableLocation.getAbsolutePath());

  // fixed seed, so the generated records are the same on every run
  List<Record> expected = RandomGenericData.generate(schema, 100, 435691832918L);
  DataFile file = writeFile(tableLocation.toString(), format.addExtension("record-file"), schema, expected);
  table.newFastAppend().appendFile(file).commit();

  for (Record r : expected) {
    // each filter literal is the string form of the record's own field value
    Iterable<Record> filterResult = IcebergGenerics.read(table)
        .where(equal("timestamp_with_zone", r.getField("timestamp_with_zone").toString()))
        .where(equal("timestamp_without_zone", r.getField("timestamp_without_zone").toString()))
        .where(equal("date", r.getField("date").toString()))
        .where(equal("time", r.getField("time").toString()))
        .build();

    // NOTE(review): iterator() is called twice, so hasNext() and next() run on separate
    // iterators - confirm this is intended
    Assert.assertTrue(filterResult.iterator().hasNext());
    Record readRecord = filterResult.iterator().next();
    // only the timestamp_with_zone column of the returned record is compared
    Assert.assertEquals(r.getField("timestamp_with_zone"), readRecord.getField("timestamp_with_zone"));
  }
}

Source File: SnapshotFunctionalityTest.java From iceberg with Apache License 2.0

5 votes

/**
 * Creates an unpartitioned table in a fresh temp directory and appends the same three records to
 * it five times through the Spark "iceberg" writer, then refreshes the table.
 */
@Before
public void before() throws IOException {
  Schema tableSchema = new Schema(
      optional(1, "id", Types.IntegerType.get()),
      optional(2, "data", Types.StringType.get())
  );

  spark = SparkSession.builder().master("local[2]").getOrCreate();

  tableLocation = Files.createTempDirectory("temp").toFile();

  HadoopTables hadoopTables = new HadoopTables(spark.sparkContext().hadoopConfiguration());
  PartitionSpec unpartitioned = PartitionSpec.unpartitioned();
  table = hadoopTables.create(tableSchema, unpartitioned, tableLocation.toString());

  List<SimpleRecord> records = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );

  Dataset<Row> frame = spark.createDataFrame(records, SimpleRecord.class);

  // five separate append writes of the same data
  int remaining = 5;
  while (remaining > 0) {
    frame.select("id", "data").write()
        .format("iceberg")
        .mode("append")
        .save(tableLocation.toString());
    remaining -= 1;
  }

  table.refresh();
}

Source File: TestSchemaUpdate.java From iceberg with Apache License 2.0

5 votes

/**
 * Moves the top-level column "id" after "data" and checks the resulting column order.
 */
@Test
public void testMoveTopLevelColumnAfterLast() {
  Types.NestedField idField = required(1, "id", Types.LongType.get());
  Types.NestedField dataField = required(2, "data", Types.StringType.get());

  // before: id, data; after: data, id
  Schema schema = new Schema(idField, dataField);
  Schema expected = new Schema(dataField, idField);

  Schema actual = new SchemaUpdate(schema, 2)
      .moveAfter("id", "data")
      .apply();

  Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct());
}

Source File: TestDataTableScan.java From iceberg with Apache License 2.0

5 votes

/**
 * Checks that a scan created with {@code select("id")} reports a schema containing only "id".
 */
@Test
public void testTableScanHonorsSelect() {
  Schema idOnly = new Schema(required(1, "id", Types.IntegerType.get()));
  Types.StructType expectedStruct = idOnly.asStruct();

  TableScan selected = table.newScan().select("id");
  Types.StructType scanStruct = selected.schema().asStruct();

  assertEquals("A tableScan.select() should prune the schema", expectedStruct, scanStruct);
}

Source File: RewriteManifestsAction.java From iceberg with Apache License 2.0

5 votes

/**
 * Writes the rows in {@code [startIndex, endIndex)} to a new Avro manifest under {@code location}.
 * <p>
 * Each row is read as (snapshot id, sequence number, file struct) and added to the manifest as an
 * existing entry.
 *
 * @param rows the rows to write
 * @param startIndex index of the first row to write (inclusive)
 * @param endIndex index after the last row to write (exclusive)
 * @param io broadcast file IO used to create the output file
 * @param location directory in which the manifest is created
 * @param format format value passed to {@code ManifestFiles.write}
 * @param spec partition spec of the files being written
 * @param sparkType Spark struct type used to wrap the file rows
 * @return the manifest file produced by the writer
 * @throws IOException if closing the writer fails
 */
private static ManifestFile writeManifest(
    List<Row> rows, int startIndex, int endIndex, Broadcast<FileIO> io,
    String location, int format, PartitionSpec spec, StructType sparkType) throws IOException {

  // a random suffix gives each manifest its own file name
  Path manifestPath = new Path(location, "optimized-m-" + UUID.randomUUID());
  String avroPath = FileFormat.AVRO.addExtension(manifestPath.toString());
  OutputFile target = io.value().newOutputFile(avroPath);

  Types.StructType dataFileType = DataFile.getType(spec.partitionType());
  SparkDataFile fileWrapper = new SparkDataFile(dataFileType, sparkType);

  ManifestWriter writer = ManifestFiles.write(format, spec, target, null);

  try {
    int position = startIndex;
    while (position < endIndex) {
      Row entry = rows.get(position);
      long snapshotId = entry.getLong(0);
      long sequenceNumber = entry.getLong(1);
      Row fileRow = entry.getStruct(2);
      writer.existing(fileWrapper.wrap(fileRow), snapshotId, sequenceNumber);
      position++;
    }
  } finally {
    // the writer is closed before toManifestFile() is called
    writer.close();
  }

  return writer.toManifestFile();
}

Source File: DataTestHelpers.java From iceberg with Apache License 2.0

5 votes

/**
 * Asserts that two lists have the same size and that their elements match position by position
 * under the list's element type.
 *
 * @param list the Iceberg list type describing the elements
 * @param expected the expected elements
 * @param actual the elements to check
 */
public static void assertEquals(Types.ListType list, List<?> expected, List<?> actual) {
  Type elementType = list.elementType();

  int expectedSize = expected.size();
  Assert.assertEquals("List size should match", expectedSize, actual.size());

  for (int pos = 0; pos < expectedSize; pos += 1) {
    assertEquals(elementType, expected.get(pos), actual.get(pos));
  }
}

Source File: StrictMetricsEvaluator.java From iceberg with Apache License 2.0

5 votes

/**
 * Evaluates an IN predicate against the column bounds.
 * <p>
 * Returns {@code ROWS_MUST_MATCH} only when the column cannot contain nulls, both bounds are
 * present, both are members of the literal set, and the two bounds compare equal.
 *
 * @param ref the bound reference being tested
 * @param literalSet the set of literals of the IN predicate
 * @return {@code ROWS_MUST_MATCH} or {@code ROWS_MIGHT_NOT_MATCH}
 */
@Override
public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
  Integer id = ref.fieldId();
  Types.NestedField field = struct.field(id);
  Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));

  if (canContainNulls(id)) {
    return ROWS_MIGHT_NOT_MATCH;
  }

  boolean hasLower = lowerBounds != null && lowerBounds.containsKey(id);
  boolean hasBothBounds = hasLower && upperBounds != null && upperBounds.containsKey(id);
  if (!hasBothBounds) {
    return ROWS_MIGHT_NOT_MATCH;
  }

  // similar to the implementation in eq, first check if the lower bound is in the set
  T lowerBound = Conversions.fromByteBuffer(struct.field(id).type(), lowerBounds.get(id));
  if (!literalSet.contains(lowerBound)) {
    return ROWS_MIGHT_NOT_MATCH;
  }

  // check if the upper bound is in the set
  T upperBound = Conversions.fromByteBuffer(field.type(), upperBounds.get(id));
  if (!literalSet.contains(upperBound)) {
    return ROWS_MIGHT_NOT_MATCH;
  }

  // All values must be in the set if the lower bound and the upper bound are in the set and are equal.
  boolean boundsEqual = ref.comparator().compare(lowerBound, upperBound) == 0;
  return boundsEqual ? ROWS_MUST_MATCH : ROWS_MIGHT_NOT_MATCH;
}

Source File: ParquetAvroValueReaders.java From iceberg with Apache License 2.0

5 votes

/**
 * Builds a list reader for a Parquet list group.
 * <p>
 * The definition and repetition levels are looked up for the current path and reduced by one;
 * the element reader is wrapped with {@code ParquetValueReaders.option} using the element's level.
 *
 * @param expectedList the expected Iceberg list type (not read by this method)
 * @param array the Parquet group for the list
 * @param elementReader the reader for the list elements
 * @return a reader for the list
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  // the first field of the list group is the repeated group
  GroupType repeatedGroup = array.getFields().get(0).asGroupType();
  String[] pathToRepeated = currentPath();

  int repeatedDefLevel = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  int repeatedRepLevel = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  Type element = repeatedGroup.getType(0);
  String[] pathToElement = path(element.getName());
  int elementDefLevel = type.getMaxDefinitionLevel(pathToElement) - 1;

  return new ListReader<>(
      repeatedDefLevel, repeatedRepLevel,
      ParquetValueReaders.option(element, elementDefLevel, elementReader));
}

org.apache.iceberg.types.Types Java Examples