org.apache.iceberg.types.Types Java Examples
The following examples show how to use `org.apache.iceberg.types.Types`.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UnboundTransform.java From iceberg with Apache License 2.0 | 8 votes |
@SuppressWarnings("unchecked") @Override public BoundTransform<S, T> bind(Types.StructType struct, boolean caseSensitive) { BoundReference<S> boundRef = ref.bind(struct, caseSensitive); Transform<S, T> typeTransform; try { // TODO: Avoid using toString/fromString typeTransform = (Transform<S, T>) Transforms.fromString(boundRef.type(), transform.toString()); ValidationException.check(typeTransform.canTransform(boundRef.type()), "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name()); } catch (IllegalArgumentException e) { throw new ValidationException( "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref.name()); } return new BoundTransform<>(boundRef, typeTransform); }
Example #2
Source File: AvroSchemaWithTypeVisitor.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Walks an Avro schema together with the (possibly null) Iceberg type that corresponds to it,
 * dispatching on the Avro schema's type.
 * <p>
 * When {@code iType} is null, null is passed down in place of the struct, map, value or
 * primitive type.
 *
 * @param iType the Iceberg type paired with {@code schema}, or null
 * @param schema the Avro schema to visit
 * @param visitor the visitor that receives the callbacks
 * @return whatever the visitor produces for {@code schema}
 */
public static <T> T visit(Type iType, Schema schema, AvroSchemaWithTypeVisitor<T> visitor) {
  switch (schema.getType()) {
    case RECORD:
      return visitRecord(iType == null ? null : iType.asStructType(), schema, visitor);

    case UNION:
      return visitUnion(iType, schema, visitor);

    case ARRAY:
      return visitArray(iType, schema, visitor);

    case MAP: {
      Types.MapType mapType = iType == null ? null : iType.asMapType();
      Type valueType = mapType == null ? null : mapType.valueType();
      // Visit the value schema first, then hand its result to the map callback.
      T valueResult = visit(valueType, schema.getValueType(), visitor);
      return visitor.map(mapType, schema, valueResult);
    }

    default:
      return visitor.primitive(iType == null ? null : iType.asPrimitiveType(), schema);
  }
}
Example #3
Source File: ArrowSchemaUtilTest.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Builds an Iceberg schema covering primitive, list, map and fixed-width columns, converts it
 * to an Arrow schema, and validates the result against the source schema.
 */
@Test
public void convertPrimitive() {
  Types.NestedField[] columns = {
      Types.NestedField.optional(0, INTEGER_FIELD, IntegerType.get()),
      Types.NestedField.optional(1, BOOLEAN_FIELD, BooleanType.get()),
      Types.NestedField.required(2, DOUBLE_FIELD, DoubleType.get()),
      Types.NestedField.required(3, STRING_FIELD, StringType.get()),
      Types.NestedField.optional(4, DATE_FIELD, DateType.get()),
      Types.NestedField.optional(5, TIMESTAMP_FIELD, TimestampType.withZone()),
      Types.NestedField.optional(6, LONG_FIELD, LongType.get()),
      Types.NestedField.optional(7, FLOAT_FIELD, FloatType.get()),
      Types.NestedField.optional(8, TIME_FIELD, TimeType.get()),
      Types.NestedField.optional(9, BINARY_FIELD, Types.BinaryType.get()),
      Types.NestedField.optional(10, DECIMAL_FIELD, Types.DecimalType.of(1, 1)),
      Types.NestedField.optional(12, LIST_FIELD,
          Types.ListType.ofOptional(13, Types.IntegerType.get())),
      Types.NestedField.required(14, MAP_FIELD,
          Types.MapType.ofOptional(15, 16, StringType.get(), IntegerType.get())),
      Types.NestedField.optional(17, FIXED_WIDTH_BINARY_FIELD, Types.FixedType.ofLength(10))
  };
  Schema icebergSchema = new Schema(columns);

  org.apache.arrow.vector.types.pojo.Schema arrowSchema = ArrowSchemaUtil.convert(icebergSchema);

  validate(icebergSchema, arrowSchema);
}
Example #4
Source File: TestReadProjection.java From iceberg with Apache License 2.0 | 6 votes |
@Test public void testEmptyProjection() throws Exception { Schema schema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = GenericRecord.create(schema); record.setField("id", 34L); record.setField("data", "test"); Record projected = writeAndRead("empty_projection", schema, schema.select(), record); Assert.assertNotNull("Should read a non-null record", projected); try { projected.get(0); Assert.fail("Should not retrieve value with ordinal 0"); } catch (ArrayIndexOutOfBoundsException e) { // this is expected because there are no values } }
Example #5
Source File: PartitionTable.java From presto with Apache License 2.0 | 6 votes |
private static Object convert(Object value, Type type) { if (value == null) { return null; } if (type instanceof Types.StringType) { return value.toString(); } if (type instanceof Types.BinaryType) { // TODO the client sees the bytearray's tostring ouput instead of seeing actual bytes, needs to be fixed. return ((ByteBuffer) value).array(); } if (type instanceof Types.TimestampType) { long utcMillis = TimeUnit.MICROSECONDS.toMillis((Long) value); Types.TimestampType timestampType = (Types.TimestampType) type; if (timestampType.shouldAdjustToUTC()) { return packDateTimeWithZone(utcMillis, TimeZoneKey.UTC_KEY); } return utcMillis; } if (type instanceof Types.FloatType) { return Float.floatToIntBits((Float) value); } return value; }
Example #6
Source File: TestPredicateBinding.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Binds an IS_NULL predicate on column "s" twice: against an optional field, where the bound
 * predicate keeps its operation and field id, and against a required field, where binding
 * yields {@code alwaysFalse()}.
 */
@Test
@SuppressWarnings("unchecked")
public void testIsNull() {
  UnboundPredicate<?> isNullOnS = new UnboundPredicate<>(IS_NULL, ref("s"));

  // optional column: the predicate stays an IS_NULL on field 19
  StructType optionalStruct = StructType.of(optional(19, "s", Types.StringType.get()));
  Expression boundExpr = isNullOnS.bind(optionalStruct);
  BoundPredicate<?> boundPredicate = assertAndUnwrap(boundExpr);

  Assert.assertEquals("Should use the same operation", IS_NULL, boundPredicate.op());
  Assert.assertEquals("Should use the correct field", 19, boundPredicate.ref().fieldId());
  Assert.assertTrue("Should be a unary predicate", boundPredicate.isUnaryPredicate());

  // required column: expected to collapse to alwaysFalse
  StructType requiredStruct = StructType.of(required(20, "s", Types.StringType.get()));
  Assert.assertEquals("IsNull inclusive a required field should be alwaysFalse",
      Expressions.alwaysFalse(), isNullOnS.bind(requiredStruct));
}
Example #7
Source File: Literals.java From iceberg with Apache License 2.0 | 6 votes |
@Override @SuppressWarnings("unchecked") public <T> Literal<T> to(Type type) { switch (type.typeId()) { case INTEGER: return (Literal<T>) this; case LONG: return (Literal<T>) new LongLiteral(value().longValue()); case FLOAT: return (Literal<T>) new FloatLiteral(value().floatValue()); case DOUBLE: return (Literal<T>) new DoubleLiteral(value().doubleValue()); case DATE: return (Literal<T>) new DateLiteral(value()); case DECIMAL: int scale = ((Types.DecimalType) type).scale(); // rounding mode isn't necessary, but pass one to avoid warnings return (Literal<T>) new DecimalLiteral( BigDecimal.valueOf(value()).setScale(scale, RoundingMode.HALF_UP)); default: return null; } }
Example #8
Source File: TestFilteredScan.java From iceberg with Apache License 2.0 | 6 votes |
@BeforeClass public static void startSpark() { TestFilteredScan.spark = SparkSession.builder().master("local[2]").getOrCreate(); // define UDFs used by partition tests Transform<Long, Integer> bucket4 = Transforms.bucket(Types.LongType.get(), 4); spark.udf().register("bucket4", (UDF1<Long, Integer>) bucket4::apply, IntegerType$.MODULE$); Transform<Long, Integer> day = Transforms.day(Types.TimestampType.withZone()); spark.udf().register("ts_day", (UDF1<Timestamp, Integer>) timestamp -> day.apply((Long) fromJavaTimestamp(timestamp)), IntegerType$.MODULE$); Transform<Long, Integer> hour = Transforms.hour(Types.TimestampType.withZone()); spark.udf().register("ts_hour", (UDF1<Timestamp, Integer>) timestamp -> hour.apply((Long) fromJavaTimestamp(timestamp)), IntegerType$.MODULE$); spark.udf().register("data_ident", (UDF1<String, String>) data -> data, StringType$.MODULE$); spark.udf().register("id_ident", (UDF1<Long, Long>) id -> id, LongType$.MODULE$); }
Example #9
Source File: TestTruncatesProjection.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Checks the inclusive projection of predicates on a binary column partitioned by
 * {@code truncate("value", 5)}: comparison and IN predicates project to the base64 of the
 * 5-byte prefix, while NOT_EQ and NOT_IN project to TRUE.
 */
@Test
public void testBinaryInclusive() throws Exception {
  ByteBuffer value = ByteBuffer.wrap("abcdefg".getBytes("UTF-8"));
  Schema schema = new Schema(optional(1, "value", Types.BinaryType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 5).build();

  ByteBuffer truncatedPrefix = ByteBuffer.wrap("abcde".getBytes("UTF-8"));
  String expectedValue = TransformUtil.base64encode(truncatedPrefix);

  // range predicates widen to their inclusive form on the truncated value
  assertProjectionInclusive(
      spec, lessThan("value", value), Expression.Operation.LT_EQ, expectedValue);
  assertProjectionInclusive(
      spec, lessThanOrEqual("value", value), Expression.Operation.LT_EQ, expectedValue);
  assertProjectionInclusive(
      spec, greaterThan("value", value), Expression.Operation.GT_EQ, expectedValue);
  assertProjectionInclusive(
      spec, greaterThanOrEqual("value", value), Expression.Operation.GT_EQ, expectedValue);
  assertProjectionInclusive(
      spec, equal("value", value), Expression.Operation.EQ, expectedValue);
  assertProjectionInclusiveValue(
      spec, notEqual("value", value), Expression.Operation.TRUE);

  // set predicates
  ByteBuffer anotherValue = ByteBuffer.wrap("abcdehij".getBytes("UTF-8"));
  String expectedSet = String.format("[%s, %s]", expectedValue, expectedValue);
  assertProjectionInclusive(
      spec, in("value", value, anotherValue), Expression.Operation.IN, expectedSet);
  assertProjectionInclusiveValue(
      spec, notIn("value", value, anotherValue), Expression.Operation.TRUE);
}
Example #10
Source File: ParquetSchemaUtil.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Prunes columns from a Parquet file schema that was written without field ids.
 * <p>
 * Such files are read under the assumption that schema evolution preserved column order and
 * that columns were never deleted, so the 1-based position of a top-level column is used as
 * its field id.
 * <p>
 * Columns appear in the result in the same order as in the Parquet file.
 *
 * @param fileSchema schema from a Parquet file that does not have field ids
 * @param expectedSchema expected schema
 * @return a Parquet schema pruned using the expected schema
 */
public static MessageType pruneColumnsFallback(MessageType fileSchema, Schema expectedSchema) {
  Set<Integer> wantedIds = Sets.newHashSet();
  for (Types.NestedField column : expectedSchema.columns()) {
    wantedIds.add(column.fieldId());
  }

  MessageTypeBuilder pruned = org.apache.parquet.schema.Types.buildMessage();

  int position = 0;
  for (Type fileColumn : fileSchema.getFields()) {
    position += 1;
    // the 1-based position stands in for the missing field id
    int assumedId = position;
    if (wantedIds.contains(assumedId)) {
      pruned.addField(fileColumn.withId(assumedId));
    }
  }

  return pruned.named(fileSchema.getName());
}
Example #11
Source File: TestHelpers.java From iceberg with Apache License 2.0 | 6 votes |
private static void assertEqualsSafe(Types.MapType map, Map<?, ?> expected, Map<?, ?> actual) { Type keyType = map.keyType(); Type valueType = map.valueType(); for (Object expectedKey : expected.keySet()) { Object matchingKey = null; for (Object actualKey : actual.keySet()) { try { assertEqualsSafe(keyType, expectedKey, actualKey); matchingKey = actualKey; } catch (AssertionError e) { // failed } } Assert.assertNotNull("Should have a matching key", matchingKey); assertEqualsSafe(valueType, expected.get(expectedKey), actual.get(matchingKey)); } }
Example #12
Source File: TestTruncatesProjection.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Checks the strict projection of predicates on a long column partitioned by
 * {@code truncate("value", 10)}, using 99 as the literal.
 */
@Test
public void testLongStrictUpperBound() {
  Long value = 99L;
  Schema schema = new Schema(optional(1, "value", Types.LongType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("value", 10).build();

  // range predicates
  assertProjectionStrict(
      spec, lessThan("value", value), Expression.Operation.LT, "90");
  assertProjectionStrict(
      spec, lessThanOrEqual("value", value), Expression.Operation.LT, "100");
  assertProjectionStrict(
      spec, greaterThan("value", value), Expression.Operation.GT, "90");
  assertProjectionStrict(
      spec, greaterThanOrEqual("value", value), Expression.Operation.GT, "90");

  // equality predicates
  assertProjectionStrict(
      spec, notEqual("value", value), Expression.Operation.NOT_EQ, "90");
  assertProjectionStrictValue(
      spec, equal("value", value), Expression.Operation.FALSE);

  // set predicates
  assertProjectionStrict(
      spec, notIn("value", value - 1, value, value + 1),
      Expression.Operation.NOT_IN, "[90, 90, 100]");
  assertProjectionStrictValue(
      spec, in("value", value, value - 1), Expression.Operation.FALSE);
}
Example #13
Source File: TestBucketingProjection.java From iceberg with Apache License 2.0 | 6 votes |
@Test public void testBucketUUIDStrict() { UUID value = new UUID(123L, 456L); Schema schema = new Schema(optional(1, "value", Types.UUIDType.get())); PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("value", 10).build(); // the bucket number of the value (i.e. UUID(123L, 456L)) is 4 assertProjectionStrict(spec, notEqual("value", value), Expression.Operation.NOT_EQ, "4"); assertProjectionStrictValue(spec, equal("value", value), Expression.Operation.FALSE); assertProjectionStrictValue(spec, lessThan("value", value), Expression.Operation.FALSE); assertProjectionStrictValue(spec, lessThanOrEqual("value", value), Expression.Operation.FALSE); assertProjectionStrictValue(spec, greaterThan("value", value), Expression.Operation.FALSE); assertProjectionStrictValue(spec, greaterThanOrEqual("value", value), Expression.Operation.FALSE); UUID anotherValue = new UUID(456L, 123L); assertProjectionStrict(spec, notIn("value", value, anotherValue), Expression.Operation.NOT_IN, "[4, 6]"); assertProjectionStrictValue(spec, in("value", value, anotherValue), Expression.Operation.FALSE); }
Example #14
Source File: OrcSchemaWithTypeVisitor.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Walks an ORC schema together with the (possibly null) Iceberg type that corresponds to it,
 * dispatching on the ORC type's category.
 * <p>
 * When {@code iType} is null, null is passed down in place of the struct, list, map, element,
 * key, value or primitive type.
 *
 * @param iType the Iceberg type paired with {@code schema}, or null
 * @param schema the ORC type description to visit
 * @param visitor the visitor that receives the callbacks
 * @return whatever the visitor produces for {@code schema}
 * @throws UnsupportedOperationException if {@code schema} is a UNION
 */
public static <T> T visit(Type iType, TypeDescription schema, OrcSchemaWithTypeVisitor<T> visitor) {
  switch (schema.getCategory()) {
    case STRUCT:
      return visitRecord(iType != null ? iType.asStructType() : null, schema, visitor);

    case UNION:
      throw new UnsupportedOperationException("Cannot handle " + schema);

    case LIST:
      Types.ListType list = iType != null ? iType.asListType() : null;
      // Guard the element lookup: list is null when no Iceberg type was supplied, exactly like
      // the map case below. Without the guard this branch threw a NullPointerException.
      return visitor.list(
          list, schema,
          visit(list != null ? list.elementType() : null, schema.getChildren().get(0), visitor));

    case MAP:
      Types.MapType map = iType != null ? iType.asMapType() : null;
      return visitor.map(
          map, schema,
          visit(map != null ? map.keyType() : null, schema.getChildren().get(0), visitor),
          visit(map != null ? map.valueType() : null, schema.getChildren().get(1), visitor));

    default:
      return visitor.primitive(iType != null ? iType.asPrimitiveType() : null, schema);
  }
}
Example #15
Source File: GenericManifestFile.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Used by Avro reflection to instantiate this class when reading manifest files.
 * <p>
 * Records the Avro schema and, for each field of that schema, the position of the field with
 * the same id in the full manifest-file schema.
 *
 * @param avroSchema the Avro schema of the data being read
 * @throws IllegalArgumentException if a field of {@code avroSchema} has no field with the same
 *         id in the manifest-file schema
 */
public GenericManifestFile(org.apache.avro.Schema avroSchema) {
  this.avroSchema = avroSchema;

  List<Types.NestedField> projected = AvroSchemaUtil.convert(avroSchema).asStructType().fields();
  List<Types.NestedField> full = ManifestFile.schema().asStruct().fields();

  this.fromProjectionPos = new int[projected.size()];
  for (int pos = 0; pos < fromProjectionPos.length; pos += 1) {
    Types.NestedField wanted = projected.get(pos);

    // scan the whole list; when several entries share the id, the last one wins
    int match = -1;
    int candidate = 0;
    for (Types.NestedField known : full) {
      if (wanted.fieldId() == known.fieldId()) {
        match = candidate;
      }
      candidate += 1;
    }

    if (match < 0) {
      throw new IllegalArgumentException("Cannot find projected field: " + wanted);
    }
    fromProjectionPos[pos] = match;
  }
}
Example #16
Source File: TestSchemaUpdate.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Moves the nested field {@code struct.data} before {@code struct.count} and checks that the
 * resulting struct lists {@code data} first.
 */
@Test
public void testMoveNestedFieldBeforeFirst() {
  Types.StructType countThenData = Types.StructType.of(
      required(3, "count", Types.LongType.get()),
      required(4, "data", Types.StringType.get()));
  Schema schema = new Schema(
      required(1, "id", Types.LongType.get()),
      required(2, "struct", countThenData));

  Types.StructType dataThenCount = Types.StructType.of(
      required(4, "data", Types.StringType.get()),
      required(3, "count", Types.LongType.get()));
  Schema expected = new Schema(
      required(1, "id", Types.LongType.get()),
      required(2, "struct", dataThenCount));

  SchemaUpdate update = new SchemaUpdate(schema, 4);
  Schema actual = update.moveBefore("struct.data", "struct.count").apply();

  Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct());
}
Example #17
Source File: TestPredicateBinding.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Binds a NOT_IN predicate whose three double literals are bound against a decimal(9, 2)
 * column, and checks that the bound predicate is a NOT_EQ on the single value 12.40.
 */
@Test
public void testNotInPredicateBindingConversionDedupToNotEq() {
  StructType struct = StructType.of(required(15, "d", Types.DecimalType.of(9, 2)));

  UnboundPredicate<Double> notInPredicate = Expressions.notIn("d", 12.40, 12.401, 12.402);
  Assert.assertEquals("Should create a NOT_IN unbound predicate", NOT_IN, notInPredicate.op());

  Expression boundExpr = notInPredicate.bind(struct);
  BoundPredicate<BigDecimal> boundPredicate = assertAndUnwrap(boundExpr);

  Assert.assertTrue("Should be a literal predicate", boundPredicate.isLiteralPredicate());
  Assert.assertEquals("Should convert literal set values to a single decimal",
      new BigDecimal("12.40"), boundPredicate.asLiteralPredicate().literal().value());
  Assert.assertEquals("Should reference correct field ID",
      15, boundPredicate.ref().fieldId());
  Assert.assertEquals("Should change the NOT_IN operation to NOT_EQ",
      NOT_EQ, boundPredicate.op());
}
Example #18
Source File: TestHelpers.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Compares a record with a row field by field, in the order of the struct's fields, using each
 * field's type to pick the comparison.
 *
 * @param struct the struct type describing the fields
 * @param rec the record holding the expected values
 * @param row the row holding the actual values
 */
public static void assertEqualsSafe(Types.StructType struct, Record rec, Row row) {
  List<Types.NestedField> structFields = struct.fields();

  int position = 0;
  for (Types.NestedField field : structFields) {
    assertEqualsSafe(field.type(), rec.get(position), row.get(position));
    position += 1;
  }
}
Example #19
Source File: TestReadProjection.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Writes a record with an id and a string-to-string map, then reads it back under four
 * projections: id only, map key only, map value only, and the whole map. Selecting any part of
 * the map is expected to return the entire map and no id.
 */
@Test
public void testMapProjection() throws IOException {
  Schema writeSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(5, "properties",
          Types.MapType.ofOptional(6, 7, Types.StringType.get(), Types.StringType.get()))
  );

  Map<String, String> properties = ImmutableMap.of("a", "A", "b", "B");

  Record source = GenericRecord.create(writeSchema.asStruct());
  source.setField("id", 34L);
  source.setField("properties", properties);

  // projection 1: id only
  Schema idOnly = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get())
  );
  Record idOnlyResult = writeAndRead("id_only", writeSchema, idOnly, source);
  Assert.assertEquals("Should contain the correct id value",
      34L, (long) idOnlyResult.getField("id"));
  Assert.assertNull("Should not project properties map",
      idOnlyResult.getField("properties"));

  // projection 2: map key only
  Schema keyOnly = writeSchema.select("properties.key");
  Record keyOnlyResult = writeAndRead("key_only", writeSchema, keyOnly, source);
  Assert.assertNull("Should not project id", keyOnlyResult.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) keyOnlyResult.getField("properties")));

  // projection 3: map value only
  Schema valueOnly = writeSchema.select("properties.value");
  Record valueOnlyResult = writeAndRead("value_only", writeSchema, valueOnly, source);
  Assert.assertNull("Should not project id", valueOnlyResult.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) valueOnlyResult.getField("properties")));

  // projection 4: the whole map column
  Schema mapOnly = writeSchema.select("properties");
  Record mapOnlyResult = writeAndRead("map_only", writeSchema, mapOnly, source);
  Assert.assertNull("Should not project id", mapOnlyResult.getField("id"));
  Assert.assertEquals("Should project entire map",
      properties, toStringMap((Map) mapOnlyResult.getField("properties")));
}
Example #20
Source File: TestBuildOrcProjection.java From iceberg with Apache License 2.0 | 5 votes |
@Test public void testProjectionNested() { Types.StructType nestedStructType = Types.StructType.of( optional(2, "b", Types.StringType.get()), optional(3, "c", Types.DateType.get()) ); Schema originalSchema = new Schema( optional(1, "a", nestedStructType) ); // Original mapping (stored in ORC) TypeDescription orcSchema = ORCSchemaUtil.convert(originalSchema); // Evolve schema Types.StructType newNestedStructType = Types.StructType.of( optional(3, "cc", Types.DateType.get()), optional(2, "bb", Types.StringType.get()) ); Schema evolveSchema = new Schema( optional(1, "aa", newNestedStructType) ); TypeDescription newOrcSchema = ORCSchemaUtil.buildOrcProjection(evolveSchema, orcSchema); assertEquals(1, newOrcSchema.getChildren().size()); assertEquals(TypeDescription.Category.STRUCT, newOrcSchema.findSubtype("a").getCategory()); TypeDescription nestedCol = newOrcSchema.findSubtype("a"); assertEquals(2, nestedCol.findSubtype("c").getId()); assertEquals(TypeDescription.Category.DATE, nestedCol.findSubtype("c").getCategory()); assertEquals(3, nestedCol.findSubtype("b").getId()); assertEquals(TypeDescription.Category.STRING, nestedCol.findSubtype("b").getCategory()); }
Example #21
Source File: SchemaEvolutionTest.java From iceberg with Apache License 2.0 | 5 votes |
@Test public void floatToDouble() throws IOException { // Set up a new table to test this conversion Schema schema = new Schema(optional(1, "float", Types.FloatType.get())); File location = Files.createTempDirectory("temp").toFile(); HadoopTables tables = new HadoopTables(spark.sparkContext().hadoopConfiguration()); Table floatTable = tables.create(schema, location.toString()); floatTable.updateSchema().updateColumn("float", Types.DoubleType.get()).commit(); log.info("Promote float type to double type:\n" + floatTable.schema().toString()); }
Example #22
Source File: FlinkTypeToType.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Maps an atomic data type to a type from {@code Types}, based on the class of its logical type.
 * <p>
 * The checks run in a fixed order and the first match wins.
 *
 * @param type the atomic data type to convert
 * @return the matching type
 * @throws UnsupportedOperationException if the logical type is none of the handled classes
 */
@SuppressWarnings("checkstyle:CyclomaticComplexity")
@Override
public Type atomic(AtomicDataType type) {
  LogicalType logical = type.getLogicalType();

  if (logical instanceof VarCharType || logical instanceof CharType) {
    return Types.StringType.get();
  }
  if (logical instanceof BooleanType) {
    return Types.BooleanType.get();
  }
  if (logical instanceof IntType || logical instanceof SmallIntType || logical instanceof TinyIntType) {
    // all three integer widths map to the same integer type
    return Types.IntegerType.get();
  }
  if (logical instanceof BigIntType) {
    return Types.LongType.get();
  }
  if (logical instanceof VarBinaryType) {
    return Types.BinaryType.get();
  }
  if (logical instanceof BinaryType) {
    // fixed-length binary keeps its length
    BinaryType fixedBinary = (BinaryType) logical;
    return Types.FixedType.ofLength(fixedBinary.getLength());
  }
  if (logical instanceof FloatType) {
    return Types.FloatType.get();
  }
  if (logical instanceof DoubleType) {
    return Types.DoubleType.get();
  }
  if (logical instanceof DateType) {
    return Types.DateType.get();
  }
  if (logical instanceof TimeType) {
    return Types.TimeType.get();
  }
  if (logical instanceof TimestampType) {
    return Types.TimestampType.withoutZone();
  }
  if (logical instanceof LocalZonedTimestampType) {
    return Types.TimestampType.withZone();
  }
  if (logical instanceof DecimalType) {
    DecimalType decimal = (DecimalType) logical;
    return Types.DecimalType.of(decimal.getPrecision(), decimal.getScale());
  }

  throw new UnsupportedOperationException("Not a supported type: " + type.toString());
}
Example #23
Source File: TestLocalScan.java From iceberg with Apache License 2.0 | 5 votes |
@Test public void testFilterWithDateAndTimestamp() throws IOException { // TODO: Add multiple timestamp tests - there's an issue with ORC caching TZ in ThreadLocal, so it's not possible // to change TZ and test with ORC as they will produce incompatible values. Schema schema = new Schema( required(1, "timestamp_with_zone", Types.TimestampType.withZone()), required(2, "timestamp_without_zone", Types.TimestampType.withoutZone()), required(3, "date", Types.DateType.get()), required(4, "time", Types.TimeType.get()) ); File tableLocation = temp.newFolder("complex_filter_table"); Assert.assertTrue(tableLocation.delete()); Table table = TABLES.create( schema, PartitionSpec.unpartitioned(), ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()), tableLocation.getAbsolutePath()); List<Record> expected = RandomGenericData.generate(schema, 100, 435691832918L); DataFile file = writeFile(tableLocation.toString(), format.addExtension("record-file"), schema, expected); table.newFastAppend().appendFile(file).commit(); for (Record r : expected) { Iterable<Record> filterResult = IcebergGenerics.read(table) .where(equal("timestamp_with_zone", r.getField("timestamp_with_zone").toString())) .where(equal("timestamp_without_zone", r.getField("timestamp_without_zone").toString())) .where(equal("date", r.getField("date").toString())) .where(equal("time", r.getField("time").toString())) .build(); Assert.assertTrue(filterResult.iterator().hasNext()); Record readRecord = filterResult.iterator().next(); Assert.assertEquals(r.getField("timestamp_with_zone"), readRecord.getField("timestamp_with_zone")); } }
Example #24
Source File: SnapshotFunctionalityTest.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Prepares each test: starts (or reuses) a local Spark session, creates an unpartitioned table
 * with {@code id} and {@code data} columns in a fresh temporary directory, appends the same
 * three records five times, and refreshes the table.
 */
@Before
public void before() throws IOException {
  Schema schema = new Schema(
      optional(1, "id", Types.IntegerType.get()),
      optional(2, "data", Types.StringType.get())
  );

  spark = SparkSession.builder().master("local[2]").getOrCreate();

  tableLocation = Files.createTempDirectory("temp").toFile();

  HadoopTables hadoopTables = new HadoopTables(spark.sparkContext().hadoopConfiguration());
  PartitionSpec unpartitioned = PartitionSpec.unpartitioned();
  table = hadoopTables.create(schema, unpartitioned, tableLocation.toString());

  List<SimpleRecord> records = Lists.newArrayList(
      new SimpleRecord(1, "a"),
      new SimpleRecord(2, "b"),
      new SimpleRecord(3, "c")
  );
  Dataset<Row> frame = spark.createDataFrame(records, SimpleRecord.class);

  // five separate appends of the same data
  for (int round = 0; round < 5; round++) {
    frame.select("id", "data")
        .write()
        .format("iceberg")
        .mode("append")
        .save(tableLocation.toString());
  }

  table.refresh();
}
Example #25
Source File: TestSchemaUpdate.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Moves the top-level column {@code id} after {@code data} and checks that the resulting
 * struct lists {@code data} before {@code id}.
 */
@Test
public void testMoveTopLevelColumnAfterLast() {
  Schema idThenData = new Schema(
      required(1, "id", Types.LongType.get()),
      required(2, "data", Types.StringType.get()));

  Schema dataThenId = new Schema(
      required(2, "data", Types.StringType.get()),
      required(1, "id", Types.LongType.get()));

  SchemaUpdate update = new SchemaUpdate(idThenData, 2);
  Schema moved = update.moveAfter("id", "data").apply();

  Assert.assertEquals("Should move data first", dataThenId.asStruct(), moved.asStruct());
}
Example #26
Source File: TestDataTableScan.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Checks that a scan created with {@code select("id")} reports a schema containing only the
 * {@code id} column.
 */
@Test
public void testTableScanHonorsSelect() {
  TableScan idScan = table.newScan().select("id");

  Schema idOnly = new Schema(required(1, "id", Types.IntegerType.get()));

  assertEquals("A tableScan.select() should prune the schema",
      idOnly.asStruct(),
      idScan.schema().asStruct());
}
Example #27
Source File: RewriteManifestsAction.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Writes the rows in {@code [startIndex, endIndex)} as existing entries of a new manifest file
 * named {@code optimized-m-<random UUID>} under {@code location}.
 * <p>
 * Each row is read as: snapshot id at column 0, sequence number at column 1, and the file
 * struct at column 2.
 *
 * @param rows the rows to write
 * @param startIndex index of the first row to write (inclusive)
 * @param endIndex index after the last row to write (exclusive)
 * @param io broadcast file IO used to create the output file
 * @param location directory for the new manifest
 * @param format format passed to the manifest writer factory
 * @param spec partition spec of the manifest
 * @param sparkType type handed to the row wrapper
 * @return the manifest file reported by the writer after it has been closed
 * @throws IOException declared by this method; which call raises it is not visible here
 */
private static ManifestFile writeManifest(
    List<Row> rows, int startIndex, int endIndex, Broadcast<FileIO> io,
    String location, int format, PartitionSpec spec, StructType sparkType) throws IOException {

  String manifestName = "optimized-m-" + UUID.randomUUID();
  Path manifestPath = new Path(location, manifestName);
  String manifestLocation = FileFormat.AVRO.addExtension(manifestPath.toString());
  OutputFile outputFile = io.value().newOutputFile(manifestLocation);

  Types.StructType dataFileType = DataFile.getType(spec.partitionType());
  SparkDataFile fileWrapper = new SparkDataFile(dataFileType, sparkType);

  ManifestWriter writer = ManifestFiles.write(format, spec, outputFile, null);

  try {
    int cursor = startIndex;
    while (cursor < endIndex) {
      Row entry = rows.get(cursor);
      long snapshotId = entry.getLong(0);
      long sequenceNumber = entry.getLong(1);
      Row fileRow = entry.getStruct(2);
      writer.existing(fileWrapper.wrap(fileRow), snapshotId, sequenceNumber);
      cursor++;
    }
  } finally {
    writer.close();
  }

  // the manifest file is requested only after the writer has been closed
  return writer.toManifestFile();
}
Example #28
Source File: DataTestHelpers.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Asserts that two lists have the same size and that their elements are equal position by
 * position, comparing elements according to the list's element type.
 *
 * @param list the list type supplying the element type
 * @param expected the expected elements
 * @param actual the elements to check
 */
public static void assertEquals(Types.ListType list, List<?> expected, List<?> actual) {
  Type typeOfElements = list.elementType();

  Assert.assertEquals("List size should match", expected.size(), actual.size());

  int position = 0;
  while (position < expected.size()) {
    assertEquals(typeOfElements, expected.get(position), actual.get(position));
    position += 1;
  }
}
Example #29
Source File: StrictMetricsEvaluator.java From iceberg with Apache License 2.0 | 5 votes |
@Override public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) { Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (canContainNulls(id)) { return ROWS_MIGHT_NOT_MATCH; } if (lowerBounds != null && lowerBounds.containsKey(id) && upperBounds != null && upperBounds.containsKey(id)) { // similar to the implementation in eq, first check if the lower bound is in the set T lower = Conversions.fromByteBuffer(struct.field(id).type(), lowerBounds.get(id)); if (!literalSet.contains(lower)) { return ROWS_MIGHT_NOT_MATCH; } // check if the upper bound is in the set T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id)); if (!literalSet.contains(upper)) { return ROWS_MIGHT_NOT_MATCH; } // finally check if the lower bound and the upper bound are equal if (ref.comparator().compare(lower, upper) != 0) { return ROWS_MIGHT_NOT_MATCH; } // All values must be in the set if the lower bound and the upper bound are in the set and are equal. return ROWS_MUST_MATCH; } return ROWS_MIGHT_NOT_MATCH; }
Example #30
Source File: ParquetAvroValueReaders.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Builds the reader for a list column.
 * <p>
 * The repeated group is taken as the first field of {@code array}, and the element as the first
 * type of that group. The definition and repetition levels handed to the readers are the
 * maximum levels at the respective paths, minus one.
 *
 * @param expectedList the expected list type (not read by this method)
 * @param array the Parquet group for the list
 * @param elementReader the reader for the list's elements
 * @return a list reader wrapping the element reader in an option reader
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  GroupType repeatedGroup = array.getFields().get(0).asGroupType();
  String[] pathToRepeated = currentPath();

  int repeatedDefLevel = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  int repeatedRepLevel = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  Type element = repeatedGroup.getType(0);
  String[] pathToElement = path(element.getName());
  int elementDefLevel = type.getMaxDefinitionLevel(pathToElement) - 1;

  return new ListReader<>(
      repeatedDefLevel,
      repeatedRepLevel,
      ParquetValueReaders.option(element, elementDefLevel, elementReader));
}