org.apache.iceberg.StructLike Java Examples
The following examples show how to use
org.apache.iceberg.StructLike.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ManifestFileUtil.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Reports whether the manifest may contain data for at least one of the given partitions.
 *
 * <p>A manifest that exposes no partition field summaries is treated as a possible match.
 *
 * @param manifest manifest whose partition field summaries are inspected
 * @param partitions candidate partition tuples to test
 * @param specLookup maps a partition spec ID to its {@link PartitionSpec}
 * @return {@code true} if there are no summaries or any partition passes
 *     {@code canContain}; {@code false} otherwise
 */
public static boolean canContainAny(ManifestFile manifest, Iterable<StructLike> partitions,
                                    Function<Integer, PartitionSpec> specLookup) {
  // Without summaries nothing can be ruled out.
  if (manifest.partitions() == null) {
    return true;
  }

  PartitionSpec spec = specLookup.apply(manifest.partitionSpecId());
  Types.StructType partitionType = spec.partitionType();
  List<ManifestFile.PartitionFieldSummary> rawSummaries = manifest.partitions();
  List<Types.NestedField> partitionFields = partitionType.fields();

  // Pair each summary with the primitive type of the partition field at the same position.
  List<FieldSummary<?>> typedSummaries = Lists.newArrayListWithExpectedSize(rawSummaries.size());
  for (int i = 0; i < rawSummaries.size(); i++) {
    typedSummaries.add(
        new FieldSummary<>(partitionFields.get(i).type().asPrimitiveType(), rawSummaries.get(i)));
  }

  for (StructLike candidate : partitions) {
    if (canContain(typedSummaries, candidate)) {
      return true;
    }
  }

  return false;
}
Example #2
Source File: InternalRecordWrapper.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Picks a value converter for the given type.
 *
 * @param type the type whose values need converting
 * @return a conversion function for DATE, TIME, TIMESTAMP, FIXED and STRUCT types, or
 *     {@code null} for every other type ID
 */
private static Function<Object, Object> converter(Type type) {
  switch (type.typeId()) {
    case DATE: {
      return value -> DateTimeUtil.daysFromDate((LocalDate) value);
    }
    case TIME: {
      return value -> DateTimeUtil.microsFromTime((LocalTime) value);
    }
    case TIMESTAMP: {
      // The Java class to cast to depends on whether the timestamp is adjusted to UTC.
      boolean adjustToUtc = ((Types.TimestampType) type).shouldAdjustToUTC();
      if (!adjustToUtc) {
        return value -> DateTimeUtil.microsFromTimestamp((LocalDateTime) value);
      }
      return value -> DateTimeUtil.microsFromTimestamptz((OffsetDateTime) value);
    }
    case FIXED: {
      return value -> ByteBuffer.wrap((byte[]) value);
    }
    case STRUCT: {
      // One nested wrapper is created here and reused by every call of the returned function.
      InternalRecordWrapper nested = new InternalRecordWrapper(type.asStructType());
      return value -> nested.wrap((StructLike) value);
    }
    default:
      return null;
  }
}
Example #3
Source File: TestIcebergPartitionData.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Commits a single data file carrying {@code partitionData} to a new table under the test
 * folder, then checks that the first partition value of every planned file equals
 * {@code expectedValue}.
 *
 * @param partitionSpec spec used to build the data file
 * @param partitionData partition tuple attached to the data file
 * @param columnName partition column name passed to the create-table committer
 * @param expectedClass Java class used to read the partition value; when it is
 *     {@code ByteBuffer.class}, {@code expectedValue} must be a {@code byte[]}
 * @param expectedValue value the partition is expected to hold
 * @throws Exception if creating the file, committing or loading the table fails
 */
private void verifyPartitionValue(PartitionSpec partitionSpec, IcebergPartitionData partitionData,
                                  String columnName, Class expectedClass, Object expectedValue)
    throws Exception {
  File tableFolder = new File(folder.getRoot(), "icebergPartitionTest");
  try {
    tableFolder.mkdir();
    File dataFile = new File(folder.getRoot(), "a.parquet");
    dataFile.createNewFile();

    DataFile d1 = DataFiles.builder(partitionSpec)
        .withInputFile(Files.localInput(dataFile))
        .withRecordCount(50)
        .withFormat(FileFormat.PARQUET)
        .withPartition(partitionData)
        .build();

    IcebergOpCommitter committer = IcebergOperation.getCreateTableCommitter(
        Path.of(tableFolder.toPath().toString()),
        (new SchemaConverter()).fromIceberg(schema),
        Lists.newArrayList(columnName),
        new Configuration());
    committer.consumeData(Lists.newArrayList(d1));
    committer.commit();

    Table table = new HadoopTables(new Configuration()).load(tableFolder.getPath());
    for (FileScanTask fileScanTask : table.newScan().planFiles()) {
      StructLike structLike = fileScanTask.file().partition();
      Object actualValue = structLike.get(0, expectedClass);
      if (expectedClass == ByteBuffer.class) {
        // Compare buffer contents directly; matching hash codes alone would not prove equality.
        Assert.assertEquals(ByteBuffer.wrap((byte[]) expectedValue), actualValue);
      } else {
        // assertEquals reports both values on failure and tolerates a null actual value.
        Assert.assertEquals(expectedValue, actualValue);
      }
    }
  } finally {
    // NOTE(review): File.delete() does not remove a non-empty directory, so this cleanup is
    // best-effort only.
    tableFolder.delete();
  }
}
Example #4
Source File: IcebergSplitSource.java From presto with Apache License 2.0 | 5 votes |
private static Map<Integer, String> getPartitionKeys(FileScanTask scanTask) { StructLike partition = scanTask.file().partition(); PartitionSpec spec = scanTask.spec(); Map<PartitionField, Integer> fieldToIndex = getIdentityPartitions(spec); Map<Integer, String> partitionKeys = new HashMap<>(); fieldToIndex.forEach((field, index) -> { int id = field.sourceId(); Type type = spec.schema().findType(id); Class<?> javaClass = type.typeId().javaClass(); Object value = partition.get(index, javaClass); if (value == null) { partitionKeys.put(id, null); } else { String partitionValue; if (type.typeId() == FIXED || type.typeId() == BINARY) { // this is safe because Iceberg PartitionData directly wraps the byte array partitionValue = new String(((ByteBuffer) value).array(), UTF_8); } else { partitionValue = value.toString(); } partitionKeys.put(id, partitionValue); } }); return Collections.unmodifiableMap(partitionKeys); }
Example #5
Source File: TestIcebergCTASWithPartition.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Loads the table at {@code tableFolder} and asserts that the first partition value of every
 * planned file equals {@code expectedValue}.
 *
 * @param tableFolder location of the table to load
 * @param expectedClass Java class used to read the partition value
 * @param expectedValue value each file's first partition field is expected to hold
 */
private void verifyPartitionValue(String tableFolder, Class expectedClass, Object expectedValue) {
  Table table = new HadoopTables(new Configuration()).load(tableFolder);
  for (FileScanTask fileScanTask : table.newScan().planFiles()) {
    StructLike structLike = fileScanTask.file().partition();
    // JUnit expects (expected, actual); the reversed order yields misleading failure messages.
    Assert.assertEquals(expectedValue, structLike.get(0, expectedClass));
  }
}
Example #6
Source File: TestInsertIntoTable.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Loads the table stored in {@code tableFolder} and asserts that the first partition value of
 * every planned file equals {@code expectedValue}.
 *
 * @param tableFolder folder containing the table to load
 * @param expectedClass Java class used to read the partition value
 * @param expectedValue value each file's first partition field is expected to hold
 */
private void checkSinglePartitionValue(File tableFolder, Class expectedClass, Object expectedValue) {
  Table table = new HadoopTables(new Configuration()).load(tableFolder.getPath());
  for (FileScanTask fileScanTask : table.newScan().planFiles()) {
    StructLike structLike = fileScanTask.file().partition();
    // assertEquals reports both values on failure and fails cleanly (instead of throwing a
    // NullPointerException) when the partition value is null.
    Assert.assertEquals(expectedValue, structLike.get(0, expectedClass));
  }
}
Example #7
Source File: ManifestFileUtil.java From iceberg with Apache License 2.0 | 5 votes |
private static boolean canContain(List<FieldSummary<?>> summaries, StructLike struct) { if (struct.size() != summaries.size()) { return false; } // if any value is not contained, the struct is not contained and this can return early for (int pos = 0; pos < summaries.size(); pos += 1) { Object value = struct.get(pos, Object.class); if (!summaries.get(pos).canContain(value)) { return false; } } return true; }
Example #8
Source File: IcebergInputFormat.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Wraps the iterable in a filter that evaluates the residual expression against each record.
 *
 * <p>The input is returned unchanged when the {@code SKIP_RESIDUAL_FILTERING} configuration
 * flag is set, when {@code residual} is {@code null}, or when it is the
 * {@code Expressions.alwaysTrue()} instance.
 *
 * @param iter records to filter
 * @param residual residual expression to evaluate, may be {@code null}
 * @param readSchema schema whose struct type the evaluator is built against
 * @return the filtered iterable, or {@code iter} itself when no filtering is applied
 */
private CloseableIterable<T> applyResidualFiltering(CloseableIterable<T> iter, Expression residual,
                                                    Schema readSchema) {
  boolean skipResidual = context.getConfiguration().getBoolean(SKIP_RESIDUAL_FILTERING, false);
  if (skipResidual || residual == null || residual == Expressions.alwaysTrue()) {
    return iter;
  }

  Evaluator residualEvaluator = new Evaluator(readSchema.asStruct(), residual, caseSensitive);
  return CloseableIterable.filter(iter, row -> residualEvaluator.eval((StructLike) row));
}
Example #9
Source File: PartitionTable.java From presto with Apache License 2.0 | 5 votes |
public Partition( StructLike values, long recordCount, long size, Map<Integer, Object> minValues, Map<Integer, Object> maxValues, Map<Integer, Long> nullCounts) { this.values = requireNonNull(values, "values is null"); this.recordCount = recordCount; this.fileCount = 1; this.size = size; if (minValues == null || maxValues == null || nullCounts == null) { this.minValues = null; this.maxValues = null; this.nullCounts = null; corruptedStats = null; } else { this.minValues = new HashMap<>(minValues); this.maxValues = new HashMap<>(maxValues); // we are assuming if minValues is not present, max will be not be present either. this.corruptedStats = nonPartitionPrimitiveColumns.stream() .map(Types.NestedField::fieldId) .filter(id -> !minValues.containsKey(id) && (!nullCounts.containsKey(id) || nullCounts.get(id) != recordCount)) .collect(toImmutableSet()); this.nullCounts = new HashMap<>(nullCounts); hasValidColumnMetrics = true; } }
Example #10
Source File: StructInternalRow.java From iceberg with Apache License 2.0 | 5 votes |
/**
 * Converts a collection of values into {@code ArrayData}, translating each element according
 * to {@code elementType}.
 *
 * @param elementType type of the elements in {@code values}
 * @param values the elements to convert
 * @return the array data produced by {@code fillArray}
 * @throws UnsupportedOperationException if the element type ID is not handled
 */
private ArrayData collectionToArrayData(Type elementType, Collection<?> values) {
  switch (elementType.typeId()) {
    case BOOLEAN:
    case INTEGER:
    case DATE:
    case TIME:
    case LONG:
    case TIMESTAMP:
    case FLOAT:
    case DOUBLE:
      // These values are stored as-is.
      return fillArray(values, array -> (pos, value) -> array[pos] = value);
    case STRING:
      return fillArray(values, array -> (BiConsumer<Integer, CharSequence>) (pos, seq) ->
          array[pos] = UTF8String.fromString(seq.toString()));
    case FIXED:
    case BINARY:
      return fillArray(values, array -> (BiConsumer<Integer, ByteBuffer>) (pos, buf) ->
          array[pos] = ByteBuffers.toByteArray(buf));
    case DECIMAL:
      return fillArray(values, array -> (BiConsumer<Integer, BigDecimal>) (pos, dec) ->
          array[pos] = Decimal.apply(dec));
    case STRUCT:
      return fillArray(values, array -> (BiConsumer<Integer, StructLike>) (pos, tuple) ->
          array[pos] = new StructInternalRow(elementType.asStructType(), tuple));
    case LIST:
      // Recurse with the nested list's element type. Passing the list type itself would make
      // the recursive call treat the inner elements as collections again.
      return fillArray(values, array -> (BiConsumer<Integer, Collection<?>>) (pos, list) ->
          array[pos] = collectionToArrayData(elementType.asListType().elementType(), list));
    case MAP:
      return fillArray(values, array -> (BiConsumer<Integer, Map<?, ?>>) (pos, map) ->
          array[pos] = mapToMapData(elementType.asMapType(), map));
    default:
      throw new UnsupportedOperationException("Unsupported array element type: " + elementType);
  }
}
Example #11
Source File: StructInternalRow.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Creates a row over the given struct.
 *
 * @param type struct type stored alongside the data
 * @param struct the struct this row reads from
 */
private StructInternalRow(Types.StructType type, StructLike struct) {
  this.struct = struct;
  this.type = type;
}
Example #12
Source File: InternalRecordWrapper.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Points this wrapper at the given record.
 *
 * @param record the record to wrap
 * @return this wrapper, to allow call chaining
 */
public InternalRecordWrapper wrap(StructLike record) {
  wrapped = record;
  return this;
}
Example #13
Source File: StructInternalRow.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Replaces the struct this row reads from.
 *
 * @param newStruct the struct to use from now on
 * @return this row, to allow call chaining
 */
public StructInternalRow setStruct(StructLike newStruct) {
  struct = newStruct;
  return this;
}
Example #14
Source File: StructInternalRow.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Returns the nested struct at the given position as a new {@code StructInternalRow}.
 *
 * @param ordinal position of the struct field
 * @param numFields not used by this implementation
 * @return a row over the nested struct, typed with the field's struct type
 */
@Override
public InternalRow getStruct(int ordinal, int numFields) {
  Types.StructType nestedType = type.fields().get(ordinal).type().asStructType();
  StructLike nestedStruct = struct.get(ordinal, StructLike.class);
  return new StructInternalRow(nestedType, nestedStruct);
}
Example #15
Source File: SparkDataFile.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Returns the partition tuple held in {@code wrappedPartition}.
 *
 * @return the wrapped partition
 */
@Override
public StructLike partition() {
  return this.wrappedPartition;
}
Example #16
Source File: TestSparkDataFile.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Asserts that two structs have the same size and equal values at every position.
 *
 * @param expected the reference struct
 * @param actual the struct under test
 */
private void checkStructLike(StructLike expected, StructLike actual) {
  Assert.assertEquals("Struct size should match", expected.size(), actual.size());

  for (int pos = 0; pos < expected.size(); pos += 1) {
    Object expectedValue = expected.get(pos, Object.class);
    Object actualValue = actual.get(pos, Object.class);
    Assert.assertEquals("Struct values must match", expectedValue, actualValue);
  }
}
Example #17
Source File: StructLikeWrapper.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Creates a new wrapper around the given struct.
 *
 * @param struct the struct to wrap
 * @return a new {@code StructLikeWrapper} holding {@code struct}
 */
public static StructLikeWrapper wrap(StructLike struct) {
  StructLikeWrapper wrapper = new StructLikeWrapper(struct);
  return wrapper;
}
Example #18
Source File: StructLikeWrapper.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Creates a wrapper holding the given struct.
 *
 * @param struct the struct to hold
 */
private StructLikeWrapper(StructLike struct) {
  this.struct = struct;
}
Example #19
Source File: StructLikeWrapper.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Replaces the struct held by this wrapper.
 *
 * @param newStruct the struct to hold from now on
 * @return this wrapper, to allow call chaining
 */
public StructLikeWrapper set(StructLike newStruct) {
  struct = newStruct;
  return this;
}
Example #20
Source File: StructLikeWrapper.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Returns the struct currently held by this wrapper.
 *
 * @return the held struct
 */
public StructLike get() {
  return this.struct;
}
Example #21
Source File: TestIcebergInputFormat.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Writes the records to a new temporary file in the requested format and builds a
 * {@code DataFile} describing it.
 *
 * @param table table whose schema and spec are used
 * @param partitionData partition tuple to attach, or {@code null} to attach none
 * @param fileFormat format to write; AVRO, PARQUET and ORC are handled
 * @param records records to append to the file
 * @return the data file built from the written file's path, format, size and metrics
 * @throws IOException if creating, writing or closing the file fails
 * @throws UnsupportedOperationException if {@code fileFormat} is not one of the handled formats
 */
private DataFile writeFile(
    Table table, StructLike partitionData, FileFormat fileFormat, List<Record> records)
    throws IOException {
  File file = temp.newFile();
  // Only the path is needed here; the file itself is removed before a writer is opened on it.
  Assert.assertTrue(file.delete());

  FileAppender<Record> appender;
  switch (fileFormat) {
    case AVRO:
      appender = Avro.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(DataWriter::create)
          .named(fileFormat.name())
          .build();
      break;
    case PARQUET:
      appender = Parquet.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(GenericParquetWriter::buildWriter)
          .named(fileFormat.name())
          .build();
      break;
    case ORC:
      appender = ORC.write(Files.localOutput(file))
          .schema(table.schema())
          .createWriterFunc(GenericOrcWriter::buildWriter)
          .build();
      break;
    default:
      throw new UnsupportedOperationException("Cannot write format: " + fileFormat);
  }

  try {
    appender.addAll(records);
  } finally {
    appender.close();
  }

  // Record the format that was actually used to write the file (the fileFormat argument),
  // so the metadata always agrees with the file contents.
  DataFiles.Builder builder = DataFiles.builder(table.spec())
      .withPath(file.toString())
      .withFormat(fileFormat)
      .withFileSizeInBytes(file.length())
      .withMetrics(appender.metrics());
  if (partitionData != null) {
    builder.withPartition(partitionData);
  }
  return builder.build();
}
Example #22
Source File: ResidualEvaluator.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Stores the struct on this instance and then visits the expression with this instance as
 * the visitor.
 *
 * @param dataStruct struct to evaluate the expression against
 * @return the expression produced by the visit
 */
private Expression eval(StructLike dataStruct) {
  struct = dataStruct;
  Expression visited = ExpressionVisitors.visit(expr, this);
  return visited;
}
Example #23
Source File: ResidualEvaluator.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Returns the stored expression unchanged, regardless of the partition data passed in.
 *
 * @param ignored not used
 * @return the stored expression
 */
@Override
public Expression residualFor(StructLike ignored) {
  return this.expr;
}
Example #24
Source File: BoundReference.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Returns the accessor this reference was created with.
 *
 * @return the accessor
 */
public Accessor<StructLike> accessor() {
  return this.accessor;
}
Example #25
Source File: BoundReference.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Reads this reference's value from the given struct through the accessor.
 *
 * @param struct struct to read from
 * @return the accessed value, cast to {@code T} without a runtime check
 */
@Override
@SuppressWarnings("unchecked")
public T eval(StructLike struct) {
  Object rawValue = accessor.get(struct);
  return (T) rawValue;
}
Example #26
Source File: BoundReference.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Creates a reference from a field and the accessor used to read it.
 *
 * @param field the referenced field
 * @param accessor accessor used to read the value from a struct
 */
BoundReference(Types.NestedField field, Accessor<StructLike> accessor) {
  this.accessor = accessor;
  this.field = field;
}
Example #27
Source File: BoundTransform.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Evaluates the underlying reference against the struct and applies the transform to the
 * result.
 *
 * @param struct struct to evaluate against
 * @return the transformed value
 */
@Override
public T eval(StructLike struct) {
  return transform.apply(
      ref.eval(struct));
}
Example #28
Source File: BoundPredicate.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Evaluates this predicate's term against the struct and tests the resulting value.
 *
 * @param struct struct to evaluate against
 * @return the outcome of {@code test} on the term's value
 */
@Override
public Boolean eval(StructLike struct) {
  return test(
      term().eval(struct));
}
Example #29
Source File: BoundPredicate.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Tests this predicate against a struct by first evaluating the term on it.
 *
 * @param struct struct to evaluate against
 * @return the outcome of {@code test} on the term's value
 */
public boolean test(StructLike struct) {
  return test(
      term().eval(struct));
}
Example #30
Source File: Evaluator.java From iceberg with Apache License 2.0 | 4 votes |
/**
 * Evaluates the given data with the visitor returned by {@code visitor()}.
 *
 * @param data struct to evaluate
 * @return the visitor's evaluation result
 */
public boolean eval(StructLike data) {
  return visitor()
      .eval(data);
}