org.apache.arrow.vector.types.pojo.Schema Java Examples

Source File: UserDefinedFunctionRequestSerDe.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Reads the fields of a serialized request from the parser and assembles a
 * UserDefinedFunctionRequest. Fields are consumed in a fixed order: identity, input records,
 * output schema, method name, function type.
 *
 * @param jparser the parser to read the fields from
 * @param ctxt the deserialization context handed to the nested deserializers
 * @return the UserDefinedFunctionRequest built from the parsed values
 * @throws IOException declared by this method; presumably raised by the parser or a nested deserializer
 */
@Override
protected FederationRequest doTypedDeserialize(JsonParser jparser, DeserializationContext ctxt)
        throws IOException
{
    assertFieldName(jparser, IDENTITY_FIELD);
    FederatedIdentity callerIdentity = identityDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, INPUT_RECORDS_FIELD);
    Block records = blockDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, OUTPUT_SCHEMA_FIELD);
    Schema resultSchema = schemaDeserializer.deserialize(jparser, ctxt);

    String udfMethod = getNextStringField(jparser, METHOD_NAME_FIELD);
    // The function type is read as a string and mapped onto the enum constant of the same name.
    String functionTypeName = getNextStringField(jparser, FUNCTION_TYPE_FIELD);
    UserDefinedFunctionType udfType = UserDefinedFunctionType.valueOf(functionTypeName);

    return new UserDefinedFunctionRequest(callerIdentity, records, resultSchema, udfMethod, udfType);
}

Source File: LambdaMetadataProvider.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Builds a GetTableLayoutRequest and executes it against the specified Lambda function.
 *
 * @param catalog the catalog name to be passed to Lambda
 * @param tableName the schema-qualified table name indicating the table whose layout should be retrieved
 * @param constraints the constraints to be applied to the request
 * @param schema the schema of the table in question
 * @param partitionCols the partition column names for the table in question
 * @param metadataFunction the name of the Lambda function to call
 * @param identity the identity of the caller
 * @return the response
 * @throws RuntimeException wrapping any exception raised while creating, sending, or closing the request
 */
public static GetTableLayoutResponse getTableLayout(String catalog,
                                   TableName tableName,
                                   Constraints constraints,
                                   Schema schema,
                                   Set<String> partitionCols,
                                   String metadataFunction,
                                   FederatedIdentity identity)
{
  String queryId = generateQueryId();
  // Placeholder-style logging, matching the other log statements in this method.
  log.info("Submitting GetTableLayoutRequest with ID {}", queryId);

  // The request is declared in try-with-resources so it is closed once the call returns or fails.
  try (GetTableLayoutRequest request =
               new GetTableLayoutRequest(identity, queryId, catalog, tableName, constraints, schema, partitionCols)) {
    log.info("Submitting request: {}", request);
    GetTableLayoutResponse response = (GetTableLayoutResponse) getService(metadataFunction, identity, catalog).call(request);
    log.info("Received response: {}", response);
    return response;
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
}

Source File: HbaseRecordHandler.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Walks the rows produced by the scanner and offers each one to the BlockSpiller, writing the
 * fields of the request's schema. A row counts as written (1) only when every field write
 * reports a match; the first field that does not match stops the remaining writes for that row
 * and the row counts as 0.
 *
 * @param scanner the source of rows to process
 * @param request the read request whose schema is used as the projection
 * @param blockSpiller receives the rows via writeRows
 * @param queryStatusChecker consulted before each row; processing stops once the query is no longer running
 * @return true on every visible path, whether the scan completed or stopped early
 */
private boolean scanFilterProject(ResultScanner scanner, ReadRecordsRequest request, BlockSpiller blockSpiller, QueryStatusChecker queryStatusChecker)
{
    Schema projectedSchema = request.getSchema();
    // The mere presence of the flag in the schema metadata is what matters, not its value.
    boolean nativeStorage = projectedSchema.getCustomMetadata().get(HBASE_NATIVE_STORAGE_FLAG) != null;

    for (Result row : scanner) {
        if (!queryStatusChecker.isQueryRunning()) {
            return true;
        }
        blockSpiller.writeRows((Block block, int rowNum) -> {
            for (Field field : projectedSchema.getFields()) {
                if (!writeField(block, field, nativeStorage, row, rowNum)) {
                    return 0;
                }
            }
            return 1;
        });
    }
    return true;
}

Source File: BlockTest.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Constrains a two-column block so that col1 only admits the value 10, then checks that
 * setValue and offerValue accept 10, reject 11, and that offerValue accepts a value for a
 * column name that is not part of the constraints.
 */
@Test
public void constrainedBlockTest()
        throws Exception
{
    Schema twoIntColumns = SchemaBuilder.newBuilder()
            .addIntField("col1")
            .addIntField("col2")
            .build();
    Block constrained = allocator.createBlock(twoIntColumns);

    ValueSet onlyTen = EquatableValueSet.newBuilder(allocator, Types.MinorType.INT.getType(), true, false)
            .add(10)
            .build();
    Constraints col1MustBeTen = new Constraints(Collections.singletonMap("col1", onlyTen));

    try (ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, twoIntColumns, col1MustBeTen)) {
        constrained.constrain(evaluator);

        // Matching value is accepted by both write paths.
        assertTrue(constrained.setValue("col1", 0, 10));
        assertTrue(constrained.offerValue("col1", 0, 10));

        // Non-matching value is rejected by both write paths.
        assertFalse(constrained.setValue("col1", 0, 11));
        assertFalse(constrained.offerValue("col1", 0, 11));

        // A column without any constraint is reported as accepted.
        assertTrue(constrained.offerValue("unkown_col", 0, 10));
    }
}

Source File: BlockUtils.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Builds a single-column Block and writes each of the supplied values into its own row.
 *
 * @param allocator The BlockAllocator used to create the Block.
 * @param columnName The name of the only column in the Block's Schema.
 * @param type The Apache Arrow Type of that column.
 * @param values The values to write; one row is produced per value, in iteration order.
 * @return The newly created Block, with a single-column Schema and populated with the provided values.
 * @throws RuntimeException if writing any value fails; the message names the type, column and value.
 */
public static Block newBlock(BlockAllocator allocator, String columnName, ArrowType type, Collection<Object> values)
{
    Schema singleColumnSchema = new SchemaBuilder().addField(columnName, type).build();
    Block result = allocator.createBlock(singleColumnSchema);

    int rowsWritten = 0;
    for (Object value : values) {
        try {
            setValue(result.getFieldVector(columnName), rowsWritten, value);
        }
        catch (Exception ex) {
            throw new RuntimeException("Error for " + type + " " + columnName + " " + value, ex);
        }
        rowsWritten++;
    }

    result.setRowCount(rowsWritten);
    return result;
}

Source File: TPCDSRecordHandler.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Generates the CellWriters used to convert the TPCDS Generators data to Apache Arrow.
 *
 * @param schemaForRead The schema to read/project.
 * @param table The TPCDS Table we are reading from.
 * @return Map<Integer, CellWriter> where integer is the Column position in the TPCDS data set and the CellWriter
 * can be used to read,convert,write the value at that position for any row into the correct position and type
 * in our Apache Arrow response.
 * @throws IllegalArgumentException if schemaForRead contains a field whose name matches no column of the table.
 */
private Map<Integer, CellWriter> makeWriters(Schema schemaForRead, Table table)
{
    // Index the table's columns by name so each projected field is resolved with one lookup.
    Map<String, Column> columnsByName = new HashMap<>();
    for (Column next : table.getColumns()) {
        columnsByName.put(next.getName(), next);
    }

    //We use this approach to reduce the overhead of field lookups. This isn't as good as true columnar processing
    //using Arrow but it gets us ~80% of the way there from a rows/second per cpu-cycle perspective.
    Map<Integer, CellWriter> writers = new HashMap<>();
    for (Field nextField : schemaForRead.getFields()) {
        Column column = columnsByName.get(nextField.getName());
        if (column == null) {
            // Fail with a message that names the offending field instead of a bare NullPointerException.
            throw new IllegalArgumentException("No column named " + nextField.getName()
                    + " exists in table " + table + " for the requested schema");
        }
        writers.put(column.getPosition(), makeWriter(nextField, column));
    }
    return writers;
}

Source File: DremioArrowSchema.java From dremio-oss with Apache License 2.0

6 votes

/**
 * Parses an Arrow Schema from the JSON stored under one of two property keys
 * (as found in Parquet footer metadata).
 *
 * @param properties the key/value properties to search; must not be null
 * @return the parsed Schema, or null when neither key is present
 * @throws IOException declared by this method; presumably raised when the JSON cannot be parsed
 */
public static Schema fromMetaData(Map<String, String> properties) throws IOException {
  Preconditions.checkNotNull(properties);
  String legacySchemaJson = properties.get(DREMIO_ARROW_SCHEMA);
  String currentSchemaJson = properties.get(DREMIO_ARROW_SCHEMA_2_1);

  // Precedence: DREMIO_ARROW_SCHEMA (written by pre-2.1.0 files) wins when present;
  // otherwise fall back to DREMIO_ARROW_SCHEMA_2_1 (written by 2.1.0+ files).
  String selectedJson = (legacySchemaJson != null) ? legacySchemaJson : currentSchemaJson;
  if (selectedJson == null) {
    return null;
  }
  return fromJSON(selectedJson);
}

Source File: RedisMetadataHandlerTest.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Sends a GetTableLayoutRequest with an empty schema, no constraints and no partition columns
 * to the handler, logs up to the first 10 partition rows, and asserts that at least one
 * partition row with 4 fields comes back.
 */
@Test
public void doGetTableLayout()
        throws Exception
{
    Schema emptySchema = SchemaBuilder.newBuilder().build();
    Constraints noConstraints = new Constraints(new HashMap<>());

    GetTableLayoutRequest layoutRequest = new GetTableLayoutRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG,
            TABLE_NAME,
            noConstraints,
            emptySchema,
            new HashSet<>());

    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(allocator, layoutRequest);
    logger.info("doGetTableLayout - {}", layoutResponse);

    Block partitionBlock = layoutResponse.getPartitions();
    // Only the first 10 rows are logged to keep the output short.
    for (int rowIdx = 0; rowIdx < 10 && rowIdx < partitionBlock.getRowCount(); rowIdx++) {
        logger.info("doGetTableLayout:{} {}", rowIdx, BlockUtils.rowToString(partitionBlock, rowIdx));
    }

    assertTrue(partitionBlock.getRowCount() > 0);
    assertEquals(4, partitionBlock.getFields().size());

    logger.info("doGetTableLayout: partitions[{}]", partitionBlock.getRowCount());
}

Source File: ExampleUserDefinedFunctionHandlerTest.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Wraps the inputs in a SCALAR UserDefinedFunctionRequest, checks the request's serialization,
 * pushes the serialized request through the handler, and checks the serialization of the
 * response read back from the handler's output.
 *
 * @param inputRecords the records to pass to the function
 * @param outputSchema the schema requested for the function output
 * @param methodName the name of the function to invoke
 * @return the response read from the handler's output stream
 * @throws IOException declared by this method; presumably raised by the mapper or the handler
 */
private UserDefinedFunctionResponse runAndAssertSerialization(Block inputRecords,
                                                              Schema outputSchema,
                                                              String methodName) throws IOException
{
    UserDefinedFunctionRequest udfRequest = new UserDefinedFunctionRequest(IdentityUtil.fakeIdentity(),
            inputRecords,
            outputSchema,
            methodName,
            UserDefinedFunctionType.SCALAR);
    ObjectMapperUtil.assertSerialization(udfRequest);

    // Serialize the request into memory so it can be replayed as the handler's input stream.
    ByteArrayOutputStream serializedRequest = new ByteArrayOutputStream();
    mapper.writeValue(serializedRequest, udfRequest);
    ByteArrayInputStream handlerInput = new ByteArrayInputStream(serializedRequest.toByteArray());
    ByteArrayOutputStream handlerOutput = new ByteArrayOutputStream();

    exampleUserDefinedFunctionHandler.handleRequest(handlerInput, handlerOutput, null);

    byte[] responseBytes = handlerOutput.toByteArray();
    UserDefinedFunctionResponse result = (UserDefinedFunctionResponse) mapper.readValue(responseBytes, FederationResponse.class);
    ObjectMapperUtil.assertSerialization(result);

    return result;
}

Source File: GetTableLayoutRequestSerDe.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Reads the request-specific fields from the parser, in the order table name, constraints,
 * schema, partition columns, and combines them with the already-parsed common values into a
 * GetTableLayoutRequest.
 *
 * @param jparser the parser to read the fields from
 * @param ctxt the deserialization context handed to the nested deserializers
 * @param identity the caller identity to place in the request
 * @param queryId the query id to place in the request
 * @param catalogName the catalog name to place in the request
 * @return the assembled GetTableLayoutRequest
 * @throws IOException declared by this method; presumably raised by the parser or a nested deserializer
 */
@Override
protected MetadataRequest doRequestDeserialize(JsonParser jparser, DeserializationContext ctxt, FederatedIdentity identity, String queryId, String catalogName)
        throws IOException
{
    assertFieldName(jparser, TABLE_NAME_FIELD);
    TableName parsedTableName = tableNameDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, CONSTRAINTS_FIELD);
    Constraints parsedConstraints = constraintsDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, SCHEMA_FIELD);
    Schema parsedSchema = schemaDeserializer.deserialize(jparser, ctxt);

    // The partition columns are stored as an immutable set.
    ImmutableSet<String> partitionCols = ImmutableSet.copyOf(getNextStringArray(jparser, PARTITION_COLS_FIELD));

    return new GetTableLayoutRequest(identity, queryId, catalogName, parsedTableName, parsedConstraints, parsedSchema, partitionCols);
}

Source File: SampleSourceMetadata.java From dremio-oss with Apache License 2.0

6 votes

/**
 * Adds one dataset handle per entry of pathNames. Every dataset gets stats created with
 * DatasetStats.of(0, 0) and a schema with no fields.
 *
 * @param numDatasets number of datasets to add; must equal the size of pathNames
 * @param pathNames   one path (a list of path components) per dataset
 * @throws UnsupportedOperationException if numDatasets differs from the number of paths supplied
 */
public void addNDatasets(int numDatasets, List<List<String>> pathNames) {
  if (pathNames.size() != numDatasets) {
    throw new UnsupportedOperationException();
  }

  DatasetStats zeroStats = DatasetStats.of(0, 0);
  Schema emptySchema = new Schema(new ArrayList<>());

  // The size check above guarantees this visits exactly numDatasets paths.
  for (List<String> pathComponents : pathNames) {
    EntityPath datasetPath = new EntityPath(pathComponents);
    DatasetMetadata metadata = DatasetMetadata.of(zeroStats, emptySchema);

    addDatasetHandle(SampleHandleImpl.of(metadata, datasetPath));
  }
}

Source File: AbstractTableProviderTest.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Reads back every spilled block, logs and validates each of its rows, and finally asserts that
 * the total number of rows seen equals getExpectedRows().
 *
 * @param schema the schema used to read each block
 * @param reader the reader used to load blocks from their spill locations
 * @param locations the spill locations to read; each is cast to S3SpillLocation
 * @param encryptionKey the key passed to the reader for each block
 * @throws RuntimeException wrapping any exception raised while reading or validating a block
 */
protected void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey)
{
    int blocksRead = 0;
    int rowsSeen = 0;

    for (SpillLocation location : locations) {
        S3SpillLocation s3Location = (S3SpillLocation) location;
        try (Block spilled = reader.read(s3Location, encryptionKey, schema)) {
            logger.info("validateRead: blockNum[{}] and recordCount[{}]", blocksRead++, spilled.getRowCount());

            for (int row = 0; row < spilled.getRowCount(); row++) {
                logger.info("validateRead: {}", BlockUtils.rowToString(spilled, row));
                validateRow(spilled, row);
                rowsSeen++;
            }
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }

    assertEquals(getExpectedRows(), rowsSeen);
}

Source File: AbstractTestNamespaceService.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Serializes a two-field schema (an int field and a struct with one string child) into a
 * flatbuffer, stores it as the schema of physical dataset "s.foo", reads the stored bytes back
 * and asserts that the schema converted from them equals the original.
 */
@Test
public void testDataSetSchema() throws Exception {
  Field field1 = new Field("a", true, new Int(32, true), null);
  Field child1 = new Field("c", true, Utf8.INSTANCE, null);
  Field field2 = new Field("b", true, Struct.INSTANCE, ImmutableList.of(child1));
  Schema schema = new Schema(ImmutableList.of(field1, field2));
  FlatBufferBuilder builder = new FlatBufferBuilder();
  // Serialize the schema exactly once and mark the resulting offset as the buffer root.
  builder.finish(schema.getSchema(builder));
  NamespaceTestUtils.addSource(namespaceService, "s");
  NamespaceTestUtils.addPhysicalDS(namespaceService, "s.foo", builder.sizedByteArray());
  ByteBuffer bb = ByteBuffer.wrap(DatasetHelper.getSchemaBytes(namespaceService.getDataset(new NamespaceKey(PathUtils.parseFullPath("s.foo")))).toByteArray());
  Schema returnedSchema = Schema.convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(bb));
  assertEquals(schema, returnedSchema);
}

Source File: BlockUtilsTest.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Fills two rows of a three-column block, verifies that row 1 is not reported as a null row,
 * then unsets row 1 and verifies that it is reported as a null row.
 */
@Test
public void isNullRow()
{
    Schema threeColumns = SchemaBuilder.newBuilder()
            .addField("col1", new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Int(32, true))
            .addField("col3", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"))
            .build();

    LocalDateTime now = LocalDateTime.now();

    Block rows = allocator.createBlock(threeColumns);

    // Row 0: every column populated, timestamp given as a LocalDateTime.
    BlockUtils.setValue(rows.getFieldVector("col1"), 0, 10);
    BlockUtils.setValue(rows.getFieldVector("col2"), 0, 20);
    BlockUtils.setValue(rows.getFieldVector("col3"), 0, now);

    // Row 1: every column populated, timestamp given as a ZonedDateTime.
    ZonedDateTime zonedNow = ZonedDateTime.of(now, ZoneId.of("-05:00"));
    BlockUtils.setValue(rows.getFieldVector("col1"), 1, 11);
    BlockUtils.setValue(rows.getFieldVector("col2"), 1, 21);
    BlockUtils.setValue(rows.getFieldVector("col3"), 1, zonedNow);
    rows.setRowCount(2);

    assertFalse(BlockUtils.isNullRow(rows, 1));

    // After unsetting row 1 it must be reported as null.
    BlockUtils.unsetRow(1, rows);
    assertTrue(BlockUtils.isNullRow(rows, 1));
}

Source File: JdbcMetadataHandler.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Builds the Arrow schema of a table from the column metadata returned by getColumns, then
 * appends the fields of the partition schema. Columns whose JDBC type cannot be mapped to a
 * supported Arrow type are logged and left out.
 *
 * @param jdbcConnection the connection supplying the catalog and database metadata
 * @param tableName the table whose columns are read
 * @param partitionSchema the schema whose fields are appended after the table's columns
 * @return the combined schema
 * @throws SQLException declared by this method; presumably raised by the JDBC calls
 * @throws RuntimeException if no column with a supported type was found
 */
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema)
        throws SQLException
{
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();

    try (ResultSet columns = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData())) {
        boolean anyColumnAdded = false;
        while (columns.next()) {
            ArrowType arrowType = JdbcArrowTypeConverter.toArrowType(
                    columns.getInt("DATA_TYPE"),
                    columns.getInt("COLUMN_SIZE"),
                    columns.getInt("DECIMAL_DIGITS"));
            String name = columns.getString("COLUMN_NAME");

            if (arrowType == null || !SupportedTypes.isSupported(arrowType)) {
                LOGGER.error("getSchema: Unable to map type for column[" + name + "] to a supported type, attempted " + arrowType);
                continue;
            }

            schemaBuilder.addField(FieldBuilder.newBuilder(name, arrowType).build());
            anyColumnAdded = true;
        }

        if (!anyColumnAdded) {
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }

        // Partition columns go after the table's own columns.
        partitionSchema.getFields().forEach(schemaBuilder::addField);

        return schemaBuilder.build();
    }
}

Source File: ExampleUserDefinedFunctionHandlerTest.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Passes a single struct row (int, double and string children) to the "to_json" function via
 * runAndAssertSerialization and asserts that the one returned "json" value equals what
 * calling to_json directly on the same map produces.
 */
@Test
public void testToJsonMethod() throws Exception
{
    Schema structInputSchema = SchemaBuilder.newBuilder()
            .addStructField("struct")
            .addChildField("struct", "int", Types.MinorType.INT.getType())
            .addChildField("struct", "double", Types.MinorType.FLOAT8.getType())
            .addChildField("struct", "string", Types.MinorType.VARCHAR.getType())
            .build();
    Schema jsonOutputSchema = SchemaBuilder.newBuilder()
            .addField("json", Types.MinorType.VARCHAR.getType())
            .build();

    Block input = allocator.createBlock(structInputSchema);
    input.setRowCount(1);
    FieldVector structVector = input.getFieldVector("struct");

    Map<String, Object> structValue = new HashMap<>();
    structValue.put("int", 10);
    structValue.put("double", 2.3);
    structValue.put("string", "test_string");
    BlockUtils.setComplexValue(structVector, 0, FieldResolver.DEFAULT, structValue);

    UserDefinedFunctionResponse udfResponse = runAndAssertSerialization(input, jsonOutputSchema, "to_json");

    Block output = udfResponse.getRecords();
    assertEquals(1, output.getRowCount());

    FieldReader jsonReader = output.getFieldReader("json");
    ArrowValueProjector projector = ProjectorUtils.createArrowValueProjector(jsonReader);
    assertEquals(exampleUserDefinedFunctionHandler.to_json(structValue), projector.project(0));
}

Source File: BlockUtilsTest.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Fills two rows of an int column and a UTC millisecond timestamp column, then asserts that
 * the block's toString() output contains the row count and the expected rendering of both columns.
 */
@Test
public void fieldToString()
{
    Schema schema = SchemaBuilder.newBuilder()
            .addField("col1", new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"))
            .build();

    // Month written as decimal 3; a leading zero would make it an octal literal.
    LocalDateTime ldt = LocalDateTime.of(2020, 3, 18, 12, 54, 29);

    //Make a block with 2 rows and no null rows
    Block block = allocator.createBlock(schema);
    BlockUtils.setValue(block.getFieldVector("col1"), 0, 10);
    BlockUtils.setValue(block.getFieldVector("col2"), 0, ldt);

    BlockUtils.setValue(block.getFieldVector("col1"), 1, 11);
    BlockUtils.setValue(block.getFieldVector("col2"), 1, ZonedDateTime.of(ldt, ZoneId.of("-05:00")));
    block.setRowCount(2);

    String expectedRows = "rows=2";
    String expectedCol1 = "[10, 11]";
    String expectedCol2 = "[2020-03-18T12:54:29Z[UTC], 2020-03-18T12:54:29-05:00]";
    String actual = block.toString();
    assertTrue(actual.contains(expectedRows));
    assertTrue(actual.contains(expectedCol1));
    assertTrue(actual.contains(expectedCol2));
}

Source File: ReadRecordsResponseSerDeTest.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Prepares the fixture: a ReadRecordsResponse for "test-catalog" holding 10 rows of
 * year/month/day int columns, plus the expected serialized text loaded from the
 * serde/v2 ReadRecordsResponse.json resource.
 */
@Before
public void beforeTest()
        throws IOException
{
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";

    ArrowType.Int int32 = new ArrowType.Int(32, true);
    Schema dateSchema = SchemaBuilder.newBuilder()
            .addField(yearCol, int32)
            .addField(monthCol, int32)
            .addField(dayCol, int32)
            .build();

    Block dateRows = allocator.createBlock(dateSchema);
    int recordCount = 10;
    for (int row = 0; row < recordCount; row++) {
        BlockUtils.setValue(dateRows.getFieldVector(yearCol), row, 2016 + row);
        BlockUtils.setValue(dateRows.getFieldVector(monthCol), row, (row % 12) + 1);
        BlockUtils.setValue(dateRows.getFieldVector(dayCol), row, (row % 28) + 1);
    }
    dateRows.setRowCount(recordCount);

    expected = new ReadRecordsResponse("test-catalog", dateRows);

    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}

Source File: PostGreSqlMetadataHandlerTest.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Runs doGetTableLayout against a mocked partition query returning two partitions
 * (s0/p0 and s1/p1), then calls doGetSplits with "1" as the last request argument
 * (presumably the continuation token) and asserts that only the split for s1/p1 is returned.
 */
@Test
public void doGetSplitsContinuation()
        throws Exception
{
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.postGreSqlMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);

    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(PostGreSqlMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);

    // Two partitions are served by the mocked result set.
    String[] columns = {"child_schema", "child"};
    int[] types = {Types.VARCHAR, Types.VARCHAR};
    Object[][] values = {{"s0", "p0"}, {"s1", "p1"}};
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);

    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);

    GetTableLayoutResponse getTableLayoutResponse = this.postGreSqlMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);

    BlockAllocator splitBlockAllocator = new BlockAllocatorImpl();
    GetSplitsRequest getSplitsRequest = new GetSplitsRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, getTableLayoutResponse.getPartitions(), new ArrayList<>(partitionCols), constraints, "1");
    GetSplitsResponse getSplitsResponse = this.postGreSqlMetadataHandler.doGetSplits(splitBlockAllocator, getSplitsRequest);

    // Only the second partition is expected in the returned splits.
    Set<Map<String, String>> expectedSplits = new HashSet<>();
    expectedSplits.add(ImmutableMap.of("partition_schema_name", "s1", "partition_name", "p1"));
    Assert.assertEquals(expectedSplits.size(), getSplitsResponse.getSplits().size());
    Set<Map<String, String>> actualSplits = getSplitsResponse.getSplits().stream().map(Split::getProperties).collect(Collectors.toSet());
    Assert.assertEquals(expectedSplits, actualSplits);
}

Source File: HbaseSchemaUtilsTest.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Infers a schema through a mocked HBase connection whose scanTable call hands a mocked
 * scanner to the supplied ResultProcessor, then asserts that the inferred fields have the same
 * names and minor types as TestUtils.makeSchema() and that the connection and scanner were
 * each used exactly once.
 */
@Test
public void inferSchema()
        throws IOException
{
    int numToScan = 4;
    TableName tableName = new TableName("schema", "table");
    List<Result> results = TestUtils.makeResults();

    HBaseConnection mockConnection = mock(HBaseConnection.class);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());
    // Route the scan to the processor passed as the third argument, feeding it the mocked scanner.
    when(mockConnection.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocation) -> {
        ResultProcessor resultProcessor = (ResultProcessor) invocation.getArguments()[2];
        return resultProcessor.scan(mockScanner);
    });

    Schema inferred = HbaseSchemaUtils.inferSchema(mockConnection, tableName, numToScan);

    Map<String, Types.MinorType> actualFields = new HashMap<>();
    inferred.getFields().forEach(
            field -> actualFields.put(field.getName(), Types.getMinorTypeForArrowType(field.getType())));

    Map<String, Types.MinorType> expectedFields = new HashMap<>();
    TestUtils.makeSchema().build().getFields().forEach(
            field -> expectedFields.put(field.getName(), Types.getMinorTypeForArrowType(field.getType())));

    for (Map.Entry<String, Types.MinorType> expectedEntry : expectedFields.entrySet()) {
        String fieldName = expectedEntry.getKey();
        assertNotNull(actualFields.get(fieldName));
        assertEquals(fieldName, expectedEntry.getValue(), actualFields.get(fieldName));
    }
    assertEquals(expectedFields.size(), actualFields.size());

    verify(mockConnection, times(1)).scanTable(anyObject(), any(Scan.class), any(ResultProcessor.class));
    verify(mockScanner, times(1)).iterator();
}

Source File: DatasetMetadataImpl.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Creates the metadata holder by storing each argument in the field of the same name.
 * The arguments are stored as given: no validation and no defensive copies are made here.
 *
 * @param stats the dataset stats to store
 * @param schema the dataset schema to store
 * @param partitionColumns the partition column names to store
 * @param sortColumns the sort column names to store
 * @param extraInfo the extra info to store
 */
DatasetMetadataImpl(
    DatasetStats stats,
    Schema schema,
    List<String> partitionColumns,
    List<String> sortColumns,
    BytesOutput extraInfo
) {
  this.stats = stats;
  this.schema = schema;
  this.partitionColumns = partitionColumns;
  this.sortColumns = sortColumns;
  this.extraInfo = extraInfo;
}

Source File: NativeProjector.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Creates a projector over the given input, keeping references to the supplied schema and
 * function context and starting with an empty set of referenced fields.
 *
 * @param incoming the incoming vectors to store
 * @param schema the schema to store
 * @param functionContext the function context to store
 */
NativeProjector(VectorAccessible incoming, Schema schema, FunctionContext functionContext) {
  // A linked hash set is used so that iteration follows insertion order.
  this.referencedFields = Sets.newLinkedHashSet();
  this.functionContext = functionContext;
  this.schema = schema;
  this.incoming = incoming;
}

Source File: HiveDatasetMetadata.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Creates the metadata holder by storing each argument in the field of the same name.
 * The arguments are stored as given: no validation and no defensive copies are made here.
 * The constructor is private, so instances must be created from within this class.
 *
 * @param schema the dataset schema to store
 * @param partitionColumns the partition column names to store
 * @param sortColumns the sort column names to store
 * @param extraInfo the extra info to store
 * @param metadataAccumulator the metadata accumulator to store
 */
private HiveDatasetMetadata(
  final Schema schema,
  final List<String> partitionColumns,
  final List<String> sortColumns,
  final BytesOutput extraInfo,
  final MetadataAccumulator metadataAccumulator
) {
  this.schema = schema;
  this.partitionColumns = partitionColumns;
  this.sortColumns = sortColumns;
  this.extraInfo = extraInfo;
  this.metadataAccumulator = metadataAccumulator;
}

Source File: DDBRecordMetadata.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Extracts the column name mapping stored in the schema's custom metadata under
 * COLUMN_NAME_MAPPING_PROPERTY.
 *
 * @param schema Schema to read the mapping from; may be null
 * @return a new HashMap of glue column names to ddb column names parsed from the property, or an
 * empty immutable map when the schema, its metadata, or the property value is missing or empty
 */
private static Map<String, String> getColumnNameMapping(Schema schema)
{
    if (schema == null) {
        return ImmutableMap.of();
    }

    Map<String, String> customMetadata = schema.getCustomMetadata();
    if (customMetadata == null) {
        return ImmutableMap.of();
    }

    String encodedMapping = customMetadata.get(COLUMN_NAME_MAPPING_PROPERTY);
    if (Strings.isNullOrEmpty(encodedMapping)) {
        return ImmutableMap.of();
    }

    // Copy the splitter's result into a HashMap, as callers receive a HashMap in this case.
    return new HashMap<>(MAP_SPLITTER.split(encodedMapping));
}

Source File: TestSchemaConverter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Checks that an optional FIXED_LEN_BYTE_ARRAY column of length 5 annotated as DECIMAL with
 * precision 8 and scale 2 converts to an Arrow schema with a single field "a" of type Decimal(8, 2).
 */
@Test
public void testParquetFixedBinaryToArrowDecimal() {
  MessageType parquetSchema = Types.buildMessage()
    .addField(
      Types.optional(FIXED_LEN_BYTE_ARRAY).length(5).as(DECIMAL).precision(8).scale(2).named("a"))
    .named("root");

  Schema expectedArrowSchema = new Schema(asList(field("a", new ArrowType.Decimal(8, 2))));

  Assert.assertEquals(expectedArrowSchema, converter.fromParquet(parquetSchema).getArrowSchema());
}

Source File: JdbcMetadataHandlerTest.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Prepares the fixture: mocked connection factory, connection (deep stubs), secrets manager,
 * Athena client, identity and allocator, plus a JdbcMetadataHandler subclass whose partition
 * schema is PARTITION_SCHEMA, whose getPartitions does nothing, and whose doGetSplits returns null.
 */
@Before
public void setup()
{
    this.jdbcConnectionFactory = Mockito.mock(JdbcConnectionFactory.class);
    this.connection = Mockito.mock(Connection.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(this.jdbcConnectionFactory.getConnection(Mockito.any(JdbcCredentialProvider.class)))
            .thenReturn(this.connection);

    this.secretsManager = Mockito.mock(AWSSecretsManager.class);
    this.athena = Mockito.mock(AmazonAthena.class);

    // The secrets manager answers a lookup of "testSecret" with a fixed JSON credential document.
    GetSecretValueRequest secretRequest = new GetSecretValueRequest().withSecretId("testSecret");
    GetSecretValueResult secretResult = new GetSecretValueResult()
            .withSecretString("{\"username\": \"testUser\", \"password\": \"testPassword\"}");
    Mockito.when(this.secretsManager.getSecretValue(Mockito.eq(secretRequest))).thenReturn(secretResult);

    DatabaseConnectionConfig connectionConfig = new DatabaseConnectionConfig(
            "testCatalog",
            JdbcConnectionFactory.DatabaseEngine.MYSQL,
            "mysql://jdbc:mysql://hostname/${testSecret}",
            "testSecret");

    this.jdbcMetadataHandler = new JdbcMetadataHandler(connectionConfig, this.secretsManager, this.athena, jdbcConnectionFactory)
    {
        @Override
        public Schema getPartitionSchema(final String catalogName)
        {
            return PARTITION_SCHEMA;
        }

        @Override
        public void getPartitions(final BlockWriter blockWriter, final GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker)
        {
            // Intentionally empty: these tests do not produce partitions.
        }

        @Override
        public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
        {
            return null;
        }
    };

    this.federatedIdentity = Mockito.mock(FederatedIdentity.class);
    this.blockAllocator = Mockito.mock(BlockAllocator.class);
}

Source File: TestSchemaConverter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Checks that an optional INT64 column annotated as TIMESTAMP_MILLIS converts to an Arrow
 * schema with a single field "a" of type Timestamp(MILLISECOND, "UTC").
 */
@Test
public void testParquetInt64TimestampMillisToArrow() {
  MessageType parquetSchema = Types.buildMessage()
    .addField(
      Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a"))
    .named("root");

  Schema expectedArrowSchema = new Schema(asList(field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))));

  Assert.assertEquals(expectedArrowSchema, converter.fromParquet(parquetSchema).getArrowSchema());
}

Source File: FieldIdUtil2.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Searches the schema's top-level fields, in order, for the first one that yields a field id
 * for the given path.
 *
 * @param schema the schema whose fields are searched
 * @param path the path to resolve
 * @param isHyper passed through unchanged to the per-field lookup
 * @return the first non-null id produced by the per-field lookup, or null if no field produces one
 */
public static TypedFieldId getFieldId(Schema schema, BasePath path, boolean isHyper){
  int fieldIndex = 0;
  for (Field candidate : schema.getFields()) {
    // Each field is tried together with its position within the schema.
    TypedFieldId resolved = getFieldId(candidate, fieldIndex++, path, isHyper);
    if (resolved != null) {
      return resolved;
    }
  }
  return null;
}

Source File: UserDefinedFunctionRequestSerDeTest.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Prepares the fixture: a SCALAR UserDefinedFunctionRequest for "test-method" with one input
 * row (factor1 = 2, factor2 = 3) and an int "product" output schema, plus the expected
 * serialized text loaded from the serde/v2 UserDefinedFunctionRequest.json resource.
 */
@Before
public void beforeTest()
        throws IOException
{
    Schema factorsSchema = SchemaBuilder.newBuilder()
            .addField("factor1", Types.MinorType.INT.getType())
            .addField("factor2", Types.MinorType.INT.getType())
            .build();
    Schema productSchema = SchemaBuilder.newBuilder()
            .addField("product", Types.MinorType.INT.getType())
            .build();

    Block factors = allocator.createBlock(factorsSchema);
    factors.setRowCount(1);

    // Single input row: factor1 = 2, factor2 = 3.
    IntVector factor1Vector = (IntVector) factors.getFieldVector("factor1");
    IntVector factor2Vector = (IntVector) factors.getFieldVector("factor2");
    factor1Vector.setSafe(0, 2);
    factor2Vector.setSafe(0, 3);

    expected = new UserDefinedFunctionRequest(federatedIdentity,
            factors,
            productSchema,
            "test-method",
            UserDefinedFunctionType.SCALAR);

    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "UserDefinedFunctionRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}

Source File: TestSchemaConverter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Checks that an optional INT32 column annotated as TIME_MILLIS converts to an Arrow schema
 * with a single field "a" of type Time(MILLISECOND, 32).
 */
@Test
public void testParquetInt32TimeMillisToArrow() {
  MessageType parquetSchema = Types.buildMessage()
    .addField(
      Types.optional(INT32).as(TIME_MILLIS).named("a"))
    .named("root");

  Schema expectedArrowSchema = new Schema(asList(field("a", new ArrowType.Time(TimeUnit.MILLISECOND, 32))));

  Assert.assertEquals(expectedArrowSchema, converter.fromParquet(parquetSchema).getArrowSchema());
}

org.apache.arrow.vector.types.pojo.Schema Java Examples