org.apache.arrow.vector.types.pojo.Schema Java Examples
The following examples show how to use
org.apache.arrow.vector.types.pojo.Schema.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UserDefinedFunctionRequestSerDe.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code UserDefinedFunctionRequest} by reading, in this fixed order, the identity,
 * input records, output schema, method name and function type from the parser.
 *
 * @param jparser the parser the fields are read from
 * @param ctxt the context passed through to the nested deserializers
 * @return the request assembled from the values read
 * @throws IOException declared by this method's signature; presumably raised by the parsing helpers
 */
@Override
protected FederationRequest doTypedDeserialize(JsonParser jparser, DeserializationContext ctxt) throws IOException {
    // NOTE(review): each read presumably advances the parser, so the statement order is kept as-is.
    assertFieldName(jparser, IDENTITY_FIELD);
    FederatedIdentity callerIdentity = identityDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, INPUT_RECORDS_FIELD);
    Block records = blockDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, OUTPUT_SCHEMA_FIELD);
    Schema resultSchema = schemaDeserializer.deserialize(jparser, ctxt);

    String udfMethodName = getNextStringField(jparser, METHOD_NAME_FIELD);
    String udfTypeName = getNextStringField(jparser, FUNCTION_TYPE_FIELD);
    UserDefinedFunctionType udfType = UserDefinedFunctionType.valueOf(udfTypeName);

    return new UserDefinedFunctionRequest(callerIdentity, records, resultSchema, udfMethodName, udfType);
}
Example #2
Source File: LambdaMetadataProvider.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Builds a GetTableLayoutRequest and executes it against the specified Lambda function.
 *
 * @param catalog the catalog name to be passed to Lambda
 * @param tableName the schema-qualified table name indicating the table whose layout should be retrieved
 * @param constraints the constraints to be applied to the request
 * @param schema the schema of the table in question
 * @param partitionCols the partition column names for the table in question
 * @param metadataFunction the name of the Lambda function to call
 * @param identity the identity of the caller
 * @return the response
 * @throws RuntimeException wrapping any exception raised while building, sending or closing the request
 */
public static GetTableLayoutResponse getTableLayout(String catalog,
        TableName tableName,
        Constraints constraints,
        Schema schema,
        Set<String> partitionCols,
        String metadataFunction,
        FederatedIdentity identity) {
    String queryId = generateQueryId();
    // Placeholder form, matching the other log statements in this method.
    log.info("Submitting GetTableLayoutRequest with ID {}", queryId);

    // The request is closed automatically once the call has returned.
    try (GetTableLayoutRequest request =
            new GetTableLayoutRequest(identity, queryId, catalog, tableName, constraints, schema, partitionCols)) {
        log.info("Submitting request: {}", request);
        GetTableLayoutResponse response =
                (GetTableLayoutResponse) getService(metadataFunction, identity, catalog).call(request);
        log.info("Received response: {}", response);
        return response;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example #3
Source File: HbaseRecordHandler.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Iterates the scanner and offers every row to the spiller, writing each field of the
 * request's schema through {@code writeField}.
 *
 * @param scanner the rows to iterate
 * @param request supplies the schema whose fields are written
 * @param blockSpiller receives one {@code writeRows} call per scanned row
 * @param queryStatusChecker consulted before each row; iteration stops once the query is no longer running
 * @return always {@code true}
 */
private boolean scanFilterProject(ResultScanner scanner, ReadRecordsRequest request, BlockSpiller blockSpiller, QueryStatusChecker queryStatusChecker) {
    Schema projectedSchema = request.getSchema();
    boolean nativeStorage = projectedSchema.getCustomMetadata().get(HBASE_NATIVE_STORAGE_FLAG) != null;

    for (Result row : scanner) {
        if (!queryStatusChecker.isQueryRunning()) {
            return true;
        }

        blockSpiller.writeRows((Block block, int rowNum) -> {
            for (Field field : projectedSchema.getFields()) {
                // The first field that fails to write rejects the row; later fields are not attempted.
                if (!writeField(block, field, nativeStorage, row, rowNum)) {
                    return 0;
                }
            }
            return 1;
        });
    }

    return true;
}
Example #4
Source File: BlockTest.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Constrains a two-column block so that "col1" only admits the value 10, then checks which
 * {@code setValue}/{@code offerValue} calls are accepted.
 */
@Test
public void constrainedBlockTest() throws Exception {
    Schema twoIntColumns = SchemaBuilder.newBuilder()
            .addIntField("col1")
            .addIntField("col2")
            .build();
    Block constrainedBlock = allocator.createBlock(twoIntColumns);

    ValueSet onlyTen = EquatableValueSet
            .newBuilder(allocator, Types.MinorType.INT.getType(), true, false)
            .add(10)
            .build();
    Constraints col1MustBeTen = new Constraints(Collections.singletonMap("col1", onlyTen));

    try (ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, twoIntColumns, col1MustBeTen)) {
        constrainedBlock.constrain(evaluator);

        // A value satisfying the constraint is accepted.
        assertTrue(constrainedBlock.setValue("col1", 0, 10));
        assertTrue(constrainedBlock.offerValue("col1", 0, 10));

        // A value violating the constraint is rejected.
        assertFalse(constrainedBlock.setValue("col1", 0, 11));
        assertFalse(constrainedBlock.offerValue("col1", 0, 11));

        // Offering a value for a column that is not in the schema is expected to report true.
        assertTrue(constrainedBlock.offerValue("unkown_col", 0, 10));
    }
}
Example #5
Source File: BlockUtils.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Builds a single-column Block and fills it with the supplied values, one value per row.
 *
 * @param allocator The BlockAllocator used to create the Block.
 * @param columnName The name of the only column in the Block's Schema.
 * @param type The Apache Arrow type of that column.
 * @param values The values to write; they are written in iteration order starting at row 0.
 * @return The new Block, with its row count set to the number of values written.
 * @throws RuntimeException wrapping any exception raised while writing a value; the message
 *         names the type, the column and the offending value.
 */
public static Block newBlock(BlockAllocator allocator, String columnName, ArrowType type, Collection<Object> values) {
    SchemaBuilder singleColumn = new SchemaBuilder();
    singleColumn.addField(columnName, type);
    Block block = allocator.createBlock(singleColumn.build());

    int row = 0;
    for (Object value : values) {
        try {
            setValue(block.getFieldVector(columnName), row, value);
        } catch (Exception ex) {
            throw new RuntimeException("Error for " + type + " " + columnName + " " + value, ex);
        }
        row++;
    }

    block.setRowCount(row);
    return block;
}
Example #6
Source File: TPCDSRecordHandler.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/** * Generates the CellWriters used to convert the TPCDS Generators data to Apache Arrow. * * @param schemaForRead The schema to read/project. * @param table The TPCDS Table we are reading from. * @return Map<Integer, CellWriter> where integer is the Column position in the TPCDS data set and the CellWriter * can be used to read,convert,write the value at that position for any row into the correct position and type * in our Apache Arrow response. */ private Map<Integer, CellWriter> makeWriters(Schema schemaForRead, Table table) { Map<String, Column> columnPositions = new HashMap<>(); for (Column next : table.getColumns()) { columnPositions.put(next.getName(), next); } //We use this approach to reduce the overhead of field lookups. This isn't as good as true columnar processing //using Arrow but it gets us ~80% of the way there from a rows/second per cpu-cycle perspective. Map<Integer, CellWriter> writers = new HashMap<>(); for (Field nextField : schemaForRead.getFields()) { Column column = columnPositions.get(nextField.getName()); writers.put(column.getPosition(), makeWriter(nextField, column)); } return writers; }
Example #7
Source File: DremioArrowSchema.java From dremio-oss with Apache License 2.0 | 6 votes |
/** * To parse Arrow Schema from JSON based on property * existing in Parquet Footer Metadata * @param properties * @return * @throws IOException */ public static Schema fromMetaData(Map<String, String> properties) throws IOException { Preconditions.checkNotNull(properties); String jsonArrowSchema = properties.get(DREMIO_ARROW_SCHEMA); String jsonArrowSchema2_1 = properties.get(DREMIO_ARROW_SCHEMA_2_1); // check in order // DREMIO_ARROW_SCHEMA - if found it is pre 2.1.0 generated file - use it // if DREMIO_ARROW_SCHEMA is not found // check DREMIO_ARROW_SCHEMA_2_1 // if found - it is 2.1.0+ generated file - use it if (jsonArrowSchema != null) { return fromJSON(jsonArrowSchema); } if (jsonArrowSchema2_1 != null) { return fromJSON(jsonArrowSchema2_1); } return null; }
Example #8
Source File: RedisMetadataHandlerTest.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Requests a table layout with an empty schema and no constraints, logs up to the first ten
 * partition rows, and expects at least one partition row with four fields.
 */
@Test
public void doGetTableLayout() throws Exception {
    Schema emptySchema = SchemaBuilder.newBuilder().build();
    GetTableLayoutRequest layoutRequest = new GetTableLayoutRequest(
            IDENTITY,
            QUERY_ID,
            DEFAULT_CATALOG,
            TABLE_NAME,
            new Constraints(new HashMap<>()),
            emptySchema,
            new HashSet<>());

    GetTableLayoutResponse layoutResponse = handler.doGetTableLayout(allocator, layoutRequest);
    logger.info("doGetTableLayout - {}", layoutResponse);

    Block partitions = layoutResponse.getPartitions();

    // Only the first ten rows are logged.
    int row = 0;
    while (row < partitions.getRowCount() && row < 10) {
        logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
        row++;
    }

    assertTrue(partitions.getRowCount() > 0);
    assertEquals(4, partitions.getFields().size());

    logger.info("doGetTableLayout: partitions[{}]", partitions.getRowCount());
}
Example #9
Source File: ExampleUserDefinedFunctionHandlerTest.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Serializes a SCALAR UDF request, passes the bytes to the handler, and deserializes the
 * handler's output, asserting serialization of both the request and the response.
 *
 * @param inputRecords the records passed to the UDF
 * @param outputSchema the schema set on the request as its output schema
 * @param methodName the name of the UDF method to invoke
 * @return the response read back from the handler's output
 * @throws IOException declared by this method's signature; presumably raised by the mapper or the handler
 */
private UserDefinedFunctionResponse runAndAssertSerialization(Block inputRecords, Schema outputSchema, String methodName) throws IOException {
    UserDefinedFunctionRequest udfRequest = new UserDefinedFunctionRequest(
            IdentityUtil.fakeIdentity(), inputRecords, outputSchema, methodName, UserDefinedFunctionType.SCALAR);
    ObjectMapperUtil.assertSerialization(udfRequest);

    // Serialize the request to bytes.
    ByteArrayOutputStream requestBytes = new ByteArrayOutputStream();
    mapper.writeValue(requestBytes, udfRequest);

    // Feed those bytes to the handler and capture whatever it writes.
    ByteArrayInputStream handlerInput = new ByteArrayInputStream(requestBytes.toByteArray());
    ByteArrayOutputStream handlerOutput = new ByteArrayOutputStream();
    exampleUserDefinedFunctionHandler.handleRequest(handlerInput, handlerOutput, null);

    FederationResponse rawResponse = mapper.readValue(handlerOutput.toByteArray(), FederationResponse.class);
    UserDefinedFunctionResponse udfResponse = (UserDefinedFunctionResponse) rawResponse;
    ObjectMapperUtil.assertSerialization(udfResponse);
    return udfResponse;
}
Example #10
Source File: GetTableLayoutRequestSerDe.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code GetTableLayoutRequest} by reading, in this fixed order, the table name,
 * constraints, schema and partition columns from the parser.
 *
 * @param jparser the parser the remaining fields are read from
 * @param ctxt the context passed through to the nested deserializers
 * @param identity the caller identity to set on the request
 * @param queryId the query id to set on the request
 * @param catalogName the catalog name to set on the request
 * @return the assembled request
 * @throws IOException declared by this method's signature; presumably raised by the parsing helpers
 */
@Override
protected MetadataRequest doRequestDeserialize(JsonParser jparser, DeserializationContext ctxt, FederatedIdentity identity, String queryId, String catalogName) throws IOException {
    // NOTE(review): each read presumably advances the parser, so the statement order is kept as-is.
    assertFieldName(jparser, TABLE_NAME_FIELD);
    TableName table = tableNameDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, CONSTRAINTS_FIELD);
    Constraints tableConstraints = constraintsDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, SCHEMA_FIELD);
    Schema tableSchema = schemaDeserializer.deserialize(jparser, ctxt);

    ImmutableSet<String> partitionCols = ImmutableSet.<String>builder()
            .addAll(getNextStringArray(jparser, PARTITION_COLS_FIELD))
            .build();

    return new GetTableLayoutRequest(
            identity, queryId, catalogName, table, tableConstraints, tableSchema, partitionCols);
}
Example #11
Source File: SampleSourceMetadata.java From dremio-oss with Apache License 2.0 | 6 votes |
/**
 * Adds one dataset handle per supplied path. Every dataset shares the same zeroed stats and
 * the same empty schema.
 *
 * @param numDatasets number of datasets to add; must equal {@code pathNames.size()}
 * @param pathNames one path (a list of path components) per dataset
 * @throws UnsupportedOperationException if {@code numDatasets} differs from the number of paths
 */
public void addNDatasets(int numDatasets, List<List<String>> pathNames) {
    if (numDatasets != pathNames.size()) {
        throw new UnsupportedOperationException();
    }

    DatasetStats zeroStats = DatasetStats.of(0, 0);
    Schema emptySchema = new Schema(new ArrayList<>());

    // The size check above guarantees that visiting every path visits exactly numDatasets entries.
    for (List<String> pathComponents : pathNames) {
        EntityPath entityPath = new EntityPath(pathComponents);
        DatasetMetadata metadata = DatasetMetadata.of(zeroStats, emptySchema);
        addDatasetHandle(SampleHandleImpl.of(metadata, entityPath));
    }
}
Example #12
Source File: AbstractTableProviderTest.java From aws-athena-query-federation with Apache License 2.0 | 6 votes |
/**
 * Reads every spilled block, logs and validates each of its rows, and finally checks that the
 * total number of rows matches {@code getExpectedRows()}.
 *
 * @param schema the schema passed to the reader for each block
 * @param reader the reader used to load blocks from their spill locations
 * @param locations the spill locations to read; each is cast to {@code S3SpillLocation}
 * @param encryptionKey the key passed to the reader for each block
 * @throws RuntimeException wrapping any exception raised while reading or validating a block
 */
protected void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey) {
    int blocksRead = 0;
    int rowsRead = 0;

    for (SpillLocation location : locations) {
        S3SpillLocation s3Location = (S3SpillLocation) location;
        try (Block block = reader.read(s3Location, encryptionKey, schema)) {
            logger.info("validateRead: blockNum[{}] and recordCount[{}]", blocksRead, block.getRowCount());
            blocksRead++;

            for (int row = 0; row < block.getRowCount(); row++) {
                logger.info("validateRead: {}", BlockUtils.rowToString(block, row));
                rowsRead++;
                validateRow(block, row);
            }
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }

    assertEquals(getExpectedRows(), rowsRead);
}
Example #13
Source File: AbstractTestNamespaceService.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Stores an Arrow schema (an int field plus a struct with one string child) on a physical
 * dataset as FlatBuffer bytes, reads the bytes back through the namespace service, and checks
 * that the decoded schema equals the original.
 */
@Test
public void testDataSetSchema() throws Exception {
    Field field1 = new Field("a", true, new Int(32, true), null);
    Field child1 = new Field("c", true, Utf8.INSTANCE, null);
    Field field2 = new Field("b", true, Struct.INSTANCE, ImmutableList.of(child1));
    Schema schema = new Schema(ImmutableList.of(field1, field2));

    // Serialize the schema exactly once. The previous version called getSchema(builder) twice and
    // discarded the first result, leaving an unused copy of the schema in the buffer.
    FlatBufferBuilder builder = new FlatBufferBuilder();
    builder.finish(schema.getSchema(builder));

    NamespaceTestUtils.addSource(namespaceService, "s");
    NamespaceTestUtils.addPhysicalDS(namespaceService, "s.foo", builder.sizedByteArray());

    ByteBuffer bb = ByteBuffer.wrap(
            DatasetHelper.getSchemaBytes(
                    namespaceService.getDataset(new NamespaceKey(PathUtils.parseFullPath("s.foo"))))
                    .toByteArray());
    Schema returnedSchema = Schema.convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(bb));

    assertEquals(schema, returnedSchema);
}
Example #14
Source File: BlockUtilsTest.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
@Test public void isNullRow() { Schema schema = SchemaBuilder.newBuilder() .addField("col1", new ArrowType.Int(32, true)) .addField("col2", new ArrowType.Int(32, true)) .addField("col3", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")) .build(); LocalDateTime ldt = LocalDateTime.now(); //Make a block with 2 rows and no null rows Block block = allocator.createBlock(schema); BlockUtils.setValue(block.getFieldVector("col1"), 0, 10); BlockUtils.setValue(block.getFieldVector("col2"), 0, 20); BlockUtils.setValue(block.getFieldVector("col3"), 0, ldt); BlockUtils.setValue(block.getFieldVector("col1"), 1, 11); BlockUtils.setValue(block.getFieldVector("col2"), 1, 21); BlockUtils.setValue(block.getFieldVector("col3"), 1, ZonedDateTime.of(ldt, ZoneId.of("-05:00"))); block.setRowCount(2); assertFalse(BlockUtils.isNullRow(block, 1)); //now set a row to null BlockUtils.unsetRow(1, block); assertTrue(BlockUtils.isNullRow(block, 1)); }
Example #15
Source File: JdbcMetadataHandler.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException { SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder(); try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData())) { boolean found = false; while (resultSet.next()) { ArrowType columnType = JdbcArrowTypeConverter.toArrowType( resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS")); String columnName = resultSet.getString("COLUMN_NAME"); if (columnType != null && SupportedTypes.isSupported(columnType)) { schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build()); found = true; } else { LOGGER.error("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType); } } if (!found) { throw new RuntimeException("Could not find table in " + tableName.getSchemaName()); } // add partition columns partitionSchema.getFields().forEach(schemaBuilder::addField); return schemaBuilder.build(); } }
Example #16
Source File: ExampleUserDefinedFunctionHandlerTest.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
/**
 * Sends a single struct row through the "to_json" UDF and checks that the returned value
 * equals what {@code to_json} produces when called directly on the same struct.
 */
@Test
public void testToJsonMethod() throws Exception {
    Schema structInput = SchemaBuilder.newBuilder()
            .addStructField("struct")
            .addChildField("struct", "int", Types.MinorType.INT.getType())
            .addChildField("struct", "double", Types.MinorType.FLOAT8.getType())
            .addChildField("struct", "string", Types.MinorType.VARCHAR.getType())
            .build();
    Schema jsonOutput = SchemaBuilder.newBuilder()
            .addField("json", Types.MinorType.VARCHAR.getType())
            .build();

    Block inputRecords = allocator.createBlock(structInput);
    inputRecords.setRowCount(1);
    FieldVector structVector = inputRecords.getFieldVector("struct");

    // The one input row: a struct with an int, a double and a string member.
    Map<String, Object> structValue = new HashMap<>();
    structValue.put("int", 10);
    structValue.put("double", 2.3);
    structValue.put("string", "test_string");
    BlockUtils.setComplexValue(structVector, 0, FieldResolver.DEFAULT, structValue);

    UserDefinedFunctionResponse udfResponse = runAndAssertSerialization(inputRecords, jsonOutput, "to_json");

    Block outputRecords = udfResponse.getRecords();
    assertEquals(1, outputRecords.getRowCount());

    FieldReader jsonReader = outputRecords.getFieldReader("json");
    ArrowValueProjector projector = ProjectorUtils.createArrowValueProjector(jsonReader);
    assertEquals(exampleUserDefinedFunctionHandler.to_json(structValue), projector.project(0));
}
Example #17
Source File: BlockUtilsTest.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
@Test public void fieldToString() { Schema schema = SchemaBuilder.newBuilder() .addField("col1", new ArrowType.Int(32, true)) .addField("col2", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")) .build(); LocalDateTime ldt = LocalDateTime.of(2020, 03, 18, 12,54,29); //Make a block with 2 rows and no null rows Block block = allocator.createBlock(schema); BlockUtils.setValue(block.getFieldVector("col1"), 0, 10); BlockUtils.setValue(block.getFieldVector("col2"), 0, ldt); BlockUtils.setValue(block.getFieldVector("col1"), 1, 11); BlockUtils.setValue(block.getFieldVector("col2"), 1, ZonedDateTime.of(ldt, ZoneId.of("-05:00"))); block.setRowCount(2); String expectedRows = "rows=2"; String expectedCol1 = "[10, 11]"; String expectedCol2 = "[2020-03-18T12:54:29Z[UTC], 2020-03-18T12:54:29-05:00]"; String actual = block.toString(); assertTrue(actual.contains(expectedRows)); assertTrue(actual.contains(expectedCol1)); assertTrue(actual.contains(expectedCol2)); }
Example #18
Source File: ReadRecordsResponseSerDeTest.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
/**
 * Builds the expected {@code ReadRecordsResponse} (ten rows of year/month/day ints) and loads
 * the expected serialized text from the "serde/v2" resources.
 */
@Before
public void beforeTest() throws IOException {
    final String yearCol = "year";
    final String monthCol = "month";
    final String dayCol = "day";

    Schema dateSchema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(32, true))
            .addField(monthCol, new ArrowType.Int(32, true))
            .addField(dayCol, new ArrowType.Int(32, true))
            .build();

    Block records = allocator.createBlock(dateSchema);
    final int recordCount = 10;
    for (int row = 0; row < recordCount; row++) {
        BlockUtils.setValue(records.getFieldVector(yearCol), row, 2016 + row);
        BlockUtils.setValue(records.getFieldVector(monthCol), row, (row % 12) + 1);
        BlockUtils.setValue(records.getFieldVector(dayCol), row, (row % 28) + 1);
    }
    records.setRowCount(recordCount);

    expected = new ReadRecordsResponse("test-catalog", records);

    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Example #19
Source File: PostGreSqlMetadataHandlerTest.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
/**
 * Requests splits with continuation token "1" over a two-partition layout and expects only the
 * second partition (s1/p1) to be returned as a split.
 */
@Test
public void doGetSplitsContinuation() throws Exception {
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.postGreSqlMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId",
            "testCatalogName", tableName, constraints, partitionSchema, partitionCols);

    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(PostGreSqlMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);

    // The mocked partition query returns two partitions: (s0, p0) and (s1, p1).
    String[] columns = {"child_schema", "child"};
    int[] types = {Types.VARCHAR, Types.VARCHAR};
    Object[][] values = {{"s0", "p0"}, {"s1", "p1"}};
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));

    // An unused local holding the formatted partitions query used to be computed here. It was
    // never referenced by any stub or assertion, so it has been removed.
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);

    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);

    GetTableLayoutResponse getTableLayoutResponse = this.postGreSqlMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);

    // "1" is passed as the continuation token of the splits request.
    BlockAllocator splitBlockAllocator = new BlockAllocatorImpl();
    GetSplitsRequest getSplitsRequest = new GetSplitsRequest(this.federatedIdentity, "testQueryId", "testCatalogName",
            tableName, getTableLayoutResponse.getPartitions(), new ArrayList<>(partitionCols), constraints, "1");
    GetSplitsResponse getSplitsResponse = this.postGreSqlMetadataHandler.doGetSplits(splitBlockAllocator, getSplitsRequest);

    Set<Map<String, String>> expectedSplits = new HashSet<>();
    expectedSplits.add(ImmutableMap.of("partition_schema_name", "s1", "partition_name", "p1"));
    Assert.assertEquals(expectedSplits.size(), getSplitsResponse.getSplits().size());

    Set<Map<String, String>> actualSplits = getSplitsResponse.getSplits().stream().map(Split::getProperties).collect(Collectors.toSet());
    Assert.assertEquals(expectedSplits, actualSplits);
}
Example #20
Source File: HbaseSchemaUtilsTest.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
/**
 * Infers a schema from mocked scan results and checks that every expected field is present
 * with the expected minor type, and that the table was scanned exactly once.
 */
@Test
public void inferSchema() throws IOException {
    int numToScan = 4;
    TableName tableName = new TableName("schema", "table");
    List<Result> cannedResults = TestUtils.makeResults();

    HBaseConnection mockConnection = mock(HBaseConnection.class);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(cannedResults.iterator());

    // Route the scan to the mocked scanner by invoking the processor passed as the third argument.
    when(mockConnection.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer((InvocationOnMock invocationOnMock) -> {
                ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
                return processor.scan(mockScanner);
            });

    Schema inferred = HbaseSchemaUtils.inferSchema(mockConnection, tableName, numToScan);

    Map<String, Types.MinorType> actualFields = new HashMap<>();
    inferred.getFields().forEach(field ->
            actualFields.put(field.getName(), Types.getMinorTypeForArrowType(field.getType())));

    Map<String, Types.MinorType> expectedFields = new HashMap<>();
    TestUtils.makeSchema().build().getFields().forEach(field ->
            expectedFields.put(field.getName(), Types.getMinorTypeForArrowType(field.getType())));

    expectedFields.forEach((fieldName, expectedType) -> {
        assertNotNull(actualFields.get(fieldName));
        // The field name doubles as the assertion message.
        assertEquals(fieldName, expectedType, actualFields.get(fieldName));
    });
    assertEquals(expectedFields.size(), actualFields.size());

    verify(mockConnection, times(1)).scanTable(anyObject(), any(Scan.class), any(ResultProcessor.class));
    verify(mockScanner, times(1)).iterator();
}
Example #21
Source File: DatasetMetadataImpl.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Stores the supplied values in the corresponding fields. No validation or copying is done.
 *
 * @param stats the dataset statistics
 * @param schema the dataset schema
 * @param partitionColumns the partition column names
 * @param sortColumns the sort column names
 * @param extraInfo additional opaque information kept with the metadata
 */
DatasetMetadataImpl(DatasetStats stats,
                    Schema schema,
                    List<String> partitionColumns,
                    List<String> sortColumns,
                    BytesOutput extraInfo) {
    // Statistics and schema
    this.schema = schema;
    this.stats = stats;

    // Column lists (references are kept as passed in)
    this.sortColumns = sortColumns;
    this.partitionColumns = partitionColumns;

    this.extraInfo = extraInfo;
}
Example #22
Source File: NativeProjector.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Stores the supplied collaborators and starts with an empty set of referenced fields.
 *
 * @param incoming the incoming vectors
 * @param schema the schema stored on this projector
 * @param functionContext the function context stored on this projector
 */
NativeProjector(VectorAccessible incoming, Schema schema, FunctionContext functionContext) {
    // A linked hash set keeps the fields in the order they were inserted.
    this.referencedFields = Sets.newLinkedHashSet();

    this.functionContext = functionContext;
    this.schema = schema;
    this.incoming = incoming;
}
Example #23
Source File: HiveDatasetMetadata.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Stores the supplied values in the corresponding fields. No validation or copying is done.
 *
 * @param schema the dataset schema
 * @param partitionColumns the partition column names
 * @param sortColumns the sort column names
 * @param extraInfo additional opaque information kept with the metadata
 * @param metadataAccumulator the accumulator kept with the metadata
 */
private HiveDatasetMetadata(final Schema schema,
                            final List<String> partitionColumns,
                            final List<String> sortColumns,
                            final BytesOutput extraInfo,
                            final MetadataAccumulator metadataAccumulator) {
    this.metadataAccumulator = metadataAccumulator;
    this.extraInfo = extraInfo;

    // Column lists (references are kept as passed in)
    this.sortColumns = sortColumns;
    this.partitionColumns = partitionColumns;

    this.schema = schema;
}
Example #24
Source File: DDBRecordMetadata.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
/**
 * Reads the {@code COLUMN_NAME_MAPPING_PROPERTY} entry from the schema's custom metadata and
 * splits it into a map using {@code MAP_SPLITTER}.
 *
 * @param schema the schema to read the mapping from; may be null
 * @return a new mutable map built from the property value (per the original documentation,
 *         glue column names mapped to ddb column names), or an empty immutable map when the
 *         schema, its custom metadata, or the property value is null or empty
 */
private static Map<String, String> getColumnNameMapping(Schema schema) {
    if (schema == null || schema.getCustomMetadata() == null) {
        return ImmutableMap.of();
    }

    String encodedMapping = schema.getCustomMetadata().getOrDefault(COLUMN_NAME_MAPPING_PROPERTY, null);
    if (Strings.isNullOrEmpty(encodedMapping)) {
        return ImmutableMap.of();
    }

    return new HashMap<>(MAP_SPLITTER.split(encodedMapping));
}
Example #25
Source File: TestSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * An optional 5-byte FIXED_LEN_BYTE_ARRAY annotated as DECIMAL(8, 2) must convert to an Arrow
 * Decimal(8, 2) field with the same name.
 */
@Test
public void testParquetFixedBinaryToArrowDecimal() {
    MessageType parquetSchema = Types.buildMessage()
            .addField(Types.optional(FIXED_LEN_BYTE_ARRAY)
                    .length(5)
                    .as(DECIMAL)
                    .precision(8)
                    .scale(2)
                    .named("a"))
            .named("root");

    ArrowType.Decimal decimalType = new ArrowType.Decimal(8, 2);
    Schema expectedArrowSchema = new Schema(asList(field("a", decimalType)));

    Schema actualArrowSchema = converter.fromParquet(parquetSchema).getArrowSchema();
    Assert.assertEquals(expectedArrowSchema, actualArrowSchema);
}
Example #26
Source File: JdbcMetadataHandlerTest.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
/**
 * Creates the mocks used by the tests and a {@code JdbcMetadataHandler} whose abstract methods
 * are stubbed out: a fixed partition schema, no-op partition listing and null splits.
 */
@Before
public void setup() {
    // JDBC layer: every credential provider yields the same deep-stubbed connection.
    this.jdbcConnectionFactory = Mockito.mock(JdbcConnectionFactory.class);
    this.connection = Mockito.mock(Connection.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(this.jdbcConnectionFactory.getConnection(Mockito.any(JdbcCredentialProvider.class)))
            .thenReturn(this.connection);

    // AWS clients: the secret "testSecret" resolves to a fixed username/password document.
    this.secretsManager = Mockito.mock(AWSSecretsManager.class);
    this.athena = Mockito.mock(AmazonAthena.class);
    GetSecretValueRequest secretRequest = new GetSecretValueRequest().withSecretId("testSecret");
    GetSecretValueResult secretResult = new GetSecretValueResult()
            .withSecretString("{\"username\": \"testUser\", \"password\": \"testPassword\"}");
    Mockito.when(this.secretsManager.getSecretValue(Mockito.eq(secretRequest))).thenReturn(secretResult);

    DatabaseConnectionConfig connectionConfig = new DatabaseConnectionConfig(
            "testCatalog",
            JdbcConnectionFactory.DatabaseEngine.MYSQL,
            "mysql://jdbc:mysql://hostname/${testSecret}",
            "testSecret");

    this.jdbcMetadataHandler = new JdbcMetadataHandler(connectionConfig, this.secretsManager, this.athena, jdbcConnectionFactory) {
        @Override
        public Schema getPartitionSchema(final String catalogName) {
            return PARTITION_SCHEMA;
        }

        @Override
        public void getPartitions(final BlockWriter blockWriter,
                final GetTableLayoutRequest getTableLayoutRequest,
                QueryStatusChecker queryStatusChecker) {
            // Intentionally empty in this stub.
        }

        @Override
        public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) {
            return null;
        }
    };

    this.federatedIdentity = Mockito.mock(FederatedIdentity.class);
    this.blockAllocator = Mockito.mock(BlockAllocator.class);
}
Example #27
Source File: TestSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * An optional INT64 annotated as TIMESTAMP_MILLIS must convert to an Arrow millisecond
 * Timestamp field in the "UTC" timezone with the same name.
 */
@Test
public void testParquetInt64TimestampMillisToArrow() {
    MessageType parquetSchema = Types.buildMessage()
            .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a"))
            .named("root");

    ArrowType.Timestamp utcMillis = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC");
    Schema expectedArrowSchema = new Schema(asList(field("a", utcMillis)));

    Schema actualArrowSchema = converter.fromParquet(parquetSchema).getArrowSchema();
    Assert.assertEquals(expectedArrowSchema, actualArrowSchema);
}
Example #28
Source File: FieldIdUtil2.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Looks up {@code path} in each field of the schema, in declaration order, and returns the
 * first id that the per-field lookup produces.
 *
 * @param schema the schema whose fields are searched
 * @param path the path to resolve
 * @param isHyper passed through unchanged to the per-field lookup
 * @return the first non-null id found, or {@code null} when no field yields one
 */
public static TypedFieldId getFieldId(Schema schema, BasePath path, boolean isHyper) {
    int position = 0;
    for (Field candidate : schema.getFields()) {
        // The field's index within the schema is handed to the per-field lookup.
        TypedFieldId resolved = getFieldId(candidate, position++, path, isHyper);
        if (resolved != null) {
            return resolved;
        }
    }
    return null;
}
Example #29
Source File: UserDefinedFunctionRequestSerDeTest.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
/**
 * Builds the expected {@code UserDefinedFunctionRequest} (one input row with factor1=2 and
 * factor2=3, and an int "product" output schema) and loads the expected serialized text from
 * the "serde/v2" resources.
 */
@Before
public void beforeTest() throws IOException {
    Schema factorsSchema = SchemaBuilder.newBuilder()
            .addField("factor1", Types.MinorType.INT.getType())
            .addField("factor2", Types.MinorType.INT.getType())
            .build();
    Schema productSchema = SchemaBuilder.newBuilder()
            .addField("product", Types.MinorType.INT.getType())
            .build();

    Block inputRecords = allocator.createBlock(factorsSchema);
    inputRecords.setRowCount(1);

    // Single input row: factor1 = 2, factor2 = 3.
    IntVector factor1Vector = (IntVector) inputRecords.getFieldVector("factor1");
    IntVector factor2Vector = (IntVector) inputRecords.getFieldVector("factor2");
    factor1Vector.setSafe(0, 2);
    factor2Vector.setSafe(0, 3);

    expected = new UserDefinedFunctionRequest(
            federatedIdentity, inputRecords, productSchema, "test-method", UserDefinedFunctionType.SCALAR);

    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "UserDefinedFunctionRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Example #30
Source File: TestSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * An optional INT32 annotated as TIME_MILLIS must convert to an Arrow 32-bit millisecond Time
 * field with the same name.
 */
@Test
public void testParquetInt32TimeMillisToArrow() {
    MessageType parquetSchema = Types.buildMessage()
            .addField(Types.optional(INT32).as(TIME_MILLIS).named("a"))
            .named("root");

    ArrowType.Time millis32 = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
    Schema expectedArrowSchema = new Schema(asList(field("a", millis32)));

    Schema actualArrowSchema = converter.fromParquet(parquetSchema).getArrowSchema();
    Assert.assertEquals(expectedArrowSchema, actualArrowSchema);
}