org.apache.flink.table.descriptors.Schema Java Examples
The following examples show how to use
org.apache.flink.table.descriptors.Schema.
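As a quick orientation before the project examples, here is a minimal, self-contained sketch of how the Schema descriptor is typically declared and turned into connector properties. It is not taken from any of the projects below, and the field names are made up for illustration:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.descriptors.Rowtime;
import org.apache.flink.table.descriptors.Schema;

import java.util.Map;

public class SchemaDescriptorSketch {

    public static void main(String[] args) {
        Schema schema = new Schema()
                .field("user_id", DataTypes.BIGINT())
                .field("name", DataTypes.STRING())
                .field("event_time", DataTypes.TIMESTAMP(3)).rowtime(
                        new Rowtime().timestampsFromField("event_time").watermarksPeriodicAscending())
                .field("proc_time", DataTypes.TIMESTAMP(3)).proctime();

        // The descriptor serializes to flat string properties whose keys start with
        // the "schema." prefix (Schema.SCHEMA); table factories read them back later.
        Map<String, String> properties = schema.toProperties();
        properties.forEach((key, value) -> System.out.println(key + " = " + value));
    }
}

Most of the examples below follow the same pattern: build a Schema, hand it to a table descriptor via withSchema(...), and let a TableFactory reconstruct the TableSchema from the generated properties.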
Example #1
Source File: FlinkPravegaTableFactoryTest.java From flink-connectors with Apache License 2.0
/**
 * Rowtime attribute should be of type TIMESTAMP.
 */
@Test(expected = ValidationException.class)
public void testWrongRowTimeAttributeType() {
    final Schema schema = new Schema()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT()).rowtime(new Rowtime()
                    .timestampsFromField("age")
                    .watermarksFromStrategy(
                            new BoundedOutOfOrderTimestamps(30000L)));
    Pravega pravega = new Pravega();
    Stream stream = Stream.of(SCOPE, STREAM);

    pravega.tableSourceReaderBuilder()
            .forStream(stream)
            .withPravegaConfig(PRAVEGA_CONFIG);

    final TestTableDescriptor testDesc = new TestTableDescriptor(pravega)
            .withFormat(JSON)
            .withSchema(schema)
            .inAppendMode();

    final Map<String, String> propertiesMap = testDesc.toProperties();
    FlinkPravegaTableFactoryBase tableFactoryBase = new FlinkPravegaStreamTableSourceFactory();
    tableFactoryBase.createFlinkPravegaTableSource(propertiesMap);
    fail("Schema validation failed");
}
Example #2
Source File: CsvRowFormatFactoryTest.java From flink with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = new HashMap<>();
    properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
    properties.putAll(new Csv().deriveSchema().toProperties());

    final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
    final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

    final SerializationSchema<?> actualSer = TableFactoryService
            .find(SerializationSchemaFactory.class, properties)
            .createSerializationSchema(properties);
    assertEquals(expectedSer, actualSer);

    final DeserializationSchema<?> actualDeser = TableFactoryService
            .find(DeserializationSchemaFactory.class, properties)
            .createDeserializationSchema(properties);
    assertEquals(expectedDeser, actualDeser);
}
Example #3
Source File: TableFactoryService.java From flink with Apache License 2.0
/**
 * Performs filtering for special cases (i.e. table format factories with schema derivation).
 */
private static List<String> filterSupportedPropertiesFactorySpecific(TableFactory factory, List<String> keys) {
    if (factory instanceof TableFormatFactory) {
        boolean includeSchema = ((TableFormatFactory) factory).supportsSchemaDerivation();
        return keys.stream().filter(k -> {
            if (includeSchema) {
                return k.startsWith(Schema.SCHEMA + ".") ||
                        k.startsWith(FormatDescriptorValidator.FORMAT + ".");
            } else {
                return k.startsWith(FormatDescriptorValidator.FORMAT + ".");
            }
        }).collect(Collectors.toList());
    } else {
        return keys;
    }
}
Example #4
Source File: KafkaTableSourceSinkFactoryTestBase.java From flink with Apache License 2.0
protected Map<String, String> createKafkaSinkProperties() {
    return new TestTableDescriptor(
            new Kafka()
                    .version(getKafkaVersion())
                    .topic(TOPIC)
                    .properties(KAFKA_PROPERTIES)
                    .sinkPartitionerFixed()
                    .startFromSpecificOffsets(OFFSETS)) // test if they accepted although not needed
            .withFormat(new TestTableFormat())
            .withSchema(
                    new Schema()
                            .field(FRUIT_NAME, DataTypes.STRING())
                            .field(COUNT, DataTypes.DECIMAL(10, 4))
                            .field(EVENT_TIME, DataTypes.TIMESTAMP(3)))
            .inAppendMode()
            .toProperties();
}
Example #5
Source File: CatalogTableImpTest.java From flink with Apache License 2.0
@Test
public void testToProperties() {
    TableSchema schema = createTableSchema();
    Map<String, String> prop = createProperties();
    CatalogTable table = new CatalogTableImpl(
            schema,
            createPartitionKeys(),
            prop,
            TEST
    );

    DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(table.toProperties());

    assertEquals(schema, descriptorProperties.getTableSchema(Schema.SCHEMA));
}
Example #6
Source File: KafkaTableSourceSinkFactoryTestBase.java From flink with Apache License 2.0
protected Map<String, String> createKafkaSourceProperties() {
    return new TestTableDescriptor(
            new Kafka()
                    .version(getKafkaVersion())
                    .topic(TOPIC)
                    .properties(KAFKA_PROPERTIES)
                    .sinkPartitionerRoundRobin() // test if accepted although not needed
                    .startFromSpecificOffsets(OFFSETS))
            .withFormat(new TestTableFormat())
            .withSchema(
                    new Schema()
                            .field(FRUIT_NAME, DataTypes.STRING()).from(NAME)
                            .field(COUNT, DataTypes.DECIMAL(38, 18)) // no from so it must match with the input
                            .field(EVENT_TIME, DataTypes.TIMESTAMP(3)).rowtime(
                                    new Rowtime().timestampsFromField(TIME).watermarksPeriodicAscending())
                            .field(PROC_TIME, DataTypes.TIMESTAMP(3)).proctime())
            .toProperties();
}
Example #7
Source File: CsvRowFormatFactoryTest.java From flink with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = new HashMap<>();
    properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
    properties.putAll(new Csv().toProperties());

    final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
    final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

    final SerializationSchema<?> actualSer = TableFactoryService
            .find(SerializationSchemaFactory.class, properties)
            .createSerializationSchema(properties);
    assertEquals(expectedSer, actualSer);

    final DeserializationSchema<?> actualDeser = TableFactoryService
            .find(DeserializationSchemaFactory.class, properties)
            .createDeserializationSchema(properties);
    assertEquals(expectedDeser, actualDeser);
}
Example #8
Source File: CsvRowFormatFactoryTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = new HashMap<>();
    properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
    properties.putAll(new Csv().deriveSchema().toProperties());

    final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
    final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

    final SerializationSchema<?> actualSer = TableFactoryService
            .find(SerializationSchemaFactory.class, properties)
            .createSerializationSchema(properties);
    assertEquals(expectedSer, actualSer);

    final DeserializationSchema<?> actualDeser = TableFactoryService
            .find(DeserializationSchemaFactory.class, properties)
            .createDeserializationSchema(properties);
    assertEquals(expectedDeser, actualDeser);
}
Example #9
Source File: FlinkPravegaTableFactoryTest.java From flink-connectors with Apache License 2.0
/**
 * Processing time attribute should be of type TIMESTAMP.
 */
@Test(expected = ValidationException.class)
public void testWrongProcTimeAttributeType() {
    final Schema schema = new Schema()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT()).proctime();

    Pravega pravega = new Pravega();
    Stream stream = Stream.of(SCOPE, STREAM);

    pravega.tableSourceReaderBuilder()
            .forStream(stream)
            .withPravegaConfig(PRAVEGA_CONFIG);

    final TestTableDescriptor testDesc = new TestTableDescriptor(pravega)
            .withFormat(JSON)
            .withSchema(schema)
            .inAppendMode();

    final Map<String, String> propertiesMap = testDesc.toProperties();
    FlinkPravegaTableFactoryBase tableFactoryBase = new FlinkPravegaStreamTableSourceFactory();
    tableFactoryBase.createFlinkPravegaTableSource(propertiesMap);
    fail("Schema validation failed");
}
Example #10
Source File: ElasticsearchUpsertTableSinkFactoryTestBase.java From flink with Apache License 2.0
protected Map<String, String> createElasticSearchProperties() {
    return new TestTableDescriptor(
            new Elasticsearch()
                    .version(getElasticsearchVersion())
                    .host(HOSTNAME, PORT, SCHEMA)
                    .index(INDEX)
                    .documentType(DOC_TYPE)
                    .keyDelimiter(KEY_DELIMITER)
                    .keyNullLiteral(KEY_NULL_LITERAL)
                    .bulkFlushBackoffExponential()
                    .bulkFlushBackoffDelay(123L)
                    .bulkFlushBackoffMaxRetries(3)
                    .bulkFlushInterval(100L)
                    .bulkFlushMaxActions(1000)
                    .bulkFlushMaxSize("1 MB")
                    .failureHandlerCustom(DummyFailureHandler.class)
                    .connectionMaxRetryTimeout(100)
                    .connectionPathPrefix("/myapp"))
            .withFormat(
                    new Json()
                            .deriveSchema())
            .withSchema(
                    new Schema()
                            .field(FIELD_KEY, DataTypes.BIGINT())
                            .field(FIELD_FRUIT_NAME, DataTypes.STRING())
                            .field(FIELD_COUNT, DataTypes.DECIMAL(10, 4))
                            .field(FIELD_TS, DataTypes.TIMESTAMP(3)))
            .inUpsertMode()
            .toProperties();
}
Example #11
Source File: FlinkTableITCase.java From flink-connectors with Apache License 2.0
@Test
public void testBatchTableSinkUsingDescriptor() throws Exception {

    // create a Pravega stream for test purposes
    Stream stream = Stream.of(setupUtils.getScope(), "testBatchTableSinkUsingDescriptor");
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    // create a Flink Table environment
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
            .withRoutingKeyField("category")
            .forStream(stream)
            .withPravegaConfig(setupUtils.getPravegaConfig());

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(true))
            .withSchema(new Schema()
                    .field("category", DataTypes.STRING())
                    .field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
            .createBatchTableSink(propertiesMap);

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogSinkTable = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSinkPath),
            connectorCatalogSinkTable, false);
    table.insertInto("PravegaSink");
    env.execute();
}
Example #12
Source File: RedisDescriptorTest.java From bahir-flink with Apache License 2.0
@Test
public void testRedisDescriptor() throws Exception {
    DataStreamSource<Row> source = (DataStreamSource<Row>) env.addSource(new TestSourceFunctionString())
            .returns(new RowTypeInfo(TypeInformation.of(String.class), TypeInformation.of(Long.class)));

    EnvironmentSettings settings = EnvironmentSettings
            .newInstance()
            .useOldPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, settings);
    tableEnvironment.registerDataStream("t1", source, "k, v");

    Redis redis = new Redis()
            .mode(RedisValidator.REDIS_CLUSTER)
            .command(RedisCommand.INCRBY_EX.name())
            .ttl(100000)
            .property(RedisValidator.REDIS_NODES, REDIS_HOST + ":" + REDIS_PORT);

    tableEnvironment
            .connect(redis).withSchema(new Schema()
                    .field("k", TypeInformation.of(String.class))
                    .field("v", TypeInformation.of(Long.class)))
            .registerTableSink("redis");

    tableEnvironment.sqlUpdate("insert into redis select k, v from t1");
    env.execute("Test Redis Table");
}
Example #13
Source File: JsonRowFormatFactoryTest.java From flink with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = toMap(
            new Schema()
                    .field("field1", Types.BOOLEAN())
                    .field("field2", Types.INT())
                    .field("proctime", Types.SQL_TIMESTAMP()).proctime(),
            new Json()
                    .deriveSchema());

    testSchemaSerializationSchema(properties);

    testSchemaDeserializationSchema(properties);
}
Example #14
Source File: CatalogTableImpl.java From flink with Apache License 2.0
@Override
public Map<String, String> toProperties() {
    DescriptorProperties descriptor = new DescriptorProperties();
    descriptor.putTableSchema(Schema.SCHEMA, getSchema());
    descriptor.putPartitionKeys(getPartitionKeys());

    Map<String, String> properties = new HashMap<>(getProperties());
    properties.remove(CatalogConfig.IS_GENERIC);

    descriptor.putProperties(properties);

    return descriptor.asMap();
}
Example #15
Source File: CatalogTableImpl.java From flink with Apache License 2.0
/**
 * Construct a {@link CatalogTableImpl} from complete properties that contains table schema.
 */
public static CatalogTableImpl fromProperties(Map<String, String> properties) {
    DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(properties);
    TableSchema tableSchema = descriptorProperties.getTableSchema(Schema.SCHEMA);
    List<String> partitionKeys = descriptorProperties.getPartitionKeys();
    return new CatalogTableImpl(
            tableSchema,
            partitionKeys,
            removeRedundant(properties, tableSchema, partitionKeys),
            ""
    );
}
Example #16
Source File: CatalogTableImpl.java From flink with Apache License 2.0
/**
 * Construct catalog table properties from {@link #toProperties()}.
 */
public static Map<String, String> removeRedundant(
        Map<String, String> properties,
        TableSchema schema,
        List<String> partitionKeys) {
    Map<String, String> ret = new HashMap<>(properties);
    DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putTableSchema(Schema.SCHEMA, schema);
    descriptorProperties.putPartitionKeys(partitionKeys);
    descriptorProperties.asMap().keySet().forEach(ret::remove);
    return ret;
}
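Taken together, the three CatalogTableImpl methods above form a properties round trip: toProperties() flattens the schema and partition keys under Schema.SCHEMA, fromProperties() rebuilds a table from such a map, and removeRedundant() strips the flattened keys from the remaining options. The following is a minimal sketch of that round trip; it is not part of the Flink tests, and the column names and connector option are made up:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTableImpl;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class CatalogTablePropertiesRoundTrip {

    public static void main(String[] args) {
        TableSchema schema = TableSchema.builder()
                .field("id", DataTypes.BIGINT())
                .field("name", DataTypes.STRING())
                .build();

        Map<String, String> options = new HashMap<>();
        options.put("connector.type", "filesystem"); // hypothetical connector option

        CatalogTableImpl original = new CatalogTableImpl(
                schema, Collections.singletonList("name"), options, "demo table");

        // Flatten the table (schema, partition keys, options) into string properties ...
        Map<String, String> properties = original.toProperties();

        // ... and rebuild an equivalent table from them; the schema survives the round trip.
        CatalogTableImpl restored = CatalogTableImpl.fromProperties(properties);
        System.out.println(schema.equals(restored.getSchema())); // true
    }
}

Note that the comment is not written by toProperties(), so fromProperties() restores it as an empty string; only the schema, partition keys, and remaining options survive the round trip.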
Example #17
Source File: TableExampleWordCount.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String path = TableExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();
    blinkStreamTableEnv
            .connect(new FileSystem().path(path))
            .withFormat(new OldCsv().field("word", Types.STRING).lineDelimiter("\n"))
            .withSchema(new Schema().field("word", Types.STRING))
            .inAppendMode()
            .registerTableSource("FlieSourceTable");

    Table wordWithCount = blinkStreamTableEnv.scan("FlieSourceTable")
            .groupBy("word")
            .select("word,count(word) as _count");
    blinkStreamTableEnv.toRetractStream(wordWithCount, Row.class).print();

    // You may wonder why the printed results contain an extra true/false field.
    // The retract sink deletes before it inserts: false marks the retraction of the previous row, true marks the insertion of the new row.
    blinkStreamTableEnv.execute("Blink Stream SQL Job");
}
Example #18
Source File: TableSourceFactoryMock.java From flink with Apache License 2.0
@Override
public TableSource<Row> createTableSource(Map<String, String> properties) {
    final DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(properties);
    final TableSchema schema = descriptorProperties.getTableSchema(Schema.SCHEMA);
    return new TableSourceMock(schema.toRowDataType(), schema);
}
Example #19
Source File: TableEnvironmentTest.java From flink with Apache License 2.0
@Test
public void testConnect() throws Exception {
    final TableEnvironmentMock tableEnv = TableEnvironmentMock.getStreamingInstance();

    tableEnv
            .connect(new ConnectorDescriptorMock(TableSourceFactoryMock.CONNECTOR_TYPE_VALUE, 1, true))
            .withFormat(new FormatDescriptorMock("my_format", 1))
            .withSchema(new Schema()
                    .field("my_field_0", "INT")
                    .field("my_field_1", "BOOLEAN"))
            .inAppendMode()
            .registerTableSource("my_table");

    final Catalog catalog = tableEnv.getCatalog(EnvironmentSettings.DEFAULT_BUILTIN_CATALOG)
            .orElseThrow(AssertionError::new);

    final CatalogBaseTable table = catalog
            .getTable(new ObjectPath(EnvironmentSettings.DEFAULT_BUILTIN_DATABASE, "my_table"));

    assertThat(
            table.getSchema(),
            equalTo(
                    TableSchema.builder()
                            .field("my_field_0", DataTypes.INT())
                            .field("my_field_1", DataTypes.BOOLEAN())
                            .build()));

    final ConnectorCatalogTable<?, ?> connectorCatalogTable = (ConnectorCatalogTable<?, ?>) table;

    assertThat(
            connectorCatalogTable.getTableSource().isPresent(),
            equalTo(true));
}
Example #20
Source File: TableEnvironmentTest.java From flink with Apache License 2.0
@Test
public void testConnect() {
    final TableEnvironmentMock tableEnv = TableEnvironmentMock.getStreamingInstance();

    tableEnv
            .connect(new ConnectorDescriptorMock(TableSourceFactoryMock.CONNECTOR_TYPE_VALUE, 1, true))
            .withFormat(new FormatDescriptorMock("my_format", 1))
            .withSchema(new Schema()
                    .field("my_field_0", "INT")
                    .field("my_field_1", "BOOLEAN")
                    .field("my_part_1", "BIGINT")
                    .field("my_part_2", "STRING"))
            .withPartitionKeys(Arrays.asList("my_part_1", "my_part_2"))
            .inAppendMode()
            .createTemporaryTable("my_table");

    CatalogManager.TableLookupResult lookupResult = tableEnv.catalogManager.getTable(ObjectIdentifier.of(
            EnvironmentSettings.DEFAULT_BUILTIN_CATALOG,
            EnvironmentSettings.DEFAULT_BUILTIN_DATABASE,
            "my_table"))
            .orElseThrow(AssertionError::new);

    assertThat(lookupResult.isTemporary(), equalTo(true));

    CatalogBaseTable catalogBaseTable = lookupResult.getTable();
    assertTrue(catalogBaseTable instanceof CatalogTable);

    CatalogTable table = (CatalogTable) catalogBaseTable;
    assertCatalogTable(table);
    assertCatalogTable(CatalogTableImpl.fromProperties(table.toProperties()));
}
Example #21
Source File: TableSourceFactoryMock.java From flink with Apache License 2.0
@Override
public TableSource<Row> createTableSource(Map<String, String> properties) {
    final DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(properties);
    final TableSchema schema = TableSchemaUtils.getPhysicalSchema(
            descriptorProperties.getTableSchema(Schema.SCHEMA));
    return new TableSourceMock(schema);
}
Example #22
Source File: JsonRowFormatFactoryTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = toMap(
            new Schema()
                    .field("field1", Types.BOOLEAN())
                    .field("field2", Types.INT())
                    .field("proctime", Types.SQL_TIMESTAMP()).proctime(),
            new Json()
                    .deriveSchema());

    testSchemaSerializationSchema(properties);

    testSchemaDeserializationSchema(properties);
}
Example #23
Source File: JsonRowFormatFactoryTest.java From flink with Apache License 2.0
@Test
public void testSchemaDerivationByDefault() {
    final Map<String, String> properties = toMap(
            new Schema()
                    .field("field1", Types.BOOLEAN())
                    .field("field2", Types.INT())
                    .field("proctime", Types.SQL_TIMESTAMP()).proctime(),
            new Json());

    testSchemaSerializationSchema(properties);

    testSchemaDeserializationSchema(properties);
}
Example #24
Source File: FlinkTableITCase.java From flink-connectors with Apache License 2.0
/**
 * Validates the use of Pravega Table Descriptor to generate the source/sink Table factory to
 * write and read from Pravega stream using {@link BatchTableEnvironment}.
 *
 * @throws Exception
 */
@Test
public void testBatchTableUsingDescriptor() throws Exception {

    final String scope = setupUtils.getScope();
    final String streamName = "stream";
    Stream stream = Stream.of(scope, streamName);
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    PravegaConfig pravegaConfig = setupUtils.getPravegaConfig();

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
            .withRoutingKeyField("category")
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);
    pravega.tableSourceReaderBuilder()
            .withReaderGroupScope(stream.getScope())
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(false))
            .withSchema(new Schema()
                    .field("category", DataTypes.STRING())
                    .field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
            .createBatchTableSink(propertiesMap);
    final TableSource<?> source = TableFactoryService.find(BatchTableSourceFactory.class, propertiesMap)
            .createBatchTableSource(propertiesMap);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogTableSink = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSinkPath),
            connectorCatalogTableSink, false);

    table.insertInto("PravegaSink");
    env.execute();

    String tableSourcePath = tableEnv.getCurrentDatabase() + "." + "samples";

    ConnectorCatalogTable<?, ?> connectorCatalogTableSource = ConnectorCatalogTable.source(source, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSourcePath),
            connectorCatalogTableSource, false);

    // select some sample data from the Pravega-backed table, as a view
    Table view = tableEnv.sqlQuery("SELECT * FROM samples WHERE category IN ('A','B')");

    // convert the view to a dataset and collect the results for comparison purposes
    List<SampleRecord> results = tableEnv.toDataSet(view, SampleRecord.class).collect();
    Assert.assertEquals(new HashSet<>(SAMPLES), new HashSet<>(results));
}
Example #25
Source File: HiveTableUtil.java From flink with Apache License 2.0
public static Table instantiateHiveTable(ObjectPath tablePath, CatalogBaseTable table, HiveConf hiveConf) {
    if (!(table instanceof CatalogTableImpl) && !(table instanceof CatalogViewImpl)) {
        throw new CatalogException(
                "HiveCatalog only supports CatalogTableImpl and CatalogViewImpl");
    }
    // let Hive set default parameters for us, e.g. serialization.format
    Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(),
            tablePath.getObjectName());
    hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000));

    Map<String, String> properties = new HashMap<>(table.getProperties());
    // Table comment
    if (table.getComment() != null) {
        properties.put(HiveCatalogConfig.COMMENT, table.getComment());
    }

    boolean isGeneric = HiveCatalog.isGenericForCreate(properties);

    // Hive table's StorageDescriptor
    StorageDescriptor sd = hiveTable.getSd();
    HiveTableUtil.setDefaultStorageFormat(sd, hiveConf);

    if (isGeneric) {
        DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
        tableSchemaProps.putTableSchema(Schema.SCHEMA, table.getSchema());

        if (table instanceof CatalogTable) {
            tableSchemaProps.putPartitionKeys(((CatalogTable) table).getPartitionKeys());
        }

        properties.putAll(tableSchemaProps.asMap());
        properties = maskFlinkProperties(properties);
        hiveTable.setParameters(properties);
    } else {
        HiveTableUtil.initiateTableFromProperties(hiveTable, properties, hiveConf);
        List<FieldSchema> allColumns = HiveTableUtil.createHiveColumns(table.getSchema());
        // Table columns and partition keys
        if (table instanceof CatalogTableImpl) {
            CatalogTable catalogTable = (CatalogTableImpl) table;
            if (catalogTable.isPartitioned()) {
                int partitionKeySize = catalogTable.getPartitionKeys().size();
                List<FieldSchema> regularColumns = allColumns.subList(0, allColumns.size() - partitionKeySize);
                List<FieldSchema> partitionColumns = allColumns.subList(allColumns.size() - partitionKeySize, allColumns.size());

                sd.setCols(regularColumns);
                hiveTable.setPartitionKeys(partitionColumns);
            } else {
                sd.setCols(allColumns);
                hiveTable.setPartitionKeys(new ArrayList<>());
            }
        } else {
            sd.setCols(allColumns);
        }
        // Table properties
        hiveTable.getParameters().putAll(properties);
    }

    if (table instanceof CatalogViewImpl) {
        // TODO: [FLINK-12398] Support partitioned view in catalog API
        hiveTable.setPartitionKeys(new ArrayList<>());

        CatalogView view = (CatalogView) table;
        hiveTable.setViewOriginalText(view.getOriginalQuery());
        hiveTable.setViewExpandedText(view.getExpandedQuery());
        hiveTable.setTableType(TableType.VIRTUAL_VIEW.name());
    }

    return hiveTable;
}
Example #26
Source File: HiveCatalog.java From flink with Apache License 2.0
private CatalogBaseTable instantiateCatalogTable(Table hiveTable, HiveConf hiveConf) {
    boolean isView = TableType.valueOf(hiveTable.getTableType()) == TableType.VIRTUAL_VIEW;

    // Table properties
    Map<String, String> properties = hiveTable.getParameters();

    boolean isGeneric = isGenericForGet(hiveTable.getParameters());

    TableSchema tableSchema;
    // Partition keys
    List<String> partitionKeys = new ArrayList<>();

    if (isGeneric) {
        properties = retrieveFlinkProperties(properties);
        DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
        tableSchemaProps.putProperties(properties);
        ObjectPath tablePath = new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName());
        tableSchema = tableSchemaProps.getOptionalTableSchema(Schema.SCHEMA)
                .orElseThrow(() -> new CatalogException("Failed to get table schema from properties for generic table " + tablePath));
        partitionKeys = tableSchemaProps.getPartitionKeys();
        // remove the schema from properties
        properties = CatalogTableImpl.removeRedundant(properties, tableSchema, partitionKeys);
    } else {
        properties.put(CatalogConfig.IS_GENERIC, String.valueOf(false));

        // Table schema
        List<FieldSchema> fields = getNonPartitionFields(hiveConf, hiveTable);
        Set<String> notNullColumns = client.getNotNullColumns(hiveConf, hiveTable.getDbName(), hiveTable.getTableName());
        Optional<UniqueConstraint> primaryKey = isView ? Optional.empty() :
                client.getPrimaryKey(hiveTable.getDbName(), hiveTable.getTableName(), HiveTableUtil.relyConstraint((byte) 0));
        // PK columns cannot be null
        primaryKey.ifPresent(pk -> notNullColumns.addAll(pk.getColumns()));
        tableSchema = HiveTableUtil.createTableSchema(fields, hiveTable.getPartitionKeys(), notNullColumns, primaryKey.orElse(null));

        if (!hiveTable.getPartitionKeys().isEmpty()) {
            partitionKeys = getFieldNames(hiveTable.getPartitionKeys());
        }
    }

    String comment = properties.remove(HiveCatalogConfig.COMMENT);

    if (isView) {
        return new CatalogViewImpl(
                hiveTable.getViewOriginalText(),
                hiveTable.getViewExpandedText(),
                tableSchema,
                properties,
                comment);
    } else {
        return new CatalogTableImpl(tableSchema, partitionKeys, properties, comment);
    }
}