org.apache.flink.table.descriptors.Schema Java Examples
The following examples show how to use
org.apache.flink.table.descriptors.Schema.
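As a quick orientation before the project examples, here is a minimal, self-contained sketch of how the Schema descriptor is typically declared and turned into connector properties. It is not taken from any of the projects below, and the field names are made up for illustration:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.descriptors.Rowtime;
import org.apache.flink.table.descriptors.Schema;

import java.util.Map;

public class SchemaDescriptorSketch {

    public static void main(String[] args) {
        Schema schema = new Schema()
                .field("user_id", DataTypes.BIGINT())
                .field("name", DataTypes.STRING())
                .field("event_time", DataTypes.TIMESTAMP(3)).rowtime(
                        new Rowtime().timestampsFromField("event_time").watermarksPeriodicAscending())
                .field("proc_time", DataTypes.TIMESTAMP(3)).proctime();

        // The descriptor serializes to flat string properties whose keys start with
        // the "schema." prefix (Schema.SCHEMA); table factories read them back later.
        Map<String, String> properties = schema.toProperties();
        properties.forEach((key, value) -> System.out.println(key + " = " + value));
    }
}

Most of the examples below follow the same pattern: build a Schema, hand it to a table descriptor via withSchema(...), and let a TableFactory reconstruct the TableSchema from the generated properties.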
Example #1
Source File: FlinkPravegaTableFactoryTest.java From flink-connectors with Apache License 2.0
/**
 * Rowtime attribute should be of type TIMESTAMP.
 */
@Test(expected = ValidationException.class)
public void testWrongRowTimeAttributeType() {
    final Schema schema = new Schema()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT()).rowtime(new Rowtime()
                    .timestampsFromField("age")
                    .watermarksFromStrategy(
                            new BoundedOutOfOrderTimestamps(30000L)));
    Pravega pravega = new Pravega();
    Stream stream = Stream.of(SCOPE, STREAM);

    pravega.tableSourceReaderBuilder()
            .forStream(stream)
            .withPravegaConfig(PRAVEGA_CONFIG);

    final TestTableDescriptor testDesc = new TestTableDescriptor(pravega)
            .withFormat(JSON)
            .withSchema(schema)
            .inAppendMode();

    final Map<String, String> propertiesMap = testDesc.toProperties();
    FlinkPravegaTableFactoryBase tableFactoryBase = new FlinkPravegaStreamTableSourceFactory();
    tableFactoryBase.createFlinkPravegaTableSource(propertiesMap);
    fail("Schema validation failed");
}
Example #2
Source File: CsvRowFormatFactoryTest.java From flink with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = new HashMap<>();
    properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
    properties.putAll(new Csv().deriveSchema().toProperties());

    final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
    final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

    final SerializationSchema<?> actualSer = TableFactoryService
            .find(SerializationSchemaFactory.class, properties)
            .createSerializationSchema(properties);
    assertEquals(expectedSer, actualSer);

    final DeserializationSchema<?> actualDeser = TableFactoryService
            .find(DeserializationSchemaFactory.class, properties)
            .createDeserializationSchema(properties);
    assertEquals(expectedDeser, actualDeser);
}
Example #3
Source File: TableFactoryService.java From flink with Apache License 2.0
/**
 * Performs filtering for special cases (i.e. table format factories with schema derivation).
 */
private static List<String> filterSupportedPropertiesFactorySpecific(TableFactory factory, List<String> keys) {
    if (factory instanceof TableFormatFactory) {
        boolean includeSchema = ((TableFormatFactory) factory).supportsSchemaDerivation();
        return keys.stream().filter(k -> {
            if (includeSchema) {
                return k.startsWith(Schema.SCHEMA + ".") ||
                        k.startsWith(FormatDescriptorValidator.FORMAT + ".");
            } else {
                return k.startsWith(FormatDescriptorValidator.FORMAT + ".");
            }
        }).collect(Collectors.toList());
    } else {
        return keys;
    }
}
Example #4
Source File: KafkaTableSourceSinkFactoryTestBase.java From flink with Apache License 2.0
protected Map<String, String> createKafkaSinkProperties() {
    return new TestTableDescriptor(
            new Kafka()
                    .version(getKafkaVersion())
                    .topic(TOPIC)
                    .properties(KAFKA_PROPERTIES)
                    .sinkPartitionerFixed()
                    .startFromSpecificOffsets(OFFSETS)) // test if they accepted although not needed
            .withFormat(new TestTableFormat())
            .withSchema(
                    new Schema()
                            .field(FRUIT_NAME, DataTypes.STRING())
                            .field(COUNT, DataTypes.DECIMAL(10, 4))
                            .field(EVENT_TIME, DataTypes.TIMESTAMP(3)))
            .inAppendMode()
            .toProperties();
}
Example #5
Source File: CatalogTableImpTest.java From flink with Apache License 2.0
@Test
public void testToProperties() {
    TableSchema schema = createTableSchema();
    Map<String, String> prop = createProperties();
    CatalogTable table = new CatalogTableImpl(
            schema,
            createPartitionKeys(),
            prop,
            TEST
    );

    DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(table.toProperties());

    assertEquals(schema, descriptorProperties.getTableSchema(Schema.SCHEMA));
}
Example #6
Source File: KafkaTableSourceSinkFactoryTestBase.java From flink with Apache License 2.0
protected Map<String, String> createKafkaSourceProperties() {
    return new TestTableDescriptor(
            new Kafka()
                    .version(getKafkaVersion())
                    .topic(TOPIC)
                    .properties(KAFKA_PROPERTIES)
                    .sinkPartitionerRoundRobin() // test if accepted although not needed
                    .startFromSpecificOffsets(OFFSETS))
            .withFormat(new TestTableFormat())
            .withSchema(
                    new Schema()
                            .field(FRUIT_NAME, DataTypes.STRING()).from(NAME)
                            .field(COUNT, DataTypes.DECIMAL(38, 18)) // no from so it must match with the input
                            .field(EVENT_TIME, DataTypes.TIMESTAMP(3)).rowtime(
                                    new Rowtime().timestampsFromField(TIME).watermarksPeriodicAscending())
                            .field(PROC_TIME, DataTypes.TIMESTAMP(3)).proctime())
            .toProperties();
}
Example #7
Source File: CsvRowFormatFactoryTest.java From flink with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = new HashMap<>();
    properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
    properties.putAll(new Csv().toProperties());

    final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
    final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

    final SerializationSchema<?> actualSer = TableFactoryService
            .find(SerializationSchemaFactory.class, properties)
            .createSerializationSchema(properties);
    assertEquals(expectedSer, actualSer);

    final DeserializationSchema<?> actualDeser = TableFactoryService
            .find(DeserializationSchemaFactory.class, properties)
            .createDeserializationSchema(properties);
    assertEquals(expectedDeser, actualDeser);
}
Example #8
Source File: CsvRowFormatFactoryTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = new HashMap<>();
    properties.putAll(new Schema().schema(TableSchema.fromTypeInfo(SCHEMA)).toProperties());
    properties.putAll(new Csv().deriveSchema().toProperties());

    final CsvRowSerializationSchema expectedSer = new CsvRowSerializationSchema.Builder(SCHEMA).build();
    final CsvRowDeserializationSchema expectedDeser = new CsvRowDeserializationSchema.Builder(SCHEMA).build();

    final SerializationSchema<?> actualSer = TableFactoryService
            .find(SerializationSchemaFactory.class, properties)
            .createSerializationSchema(properties);
    assertEquals(expectedSer, actualSer);

    final DeserializationSchema<?> actualDeser = TableFactoryService
            .find(DeserializationSchemaFactory.class, properties)
            .createDeserializationSchema(properties);
    assertEquals(expectedDeser, actualDeser);
}
Example #9
Source File: FlinkPravegaTableFactoryTest.java From flink-connectors with Apache License 2.0
/**
 * Processing time attribute should be of type TIMESTAMP.
 */
@Test(expected = ValidationException.class)
public void testWrongProcTimeAttributeType() {
    final Schema schema = new Schema()
            .field("name", DataTypes.STRING())
            .field("age", DataTypes.INT()).proctime();

    Pravega pravega = new Pravega();
    Stream stream = Stream.of(SCOPE, STREAM);

    pravega.tableSourceReaderBuilder()
            .forStream(stream)
            .withPravegaConfig(PRAVEGA_CONFIG);

    final TestTableDescriptor testDesc = new TestTableDescriptor(pravega)
            .withFormat(JSON)
            .withSchema(schema)
            .inAppendMode();

    final Map<String, String> propertiesMap = testDesc.toProperties();
    FlinkPravegaTableFactoryBase tableFactoryBase = new FlinkPravegaStreamTableSourceFactory();
    tableFactoryBase.createFlinkPravegaTableSource(propertiesMap);
    fail("Schema validation failed");
}
Example #10
Source File: ElasticsearchUpsertTableSinkFactoryTestBase.java From flink with Apache License 2.0
protected Map<String, String> createElasticSearchProperties() {
    return new TestTableDescriptor(
            new Elasticsearch()
                    .version(getElasticsearchVersion())
                    .host(HOSTNAME, PORT, SCHEMA)
                    .index(INDEX)
                    .documentType(DOC_TYPE)
                    .keyDelimiter(KEY_DELIMITER)
                    .keyNullLiteral(KEY_NULL_LITERAL)
                    .bulkFlushBackoffExponential()
                    .bulkFlushBackoffDelay(123L)
                    .bulkFlushBackoffMaxRetries(3)
                    .bulkFlushInterval(100L)
                    .bulkFlushMaxActions(1000)
                    .bulkFlushMaxSize("1 MB")
                    .failureHandlerCustom(DummyFailureHandler.class)
                    .connectionMaxRetryTimeout(100)
                    .connectionPathPrefix("/myapp"))
            .withFormat(
                    new Json()
                            .deriveSchema())
            .withSchema(
                    new Schema()
                            .field(FIELD_KEY, DataTypes.BIGINT())
                            .field(FIELD_FRUIT_NAME, DataTypes.STRING())
                            .field(FIELD_COUNT, DataTypes.DECIMAL(10, 4))
                            .field(FIELD_TS, DataTypes.TIMESTAMP(3)))
            .inUpsertMode()
            .toProperties();
}
Example #11
Source File: FlinkTableITCase.java From flink-connectors with Apache License 2.0
@Test
public void testBatchTableSinkUsingDescriptor() throws Exception {

    // create a Pravega stream for test purposes
    Stream stream = Stream.of(setupUtils.getScope(), "testBatchTableSinkUsingDescriptor");
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    // create a Flink Table environment
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
            .withRoutingKeyField("category")
            .forStream(stream)
            .withPravegaConfig(setupUtils.getPravegaConfig());

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(true))
            .withSchema(new Schema()
                    .field("category", DataTypes.STRING())
                    .field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
            .createBatchTableSink(propertiesMap);

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogSinkTable = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSinkPath),
            connectorCatalogSinkTable, false);
    table.insertInto("PravegaSink");
    env.execute();
}
Example #12
Source File: RedisDescriptorTest.java From bahir-flink with Apache License 2.0
@Test
public void testRedisDescriptor() throws Exception {
    DataStreamSource<Row> source = (DataStreamSource<Row>) env.addSource(new TestSourceFunctionString())
            .returns(new RowTypeInfo(TypeInformation.of(String.class), TypeInformation.of(Long.class)));

    EnvironmentSettings settings = EnvironmentSettings
            .newInstance()
            .useOldPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, settings);
    tableEnvironment.registerDataStream("t1", source, "k, v");

    Redis redis = new Redis()
            .mode(RedisValidator.REDIS_CLUSTER)
            .command(RedisCommand.INCRBY_EX.name())
            .ttl(100000)
            .property(RedisValidator.REDIS_NODES, REDIS_HOST + ":" + REDIS_PORT);

    tableEnvironment
            .connect(redis).withSchema(new Schema()
                    .field("k", TypeInformation.of(String.class))
                    .field("v", TypeInformation.of(Long.class)))
            .registerTableSink("redis");

    tableEnvironment.sqlUpdate("insert into redis select k, v from t1");
    env.execute("Test Redis Table");
}
Example #13
Source File: JsonRowFormatFactoryTest.java From flink with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = toMap(
            new Schema()
                    .field("field1", Types.BOOLEAN())
                    .field("field2", Types.INT())
                    .field("proctime", Types.SQL_TIMESTAMP()).proctime(),
            new Json()
                    .deriveSchema());

    testSchemaSerializationSchema(properties);

    testSchemaDeserializationSchema(properties);
}
Example #14
Source File: CatalogTableImpl.java From flink with Apache License 2.0
@Override
public Map<String, String> toProperties() {
    DescriptorProperties descriptor = new DescriptorProperties();
    descriptor.putTableSchema(Schema.SCHEMA, getSchema());
    descriptor.putPartitionKeys(getPartitionKeys());

    Map<String, String> properties = new HashMap<>(getProperties());
    properties.remove(CatalogConfig.IS_GENERIC);

    descriptor.putProperties(properties);

    return descriptor.asMap();
}
Example #15
Source File: CatalogTableImpl.java From flink with Apache License 2.0
/**
 * Construct a {@link CatalogTableImpl} from complete properties that contains table schema.
 */
public static CatalogTableImpl fromProperties(Map<String, String> properties) {
    DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(properties);
    TableSchema tableSchema = descriptorProperties.getTableSchema(Schema.SCHEMA);
    List<String> partitionKeys = descriptorProperties.getPartitionKeys();
    return new CatalogTableImpl(
            tableSchema,
            partitionKeys,
            removeRedundant(properties, tableSchema, partitionKeys),
            ""
    );
}
Example #16
Source File: CatalogTableImpl.java From flink with Apache License 2.0
/**
 * Construct catalog table properties from {@link #toProperties()}.
 */
public static Map<String, String> removeRedundant(
        Map<String, String> properties,
        TableSchema schema,
        List<String> partitionKeys) {
    Map<String, String> ret = new HashMap<>(properties);
    DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putTableSchema(Schema.SCHEMA, schema);
    descriptorProperties.putPartitionKeys(partitionKeys);
    descriptorProperties.asMap().keySet().forEach(ret::remove);
    return ret;
}
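Taken together, the three CatalogTableImpl methods above form a properties round trip: toProperties() flattens the schema and partition keys under Schema.SCHEMA, fromProperties() rebuilds a table from such a map, and removeRedundant() strips the flattened keys from the remaining options. The following is a minimal sketch of that round trip; it is not part of the Flink tests, and the column names and connector option are made up:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTableImpl;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class CatalogTablePropertiesRoundTrip {

    public static void main(String[] args) {
        TableSchema schema = TableSchema.builder()
                .field("id", DataTypes.BIGINT())
                .field("name", DataTypes.STRING())
                .build();

        Map<String, String> options = new HashMap<>();
        options.put("connector.type", "filesystem"); // hypothetical connector option

        CatalogTableImpl original = new CatalogTableImpl(
                schema, Collections.singletonList("name"), options, "demo table");

        // Flatten the table (schema, partition keys, options) into string properties ...
        Map<String, String> properties = original.toProperties();

        // ... and rebuild an equivalent table from them; the schema survives the round trip.
        CatalogTableImpl restored = CatalogTableImpl.fromProperties(properties);
        System.out.println(schema.equals(restored.getSchema())); // true
    }
}

Note that the comment is not written by toProperties(), so fromProperties() restores it as an empty string; only the schema, partition keys, and remaining options survive the round trip.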
Example #17
Source File: TableExampleWordCount.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    String path = TableExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();
    blinkStreamTableEnv
            .connect(new FileSystem().path(path))
            .withFormat(new OldCsv().field("word", Types.STRING).lineDelimiter("\n"))
            .withSchema(new Schema().field("word", Types.STRING))
            .inAppendMode()
            .registerTableSource("FlieSourceTable");

    Table wordWithCount = blinkStreamTableEnv.scan("FlieSourceTable")
            .groupBy("word")
            .select("word,count(word) as _count");
    blinkStreamTableEnv.toRetractStream(wordWithCount, Row.class).print();

    // You may wonder why the printed results contain an extra true/false field.
    // The retract sink deletes before it inserts: false marks the retraction of the previous row, true marks the insertion of the new row.
    blinkStreamTableEnv.execute("Blink Stream SQL Job");
}
Example #18
Source File: TableSourceFactoryMock.java From flink with Apache License 2.0
@Override
public TableSource<Row> createTableSource(Map<String, String> properties) {
    final DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(properties);
    final TableSchema schema = descriptorProperties.getTableSchema(Schema.SCHEMA);
    return new TableSourceMock(schema.toRowDataType(), schema);
}
Example #19
Source File: TableEnvironmentTest.java From flink with Apache License 2.0
@Test
public void testConnect() throws Exception {
    final TableEnvironmentMock tableEnv = TableEnvironmentMock.getStreamingInstance();

    tableEnv
            .connect(new ConnectorDescriptorMock(TableSourceFactoryMock.CONNECTOR_TYPE_VALUE, 1, true))
            .withFormat(new FormatDescriptorMock("my_format", 1))
            .withSchema(new Schema()
                    .field("my_field_0", "INT")
                    .field("my_field_1", "BOOLEAN"))
            .inAppendMode()
            .registerTableSource("my_table");

    final Catalog catalog = tableEnv.getCatalog(EnvironmentSettings.DEFAULT_BUILTIN_CATALOG)
            .orElseThrow(AssertionError::new);

    final CatalogBaseTable table = catalog
            .getTable(new ObjectPath(EnvironmentSettings.DEFAULT_BUILTIN_DATABASE, "my_table"));

    assertThat(
            table.getSchema(),
            equalTo(
                    TableSchema.builder()
                            .field("my_field_0", DataTypes.INT())
                            .field("my_field_1", DataTypes.BOOLEAN())
                            .build()));

    final ConnectorCatalogTable<?, ?> connectorCatalogTable = (ConnectorCatalogTable<?, ?>) table;

    assertThat(
            connectorCatalogTable.getTableSource().isPresent(),
            equalTo(true));
}
Example #20
Source File: TableEnvironmentTest.java From flink with Apache License 2.0
@Test
public void testConnect() {
    final TableEnvironmentMock tableEnv = TableEnvironmentMock.getStreamingInstance();

    tableEnv
            .connect(new ConnectorDescriptorMock(TableSourceFactoryMock.CONNECTOR_TYPE_VALUE, 1, true))
            .withFormat(new FormatDescriptorMock("my_format", 1))
            .withSchema(new Schema()
                    .field("my_field_0", "INT")
                    .field("my_field_1", "BOOLEAN")
                    .field("my_part_1", "BIGINT")
                    .field("my_part_2", "STRING"))
            .withPartitionKeys(Arrays.asList("my_part_1", "my_part_2"))
            .inAppendMode()
            .createTemporaryTable("my_table");

    CatalogManager.TableLookupResult lookupResult = tableEnv.catalogManager.getTable(ObjectIdentifier.of(
            EnvironmentSettings.DEFAULT_BUILTIN_CATALOG,
            EnvironmentSettings.DEFAULT_BUILTIN_DATABASE,
            "my_table"))
            .orElseThrow(AssertionError::new);

    assertThat(lookupResult.isTemporary(), equalTo(true));

    CatalogBaseTable catalogBaseTable = lookupResult.getTable();
    assertTrue(catalogBaseTable instanceof CatalogTable);

    CatalogTable table = (CatalogTable) catalogBaseTable;
    assertCatalogTable(table);
    assertCatalogTable(CatalogTableImpl.fromProperties(table.toProperties()));
}
Example #21
Source File: TableSourceFactoryMock.java From flink with Apache License 2.0
@Override
public TableSource<Row> createTableSource(Map<String, String> properties) {
    final DescriptorProperties descriptorProperties = new DescriptorProperties();
    descriptorProperties.putProperties(properties);
    final TableSchema schema = TableSchemaUtils.getPhysicalSchema(
            descriptorProperties.getTableSchema(Schema.SCHEMA));
    return new TableSourceMock(schema);
}
Example #22
Source File: JsonRowFormatFactoryTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSchemaDerivation() {
    final Map<String, String> properties = toMap(
            new Schema()
                    .field("field1", Types.BOOLEAN())
                    .field("field2", Types.INT())
                    .field("proctime", Types.SQL_TIMESTAMP()).proctime(),
            new Json()
                    .deriveSchema());

    testSchemaSerializationSchema(properties);

    testSchemaDeserializationSchema(properties);
}
Example #23
Source File: JsonRowFormatFactoryTest.java From flink with Apache License 2.0
@Test
public void testSchemaDerivationByDefault() {
    final Map<String, String> properties = toMap(
            new Schema()
                    .field("field1", Types.BOOLEAN())
                    .field("field2", Types.INT())
                    .field("proctime", Types.SQL_TIMESTAMP()).proctime(),
            new Json());

    testSchemaSerializationSchema(properties);

    testSchemaDeserializationSchema(properties);
}
Example #24
Source File: FlinkTableITCase.java From flink-connectors with Apache License 2.0
/**
 * Validates the use of Pravega Table Descriptor to generate the source/sink Table factory to
 * write and read from Pravega stream using {@link BatchTableEnvironment}.
 *
 * @throws Exception
 */
@Test
public void testBatchTableUsingDescriptor() throws Exception {

    final String scope = setupUtils.getScope();
    final String streamName = "stream";
    Stream stream = Stream.of(scope, streamName);
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    PravegaConfig pravegaConfig = setupUtils.getPravegaConfig();

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
            .withRoutingKeyField("category")
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);
    pravega.tableSourceReaderBuilder()
            .withReaderGroupScope(stream.getScope())
            .forStream(stream)
            .withPravegaConfig(pravegaConfig);

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(false))
            .withSchema(new Schema()
                    .field("category", DataTypes.STRING())
                    .field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
            .createBatchTableSink(propertiesMap);
    final TableSource<?> source = TableFactoryService.find(BatchTableSourceFactory.class, propertiesMap)
            .createBatchTableSource(propertiesMap);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogTableSink = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSinkPath),
            connectorCatalogTableSink, false);

    table.insertInto("PravegaSink");
    env.execute();

    String tableSourcePath = tableEnv.getCurrentDatabase() + "." + "samples";

    ConnectorCatalogTable<?, ?> connectorCatalogTableSource = ConnectorCatalogTable.source(source, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSourcePath),
            connectorCatalogTableSource, false);

    // select some sample data from the Pravega-backed table, as a view
    Table view = tableEnv.sqlQuery("SELECT * FROM samples WHERE category IN ('A','B')");

    // convert the view to a dataset and collect the results for comparison purposes
    List<SampleRecord> results = tableEnv.toDataSet(view, SampleRecord.class).collect();
    Assert.assertEquals(new HashSet<>(SAMPLES), new HashSet<>(results));
}
Example #25
Source File: HiveTableUtil.java From flink with Apache License 2.0
public static Table instantiateHiveTable(ObjectPath tablePath, CatalogBaseTable table, HiveConf hiveConf) {
    if (!(table instanceof CatalogTableImpl) && !(table instanceof CatalogViewImpl)) {
        throw new CatalogException(
                "HiveCatalog only supports CatalogTableImpl and CatalogViewImpl");
    }
    // let Hive set default parameters for us, e.g. serialization.format
    Table hiveTable = org.apache.hadoop.hive.ql.metadata.Table.getEmptyTable(tablePath.getDatabaseName(),
            tablePath.getObjectName());
    hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000));

    Map<String, String> properties = new HashMap<>(table.getProperties());
    // Table comment
    if (table.getComment() != null) {
        properties.put(HiveCatalogConfig.COMMENT, table.getComment());
    }

    boolean isGeneric = HiveCatalog.isGenericForCreate(properties);

    // Hive table's StorageDescriptor
    StorageDescriptor sd = hiveTable.getSd();
    HiveTableUtil.setDefaultStorageFormat(sd, hiveConf);

    if (isGeneric) {
        DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
        tableSchemaProps.putTableSchema(Schema.SCHEMA, table.getSchema());

        if (table instanceof CatalogTable) {
            tableSchemaProps.putPartitionKeys(((CatalogTable) table).getPartitionKeys());
        }

        properties.putAll(tableSchemaProps.asMap());
        properties = maskFlinkProperties(properties);
        hiveTable.setParameters(properties);
    } else {
        HiveTableUtil.initiateTableFromProperties(hiveTable, properties, hiveConf);
        List<FieldSchema> allColumns = HiveTableUtil.createHiveColumns(table.getSchema());
        // Table columns and partition keys
        if (table instanceof CatalogTableImpl) {
            CatalogTable catalogTable = (CatalogTableImpl) table;
            if (catalogTable.isPartitioned()) {
                int partitionKeySize = catalogTable.getPartitionKeys().size();
                List<FieldSchema> regularColumns = allColumns.subList(0, allColumns.size() - partitionKeySize);
                List<FieldSchema> partitionColumns = allColumns.subList(allColumns.size() - partitionKeySize, allColumns.size());

                sd.setCols(regularColumns);
                hiveTable.setPartitionKeys(partitionColumns);
            } else {
                sd.setCols(allColumns);
                hiveTable.setPartitionKeys(new ArrayList<>());
            }
        } else {
            sd.setCols(allColumns);
        }
        // Table properties
        hiveTable.getParameters().putAll(properties);
    }

    if (table instanceof CatalogViewImpl) {
        // TODO: [FLINK-12398] Support partitioned view in catalog API
        hiveTable.setPartitionKeys(new ArrayList<>());

        CatalogView view = (CatalogView) table;
        hiveTable.setViewOriginalText(view.getOriginalQuery());
        hiveTable.setViewExpandedText(view.getExpandedQuery());
        hiveTable.setTableType(TableType.VIRTUAL_VIEW.name());
    }

    return hiveTable;
}
Example #26
Source File: HiveCatalog.java From flink with Apache License 2.0
private CatalogBaseTable instantiateCatalogTable(Table hiveTable, HiveConf hiveConf) {
    boolean isView = TableType.valueOf(hiveTable.getTableType()) == TableType.VIRTUAL_VIEW;

    // Table properties
    Map<String, String> properties = hiveTable.getParameters();

    boolean isGeneric = isGenericForGet(hiveTable.getParameters());

    TableSchema tableSchema;
    // Partition keys
    List<String> partitionKeys = new ArrayList<>();

    if (isGeneric) {
        properties = retrieveFlinkProperties(properties);
        DescriptorProperties tableSchemaProps = new DescriptorProperties(true);
        tableSchemaProps.putProperties(properties);
        ObjectPath tablePath = new ObjectPath(hiveTable.getDbName(), hiveTable.getTableName());
        tableSchema = tableSchemaProps.getOptionalTableSchema(Schema.SCHEMA)
                .orElseThrow(() -> new CatalogException("Failed to get table schema from properties for generic table " + tablePath));
        partitionKeys = tableSchemaProps.getPartitionKeys();
        // remove the schema from properties
        properties = CatalogTableImpl.removeRedundant(properties, tableSchema, partitionKeys);
    } else {
        properties.put(CatalogConfig.IS_GENERIC, String.valueOf(false));

        // Table schema
        List<FieldSchema> fields = getNonPartitionFields(hiveConf, hiveTable);
        Set<String> notNullColumns = client.getNotNullColumns(hiveConf, hiveTable.getDbName(), hiveTable.getTableName());
        Optional<UniqueConstraint> primaryKey = isView ? Optional.empty() :
                client.getPrimaryKey(hiveTable.getDbName(), hiveTable.getTableName(), HiveTableUtil.relyConstraint((byte) 0));
        // PK columns cannot be null
        primaryKey.ifPresent(pk -> notNullColumns.addAll(pk.getColumns()));
        tableSchema = HiveTableUtil.createTableSchema(fields, hiveTable.getPartitionKeys(), notNullColumns, primaryKey.orElse(null));

        if (!hiveTable.getPartitionKeys().isEmpty()) {
            partitionKeys = getFieldNames(hiveTable.getPartitionKeys());
        }
    }

    String comment = properties.remove(HiveCatalogConfig.COMMENT);

    if (isView) {
        return new CatalogViewImpl(
                hiveTable.getViewOriginalText(),
                hiveTable.getViewExpandedText(),
                tableSchema,
                properties,
                comment);
    } else {
        return new CatalogTableImpl(tableSchema, partitionKeys, properties, comment);
    }
}