org.apache.flink.table.descriptors.OldCsv Java Examples

Source File: TableExampleWordCount.java From flink-learning with Apache License 2.0

5 votes

/**
 * Runs a streaming word count over the {@code words.txt} classpath resource using the
 * Blink planner and prints the resulting retract stream.
 *
 * @param args command-line arguments (not read)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final String sourceTableName = "FlieSourceTable";

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    StreamTableEnvironment tableEnv = StreamTableEnvironment.create(
            env,
            EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build());

    // Register the words file as an append-only table with a single "word" column.
    String wordsFile = TableExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();
    tableEnv.connect(new FileSystem().path(wordsFile))
            .withFormat(new OldCsv().field("word", Types.STRING).lineDelimiter("\n"))
            .withSchema(new Schema().field("word", Types.STRING))
            .inAppendMode()
            .registerTableSource(sourceTableName);

    // Count the occurrences of every distinct word.
    Table counts = tableEnv
            .scan(sourceTableName)
            .groupBy("word")
            .select("word,count(word) as _count");
    tableEnv.toRetractStream(counts, Row.class).print();

    // The printed records carry an extra true/false field, which may look puzzling at first.
    // The sink deletes before it inserts: false means the previous record is being removed,
    // true means this record is being inserted.

    tableEnv.execute("Blink Stream SQL Job");
}

Source File: TableExampleWordCount.java From flink-learning with Apache License 2.0

5 votes

/**
 * Entry point of the word-count example: reads {@code words.txt} from the classpath as a
 * one-column table, groups by word, and prints the counts as a retract stream.
 *
 * @param args command-line arguments (not read)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final String registeredName = "FlieSourceTable";

    StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setParallelism(1);
    StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(
            streamEnv,
            EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build());

    // Expose the words file as an append-only source table with one STRING column named "word".
    String inputPath = TableExampleWordCount.class.getClassLoader().getResource("words.txt").getPath();
    streamTableEnv.connect(new FileSystem().path(inputPath))
            .withFormat(new OldCsv().field("word", Types.STRING).lineDelimiter("\n"))
            .withSchema(new Schema().field("word", Types.STRING))
            .inAppendMode()
            .registerTableSource(registeredName);

    // Aggregate: one row per word together with its running count.
    Table wordCounts = streamTableEnv
            .scan(registeredName)
            .groupBy("word")
            .select("word,count(word) as _count");
    streamTableEnv.toRetractStream(wordCounts, Row.class).print();

    // Readers may wonder about the additional true/false field in the printed output.
    // The sink first deletes and then inserts: false marks the removal of the previous
    // record, true marks the insertion of the current one.

    streamTableEnv.execute("Blink Stream SQL Job");
}

Source File: HiveCatalogITCase.java From flink with Apache License 2.0

4 votes

/**
 * Creates a CSV-backed source table and sink table in {@code hiveCatalog}, queries the source
 * through a batch table environment, copies it into the sink with an INSERT statement, and
 * verifies both the query result and the contents of the written file.
 *
 * @throws Exception if catalog operations, job execution, or file access fail
 */
@Test
public void testGenericTable() throws Exception {
	ExecutionEnvironment execEnv = ExecutionEnvironment.createLocalEnvironment(1);
	BatchTableEnvironment tableEnv = BatchTableEnvironment.create(execEnv);

	tableEnv.registerCatalog("myhive", hiveCatalog);

	TableSchema schema = TableSchema.builder()
		.field("name", DataTypes.STRING())
		.field("age", DataTypes.INT())
		.build();

	FormatDescriptor format = new OldCsv()
		.field("name", Types.STRING())
		.field("age", Types.INT());

	// Source table reads the CSV fixture from the test resources.
	CatalogTable source =
		new CatalogTableBuilder(
			new FileSystem().path(this.getClass().getResource("/csv/test.csv").getPath()),
			schema)
		.withFormat(format)
		.inAppendMode()
		.withComment("Comment.")
		.build();

	// Sink table writes to a file inside a freshly created temporary folder.
	Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");

	CatalogTable sink =
		new CatalogTableBuilder(
			new FileSystem().path(p.toAbsolutePath().toString()),
			schema)
		.withFormat(format)
		.inAppendMode()
		.withComment("Comment.")
		.build();

	hiveCatalog.createTable(
		new ObjectPath(HiveCatalog.DEFAULT_DB, sourceTableName),
		source,
		false
	);

	hiveCatalog.createTable(
		new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName),
		sink,
		false
	);

	Table t = tableEnv.sqlQuery(
		String.format("select * from myhive.`default`.%s", sourceTableName));

	List<Row> result = tableEnv.toDataSet(t, Row.class).collect();

	// assert query result
	assertEquals(
		Arrays.asList(
			Row.of("1", 1),
			Row.of("2", 2),
			Row.of("3", 3)),
		result
	);

	tableEnv.sqlUpdate(
		String.format("insert into myhive.`default`.%s select * from myhive.`default`.%s",
			sinkTableName,
			sourceTableName));
	tableEnv.execute("myjob");

	// assert written result
	File resultFile = new File(p.toAbsolutePath().toString());
	// try-with-resources closes the reader even when an assertion fails, so the file handle
	// is not leaked.
	try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
		for (int i = 0; i < 3; i++) {
			String readLine = reader.readLine();
			assertEquals(String.format("%d,%d", i + 1, i + 1), readLine);
		}

		// No more line
		assertNull(reader.readLine());
	}
}

Source File: HiveCatalogITCase.java From flink with Apache License 2.0

4 votes

/**
 * Creates a CSV-backed source table and sink table in {@code hiveCatalog} through the
 * descriptor API, queries the source with a Blink batch table environment, copies it into the
 * sink with an INSERT statement, verifies the query result and the written file, and finally
 * drops both tables.
 *
 * @throws Exception if catalog operations, job execution, or file access fail
 */
@Test
public void testCsvTableViaAPI() throws Exception {
	EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build();
	TableEnvironment tableEnv = TableEnvironment.create(settings);
	tableEnv.getConfig().addConfiguration(new Configuration().set(CoreOptions.DEFAULT_PARALLELISM, 1));

	tableEnv.registerCatalog("myhive", hiveCatalog);
	tableEnv.useCatalog("myhive");

	TableSchema schema = TableSchema.builder()
		.field("name", DataTypes.STRING())
		.field("age", DataTypes.INT())
		.build();

	FormatDescriptor format = new OldCsv()
		.field("name", Types.STRING())
		.field("age", Types.INT());

	// Source table reads the CSV fixture from the test resources.
	CatalogTable source =
		new CatalogTableBuilder(
			new FileSystem().path(this.getClass().getResource("/csv/test.csv").getPath()),
			schema)
		.withFormat(format)
		.inAppendMode()
		.withComment("Comment.")
		.build();

	// Sink table writes to a file inside a freshly created temporary folder.
	Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");

	CatalogTable sink =
		new CatalogTableBuilder(
			new FileSystem().path(p.toAbsolutePath().toString()),
			schema)
		.withFormat(format)
		.inAppendMode()
		.withComment("Comment.")
		.build();

	hiveCatalog.createTable(
		new ObjectPath(HiveCatalog.DEFAULT_DB, sourceTableName),
		source,
		false
	);

	hiveCatalog.createTable(
		new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName),
		sink,
		false
	);

	Table t = tableEnv.sqlQuery(
		String.format("select * from myhive.`default`.%s", sourceTableName));

	// Sort by string representation so the comparison does not depend on collection order.
	List<Row> result = Lists.newArrayList(t.execute().collect());
	result.sort(Comparator.comparing(String::valueOf));

	// assert query result
	assertEquals(
		Arrays.asList(
			Row.of("1", 1),
			Row.of("2", 2),
			Row.of("3", 3)),
		result
	);

	TableEnvUtil.execInsertSqlAndWaitResult(tableEnv,
		String.format("insert into myhive.`default`.%s select * from myhive.`default`.%s",
			sinkTableName,
			sourceTableName));

	// assert written result
	File resultFile = new File(p.toAbsolutePath().toString());
	// try-with-resources closes the reader even when an assertion fails, so the file handle
	// is not leaked.
	try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
		for (int i = 0; i < 3; i++) {
			String readLine = reader.readLine();
			assertEquals(String.format("%d,%d", i + 1, i + 1), readLine);
		}

		// No more line
		assertNull(reader.readLine());
	}

	tableEnv.executeSql(String.format("DROP TABLE %s", sourceTableName));
	tableEnv.executeSql(String.format("DROP TABLE %s", sinkTableName));
}

Source File: HiveCatalogUseBlinkITCase.java From flink with Apache License 2.0

4 votes

/**
 * Registers a CSV-backed source table and four catalog functions in {@code hiveCatalog},
 * then runs {@code testUdf} once with {@code true} and once with {@code false}.
 *
 * @throws Exception if a catalog operation or one of the {@code testUdf} runs fails
 */
@Test
public void testBlinkUdf() throws Exception {
	// Two-column (name, age) layout used by both the table schema and the CSV format.
	TableSchema tableSchema = TableSchema.builder()
			.field("name", DataTypes.STRING())
			.field("age", DataTypes.INT())
			.build();
	FormatDescriptor csvFormat = new OldCsv()
			.field("name", Types.STRING())
			.field("age", Types.INT());

	String csvPath = this.getClass().getResource("/csv/test.csv").getPath();
	CatalogTable sourceTable = new CatalogTableBuilder(new FileSystem().path(csvPath), tableSchema)
			.withFormat(csvFormat)
			.inAppendMode()
			.withComment("Comment.")
			.build();
	hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sourceTableName), sourceTable, false);

	// Function names and their implementing classes, kept in the registration order.
	String[] functionNames = {"myudf", "mygenericudf", "myudtf", "myudaf"};
	Class<?>[] functionClasses = {
			TestHiveSimpleUDF.class,
			TestHiveGenericUDF.class,
			TestHiveUDTF.class,
			GenericUDAFSum.class
	};
	for (int i = 0; i < functionNames.length; i++) {
		hiveCatalog.createFunction(
				new ObjectPath(HiveCatalog.DEFAULT_DB, functionNames[i]),
				new CatalogFunctionImpl(functionClasses[i].getCanonicalName()),
				false);
	}

	testUdf(true);
	testUdf(false);
}

org.apache.flink.table.descriptors.OldCsv Java Examples