org.apache.flink.table.sinks.CsvTableSink Java Examples
The following examples show how to use
org.apache.flink.table.sinks.CsvTableSink.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: WordCountTable.java From flink-simple-tutorial with Apache License 2.0 | 8 votes |
public static void main(String[] args) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); BatchTableEnvironment tEnv = BatchTableEnvironment.create(env); DataSet<WC> input = env.fromElements( new WC("Hello", 1), new WC("flink", 1), new WC("Hello", 1)); Table table = tEnv.fromDataSet(input); Table filtered = table .groupBy("word") .select("word, frequency.sum as frequency") .filter("frequency = 2"); DataSet<WC> result = tEnv.toDataSet(filtered, WC.class); String path = ""; CsvTableSink tableSink = new CsvTableSink(path, ","); tEnv.registerTableSink("csvSink", tableSink); result.print(); }
Example #2
Source File: BatchSQLTestProgram.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); String sqlStatement = params.getRequired("sqlStatement"); TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.newInstance() .useBlinkPlanner() .inBatchMode() .build()); tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0)); tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5)); tEnv.registerTableSink("sinkTable", new CsvTableSink(outputPath) .configure(new String[]{"f0", "f1"}, new TypeInformation[]{Types.INT, Types.SQL_TIMESTAMP})); tEnv.sqlUpdate(sqlStatement); tEnv.execute("TestSqlJob"); }
Example #3
Source File: CsvSinkStreamOp.java From Alink with Apache License 2.0 | 6 votes |
@Override public CsvSinkStreamOp sinkFrom(StreamOperator in) { this.schema = in.getSchema(); final String filePath = getFilePath(); final String fieldDelim = getFieldDelimiter(); final String rowDelimiter = getRowDelimiter(); final int numFiles = getNumFiles(); final TypeInformation[] types = in.getColTypes(); final Character quoteChar = getQuoteChar(); FileSystem.WriteMode writeMode; if (getOverwriteSink()) { writeMode = FileSystem.WriteMode.OVERWRITE; } else { writeMode = FileSystem.WriteMode.NO_OVERWRITE; } DataStream<Row> output = ((DataStream<Row>) in.getDataStream()) .map(new CsvUtil.FormatCsvFunc(types, fieldDelim, quoteChar)) .setParallelism(numFiles); CsvTableSink cts = new CsvTableSink(filePath, rowDelimiter, numFiles, writeMode); cts.emitDataStream(output); return this; }
Example #4
Source File: BatchSQLTestProgram.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); String sqlStatement = params.getRequired("sqlStatement"); TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.newInstance() .useBlinkPlanner() .inBatchMode() .build()); ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0)); ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5)); ((TableEnvironmentInternal) tEnv).registerTableSinkInternal("sinkTable", new CsvTableSink(outputPath) .configure(new String[]{"f0", "f1"}, new TypeInformation[]{Types.INT, Types.SQL_TIMESTAMP})); TableResult result = tEnv.executeSql(sqlStatement); // wait job finish result.getJobClient().get().getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get(); }
Example #5
Source File: UnitTestSuiteFlink.java From df_data_service with Apache License 2.0 | 5 votes |
public static void testFlinkAvroSQL() { System.out.println("TestCase_Test Avro SQL"); String resultFile = "/home/vagrant/test.txt"; String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath(); StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath) .setParallelism(1); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "consumer_test"); properties.setProperty("schema.subject", "test-value"); properties.setProperty("schema.registry", "localhost:8081"); properties.setProperty("static.avro.schema", "empty_schema"); try { Kafka09AvroTableSource kafkaAvroTableSource = new Kafka09AvroTableSource("test", properties); tableEnv.registerTableSource("Orders", kafkaAvroTableSource); //Table result = tableEnv.sql("SELECT STREAM name, symbol, exchange FROM Orders"); Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders"); Files.deleteIfExists(Paths.get(resultFile)); // create a TableSink TableSink sink = new CsvTableSink(resultFile, "|"); // write the result Table to the TableSink result.writeToSink(sink); env.execute("Flink AVRO SQL KAFKA Test"); } catch (Exception e) { e.printStackTrace(); } }
Example #6
Source File: CsvTableSinkFactoryTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testAppendTableSinkFactory() { DescriptorProperties descriptor = createDescriptor(testingSchema); descriptor.putString("update-mode", "append"); TableSink sink = createTableSink(descriptor); assertTrue(sink instanceof CsvTableSink); assertEquals(testingSchema.toRowDataType(), sink.getConsumedDataType()); }
Example #7
Source File: CsvTableSinkFactoryTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testBatchTableSinkFactory() { DescriptorProperties descriptor = createDescriptor(testingSchema); TableSink sink = createTableSink(descriptor); assertTrue(sink instanceof CsvTableSink); assertEquals(testingSchema.toRowDataType(), sink.getConsumedDataType()); }
Example #8
Source File: TpcdsTestProgram.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String sourceTablePath = params.getRequired("sourceTablePath"); String queryPath = params.getRequired("queryPath"); String sinkTablePath = params.getRequired("sinkTablePath"); Boolean useTableStats = params.getBoolean("useTableStats"); TableEnvironment tableEnvironment = prepareTableEnv(sourceTablePath, useTableStats); //execute TPC-DS queries for (String queryId : TPCDS_QUERIES) { System.out.println("[INFO]Run TPC-DS query " + queryId + " ..."); String queryName = QUERY_PREFIX + queryId + QUERY_SUFFIX; String queryFilePath = queryPath + FILE_SEPARATOR + queryName; String queryString = loadFile2String(queryFilePath); Table resultTable = tableEnvironment.sqlQuery(queryString); //register sink table String sinkTableName = QUERY_PREFIX + queryId + "_sinkTable"; ((TableEnvironmentInternal) tableEnvironment).registerTableSinkInternal(sinkTableName, new CsvTableSink( sinkTablePath + FILE_SEPARATOR + queryId + RESULT_SUFFIX, COL_DELIMITER, 1, FileSystem.WriteMode.OVERWRITE, resultTable.getSchema().getFieldNames(), resultTable.getSchema().getFieldDataTypes() )); TableResult tableResult = resultTable.executeInsert(sinkTableName); // wait job finish tableResult.getJobClient().get() .getJobExecutionResult(Thread.currentThread().getContextClassLoader()) .get(); System.out.println("[INFO]Run TPC-DS query " + queryId + " success."); } }
Example #9
Source File: CodeGenFlinkTable.java From df_data_service with Apache License 2.0 | 4 votes |
public static void main(String args[]) { String transform = "flatMap(new FlinkUDF.LineSplitter()).groupBy(0).sum(1).print();\n"; String transform2 = "select(\"name\");\n"; String header = "package dynamic;\n" + "import org.apache.flink.api.table.Table;\n" + "import com.datafibers.util.*;\n"; String javaCode = header + "public class FlinkScript implements DynamicRunner {\n" + "@Override \n" + " public void runTransform(DataSet<String> ds) {\n" + "try {" + "ds."+ transform + "} catch (Exception e) {" + "};" + "}}"; String javaCode2 = header + "public class FlinkScript implements DynamicRunner {\n" + "@Override \n" + " public Table transTableObj(Table tbl) {\n" + "try {" + "return tbl."+ transform2 + "} catch (Exception e) {" + "};" + "return null;}}"; final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); CsvTableSource csvTableSource = new CsvTableSource( "/Users/will/Downloads/file.csv", new String[] { "name", "id", "score", "comments" }, new TypeInformation<?>[] { Types.STRING(), Types.STRING(), Types.STRING(), Types.STRING() }); // lenient tableEnv.registerTableSource("mycsv", csvTableSource); TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|"); Table ingest = tableEnv.scan("mycsv"); try { String className = "dynamic.FlinkScript"; Class aClass = CompilerUtils.CACHED_COMPILER.loadFromJava(className, javaCode2); DynamicRunner runner = (DynamicRunner) aClass.newInstance(); //runner.runTransform(ds); Table result = runner.transTableObj(ingest); // write the result Table to the TableSink result.writeToSink(sink); env.execute(); } catch (Exception e) { e.printStackTrace(); } }
Example #10
Source File: UnitTestSuiteFlink.java From df_data_service with Apache License 2.0 | 4 votes |
public static void testFlinkSQL() { LOG.info("Only Unit Testing Function is enabled"); String resultFile = "/home/vagrant/test.txt"; try { String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath(); StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath) .setParallelism(1); String kafkaTopic = "finance"; String kafkaTopic_stage = "df_trans_stage_finance"; String kafkaTopic_out = "df_trans_out_finance"; StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "consumer3"); // Internal covert Json String to Json - Begin DataStream<String> stream = env .addSource(new FlinkKafkaConsumer09<>(kafkaTopic, new SimpleStringSchema(), properties)); stream.map(new MapFunction<String, String>() { @Override public String map(String jsonString) throws Exception { return jsonString.replaceAll("\\\\", "").replace("\"{", "{").replace("}\"","}"); } }).addSink(new FlinkKafkaProducer09<String>("localhost:9092", kafkaTopic_stage, new SimpleStringSchema())); // Internal covert Json String to Json - End String[] fieldNames = new String[] {"name"}; Class<?>[] fieldTypes = new Class<?>[] {String.class}; Kafka09AvroTableSource kafkaTableSource = new Kafka09AvroTableSource( kafkaTopic_stage, properties, fieldNames, fieldTypes); //kafkaTableSource.setFailOnMissingField(true); tableEnv.registerTableSource("Orders", kafkaTableSource); //Table result = tableEnv.sql("SELECT STREAM name FROM Orders"); Table result = tableEnv.sql("SELECT name FROM Orders"); Files.deleteIfExists(Paths.get(resultFile)); // create a TableSink TableSink sink = new CsvTableSink(resultFile, "|"); // write the result Table to the TableSink result.writeToSink(sink); env.execute("FlinkConsumer"); } catch (Exception e) { e.printStackTrace(); } }
Example #11
Source File: UnitTestSuiteFlink.java From df_data_service with Apache License 2.0 | 4 votes |
public static void testFlinkAvroSQLWithStaticSchema() { System.out.println("TestCase_Test Avro SQL with static Schema"); final String STATIC_USER_SCHEMA = "{" + "\"type\":\"record\"," + "\"name\":\"myrecord\"," + "\"fields\":[" + " { \"name\":\"symbol\", \"type\":\"string\" }," + " { \"name\":\"name\", \"type\":\"string\" }," + " { \"name\":\"exchangecode\", \"type\":\"string\" }" + "]}"; String resultFile = "/home/vagrant/test.txt"; String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath(); StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath) .setParallelism(1); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "consumer_test"); properties.setProperty("schema.subject", "test-value"); properties.setProperty("schema.registry", "localhost:8081"); properties.setProperty("static.avro.schema", STATIC_USER_SCHEMA); try { Kafka09AvroTableSource kafkaAvroTableSource = new Kafka09AvroTableSource("test", properties); tableEnv.registerTableSource("Orders", kafkaAvroTableSource); //Table result = tableEnv.sql("SELECT STREAM name, symbol, exchange FROM Orders"); Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders"); Files.deleteIfExists(Paths.get(resultFile)); // create a TableSink TableSink sink = new CsvTableSink(resultFile, "|"); // write the result Table to the TableSink result.writeToSink(sink); env.execute("Flink AVRO SQL KAFKA Test"); } catch (Exception e) { e.printStackTrace(); } }
Example #12
Source File: WordCountStream.java From df_data_service with Apache License 2.0 | 4 votes |
public static void main(String args[]) { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); // Create a DataStream from a list of elements //DataStream<Integer> ds = env.fromElements(1, 2, 3, 4, 5); CsvTableSource csvTableSource = new CsvTableSource( "/Users/will/Downloads/file.csv", new String[] { "name", "id", "score", "comments" }, new TypeInformation<?>[] { Types.STRING(), Types.STRING(), Types.STRING(), Types.STRING() }); // lenient tableEnv.registerTableSource("mycsv", csvTableSource); TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|"); //tableEnv.registerDataStream("tbl", ds, "a"); //Table ingest = tableEnv.fromDataStream(ds, "name"); Table in = tableEnv.scan("mycsv"); //Table in = tableEnv.ingest("tbl"); //Table in = tableEnv.fromDataStream(ds, "a"); Table result = in.select("name"); result.writeToSink(sink); try { env.execute(); } catch (Exception e) { } System.out.print("DONE"); }