org.apache.flink.table.sinks.CsvTableSink Java Examples
The following examples show how to use
org.apache.flink.table.sinks.CsvTableSink.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: WordCountTable.java From flink-simple-tutorial with Apache License 2.0 | 8 votes |
public static void main(String[] args) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); BatchTableEnvironment tEnv = BatchTableEnvironment.create(env); DataSet<WC> input = env.fromElements( new WC("Hello", 1), new WC("flink", 1), new WC("Hello", 1)); Table table = tEnv.fromDataSet(input); Table filtered = table .groupBy("word") .select("word, frequency.sum as frequency") .filter("frequency = 2"); DataSet<WC> result = tEnv.toDataSet(filtered, WC.class); String path = ""; CsvTableSink tableSink = new CsvTableSink(path, ","); tEnv.registerTableSink("csvSink", tableSink); result.print(); }
Example #2
Source File: BatchSQLTestProgram.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); String sqlStatement = params.getRequired("sqlStatement"); TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.newInstance() .useBlinkPlanner() .inBatchMode() .build()); tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0)); tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5)); tEnv.registerTableSink("sinkTable", new CsvTableSink(outputPath) .configure(new String[]{"f0", "f1"}, new TypeInformation[]{Types.INT, Types.SQL_TIMESTAMP})); tEnv.sqlUpdate(sqlStatement); tEnv.execute("TestSqlJob"); }
Example #3
Source File: CsvSinkStreamOp.java From Alink with Apache License 2.0 | 6 votes |
@Override public CsvSinkStreamOp sinkFrom(StreamOperator in) { this.schema = in.getSchema(); final String filePath = getFilePath(); final String fieldDelim = getFieldDelimiter(); final String rowDelimiter = getRowDelimiter(); final int numFiles = getNumFiles(); final TypeInformation[] types = in.getColTypes(); final Character quoteChar = getQuoteChar(); FileSystem.WriteMode writeMode; if (getOverwriteSink()) { writeMode = FileSystem.WriteMode.OVERWRITE; } else { writeMode = FileSystem.WriteMode.NO_OVERWRITE; } DataStream<Row> output = ((DataStream<Row>) in.getDataStream()) .map(new CsvUtil.FormatCsvFunc(types, fieldDelim, quoteChar)) .setParallelism(numFiles); CsvTableSink cts = new CsvTableSink(filePath, rowDelimiter, numFiles, writeMode); cts.emitDataStream(output); return this; }
Example #4
Source File: BatchSQLTestProgram.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); String sqlStatement = params.getRequired("sqlStatement"); TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.newInstance() .useBlinkPlanner() .inBatchMode() .build()); ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0)); ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5)); ((TableEnvironmentInternal) tEnv).registerTableSinkInternal("sinkTable", new CsvTableSink(outputPath) .configure(new String[]{"f0", "f1"}, new TypeInformation[]{Types.INT, Types.SQL_TIMESTAMP})); TableResult result = tEnv.executeSql(sqlStatement); // wait job finish result.getJobClient().get().getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get(); }
Example #5
Source File: UnitTestSuiteFlink.java From df_data_service with Apache License 2.0 | 5 votes |
public static void testFlinkAvroSQL() { System.out.println("TestCase_Test Avro SQL"); String resultFile = "/home/vagrant/test.txt"; String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath(); StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath) .setParallelism(1); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "consumer_test"); properties.setProperty("schema.subject", "test-value"); properties.setProperty("schema.registry", "localhost:8081"); properties.setProperty("static.avro.schema", "empty_schema"); try { Kafka09AvroTableSource kafkaAvroTableSource = new Kafka09AvroTableSource("test", properties); tableEnv.registerTableSource("Orders", kafkaAvroTableSource); //Table result = tableEnv.sql("SELECT STREAM name, symbol, exchange FROM Orders"); Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders"); Files.deleteIfExists(Paths.get(resultFile)); // create a TableSink TableSink sink = new CsvTableSink(resultFile, "|"); // write the result Table to the TableSink result.writeToSink(sink); env.execute("Flink AVRO SQL KAFKA Test"); } catch (Exception e) { e.printStackTrace(); } }
Example #6
Source File: CsvTableSinkFactoryTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testAppendTableSinkFactory() { DescriptorProperties descriptor = createDescriptor(testingSchema); descriptor.putString("update-mode", "append"); TableSink sink = createTableSink(descriptor); assertTrue(sink instanceof CsvTableSink); assertEquals(testingSchema.toRowDataType(), sink.getConsumedDataType()); }
Example #7
Source File: CsvTableSinkFactoryTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testBatchTableSinkFactory() { DescriptorProperties descriptor = createDescriptor(testingSchema); TableSink sink = createTableSink(descriptor); assertTrue(sink instanceof CsvTableSink); assertEquals(testingSchema.toRowDataType(), sink.getConsumedDataType()); }
Example #8
Source File: TpcdsTestProgram.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String sourceTablePath = params.getRequired("sourceTablePath"); String queryPath = params.getRequired("queryPath"); String sinkTablePath = params.getRequired("sinkTablePath"); Boolean useTableStats = params.getBoolean("useTableStats"); TableEnvironment tableEnvironment = prepareTableEnv(sourceTablePath, useTableStats); //execute TPC-DS queries for (String queryId : TPCDS_QUERIES) { System.out.println("[INFO]Run TPC-DS query " + queryId + " ..."); String queryName = QUERY_PREFIX + queryId + QUERY_SUFFIX; String queryFilePath = queryPath + FILE_SEPARATOR + queryName; String queryString = loadFile2String(queryFilePath); Table resultTable = tableEnvironment.sqlQuery(queryString); //register sink table String sinkTableName = QUERY_PREFIX + queryId + "_sinkTable"; ((TableEnvironmentInternal) tableEnvironment).registerTableSinkInternal(sinkTableName, new CsvTableSink( sinkTablePath + FILE_SEPARATOR + queryId + RESULT_SUFFIX, COL_DELIMITER, 1, FileSystem.WriteMode.OVERWRITE, resultTable.getSchema().getFieldNames(), resultTable.getSchema().getFieldDataTypes() )); TableResult tableResult = resultTable.executeInsert(sinkTableName); // wait job finish tableResult.getJobClient().get() .getJobExecutionResult(Thread.currentThread().getContextClassLoader()) .get(); System.out.println("[INFO]Run TPC-DS query " + queryId + " success."); } }
Example #9
Source File: CodeGenFlinkTable.java From df_data_service with Apache License 2.0 | 4 votes |
public static void main(String args[]) { String transform = "flatMap(new FlinkUDF.LineSplitter()).groupBy(0).sum(1).print();\n"; String transform2 = "select(\"name\");\n"; String header = "package dynamic;\n" + "import org.apache.flink.api.table.Table;\n" + "import com.datafibers.util.*;\n"; String javaCode = header + "public class FlinkScript implements DynamicRunner {\n" + "@Override \n" + " public void runTransform(DataSet<String> ds) {\n" + "try {" + "ds."+ transform + "} catch (Exception e) {" + "};" + "}}"; String javaCode2 = header + "public class FlinkScript implements DynamicRunner {\n" + "@Override \n" + " public Table transTableObj(Table tbl) {\n" + "try {" + "return tbl."+ transform2 + "} catch (Exception e) {" + "};" + "return null;}}"; final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); CsvTableSource csvTableSource = new CsvTableSource( "/Users/will/Downloads/file.csv", new String[] { "name", "id", "score", "comments" }, new TypeInformation<?>[] { Types.STRING(), Types.STRING(), Types.STRING(), Types.STRING() }); // lenient tableEnv.registerTableSource("mycsv", csvTableSource); TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|"); Table ingest = tableEnv.scan("mycsv"); try { String className = "dynamic.FlinkScript"; Class aClass = CompilerUtils.CACHED_COMPILER.loadFromJava(className, javaCode2); DynamicRunner runner = (DynamicRunner) aClass.newInstance(); //runner.runTransform(ds); Table result = runner.transTableObj(ingest); // write the result Table to the TableSink result.writeToSink(sink); env.execute(); } catch (Exception e) { e.printStackTrace(); } }
Example #10
Source File: UnitTestSuiteFlink.java From df_data_service with Apache License 2.0 | 4 votes |
public static void testFlinkSQL() { LOG.info("Only Unit Testing Function is enabled"); String resultFile = "/home/vagrant/test.txt"; try { String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath(); StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath) .setParallelism(1); String kafkaTopic = "finance"; String kafkaTopic_stage = "df_trans_stage_finance"; String kafkaTopic_out = "df_trans_out_finance"; StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "consumer3"); // Internal covert Json String to Json - Begin DataStream<String> stream = env .addSource(new FlinkKafkaConsumer09<>(kafkaTopic, new SimpleStringSchema(), properties)); stream.map(new MapFunction<String, String>() { @Override public String map(String jsonString) throws Exception { return jsonString.replaceAll("\\\\", "").replace("\"{", "{").replace("}\"","}"); } }).addSink(new FlinkKafkaProducer09<String>("localhost:9092", kafkaTopic_stage, new SimpleStringSchema())); // Internal covert Json String to Json - End String[] fieldNames = new String[] {"name"}; Class<?>[] fieldTypes = new Class<?>[] {String.class}; Kafka09AvroTableSource kafkaTableSource = new Kafka09AvroTableSource( kafkaTopic_stage, properties, fieldNames, fieldTypes); //kafkaTableSource.setFailOnMissingField(true); tableEnv.registerTableSource("Orders", kafkaTableSource); //Table result = tableEnv.sql("SELECT STREAM name FROM Orders"); Table result = tableEnv.sql("SELECT name FROM Orders"); Files.deleteIfExists(Paths.get(resultFile)); // create a TableSink TableSink sink = new CsvTableSink(resultFile, "|"); // write the result Table to the TableSink result.writeToSink(sink); env.execute("FlinkConsumer"); } catch (Exception e) { e.printStackTrace(); } }
Example #11
Source File: UnitTestSuiteFlink.java From df_data_service with Apache License 2.0 | 4 votes |
public static void testFlinkAvroSQLWithStaticSchema() { System.out.println("TestCase_Test Avro SQL with static Schema"); final String STATIC_USER_SCHEMA = "{" + "\"type\":\"record\"," + "\"name\":\"myrecord\"," + "\"fields\":[" + " { \"name\":\"symbol\", \"type\":\"string\" }," + " { \"name\":\"name\", \"type\":\"string\" }," + " { \"name\":\"exchangecode\", \"type\":\"string\" }" + "]}"; String resultFile = "/home/vagrant/test.txt"; String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath(); StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath) .setParallelism(1); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "consumer_test"); properties.setProperty("schema.subject", "test-value"); properties.setProperty("schema.registry", "localhost:8081"); properties.setProperty("static.avro.schema", STATIC_USER_SCHEMA); try { Kafka09AvroTableSource kafkaAvroTableSource = new Kafka09AvroTableSource("test", properties); tableEnv.registerTableSource("Orders", kafkaAvroTableSource); //Table result = tableEnv.sql("SELECT STREAM name, symbol, exchange FROM Orders"); Table result = tableEnv.sql("SELECT name, symbol, exchangecode FROM Orders"); Files.deleteIfExists(Paths.get(resultFile)); // create a TableSink TableSink sink = new CsvTableSink(resultFile, "|"); // write the result Table to the TableSink result.writeToSink(sink); env.execute("Flink AVRO SQL KAFKA Test"); } catch (Exception e) { e.printStackTrace(); } }
Example #12
Source File: WordCountStream.java From df_data_service with Apache License 2.0 | 4 votes |
public static void main(String args[]) { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); // Create a DataStream from a list of elements //DataStream<Integer> ds = env.fromElements(1, 2, 3, 4, 5); CsvTableSource csvTableSource = new CsvTableSource( "/Users/will/Downloads/file.csv", new String[] { "name", "id", "score", "comments" }, new TypeInformation<?>[] { Types.STRING(), Types.STRING(), Types.STRING(), Types.STRING() }); // lenient tableEnv.registerTableSource("mycsv", csvTableSource); TableSink sink = new CsvTableSink("/Users/will/Downloads/out.csv", "|"); //tableEnv.registerDataStream("tbl", ds, "a"); //Table ingest = tableEnv.fromDataStream(ds, "name"); Table in = tableEnv.scan("mycsv"); //Table in = tableEnv.ingest("tbl"); //Table in = tableEnv.fromDataStream(ds, "a"); Table result = in.select("name"); result.writeToSink(sink); try { env.execute(); } catch (Exception e) { } System.out.print("DONE"); }