Java Code Examples for org.apache.spark.sql.streaming.DataStreamWriter#option()
The following examples show how to use
org.apache.spark.sql.streaming.DataStreamWriter#option().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StructuredStreamingSqlAnalyse.java From sylph with Apache License 2.0 | 6 votes |
public void createSinkTable(SinkContext sinkContext, StructType tableSparkType) { final String driverClass = (String) sinkContext.withConfig().get("type"); IocFactory iocFactory = IocFactory.create(sparkBean, binder -> binder.bind(SinkContext.class, sinkContext)); StructuredNodeLoader loader = new StructuredNodeLoader(connectorStore, iocFactory); UnaryOperator<Dataset<Row>> outputStream = dataSet -> { checkQueryAndTableSinkSchema(dataSet.schema(), tableSparkType, sinkContext.getSinkTable()); DataStreamWriter<Row> writer = loader.loadSinkWithComplic(driverClass, sinkContext.withConfig()).apply(dataSet); if (!isCompile) { //UnsupportedOperationChecker.checkForContinuous(); writer = writer.option("checkpointLocation", checkpointLocation); writer.start(); } return null; }; sinks.put(sinkContext.getSinkTable(), outputStream); }
Example 2
Source File: HoodieJavaStreamingApp.java From hudi with Apache License 2.0 | 6 votes |
/**
 * Applies the Hive sync options to a stream writer when Hive sync is enabled.
 *
 * <p>If {@code enableHiveSync} is false the writer is returned as given. Otherwise the Hive
 * table, database, JDBC URL, user and password options are set and sync is switched on. The
 * partition fields are {@code "year,month,day"} with {@link MultiPartKeysValueExtractor} when
 * {@code useMultiPartitionKeys} is set, and {@code "dateStr"} otherwise.
 *
 * @param writer the stream writer to configure
 * @return the writer carrying the Hive sync options, or the given writer when sync is disabled
 */
private DataStreamWriter<Row> updateHiveSyncConfig(DataStreamWriter<Row> writer) {
  if (!enableHiveSync) {
    return writer;
  }

  LOG.info("Enabling Hive sync to " + hiveJdbcUrl);
  DataStreamWriter<Row> hiveWriter =
      writer
          .option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY(), hiveTable)
          .option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(), hiveDB)
          .option(DataSourceWriteOptions.HIVE_URL_OPT_KEY(), hiveJdbcUrl)
          .option(DataSourceWriteOptions.HIVE_USER_OPT_KEY(), hiveUser)
          .option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY(), hivePass)
          .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY(), "true");

  if (useMultiPartitionKeys) {
    return hiveWriter
        .option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), "year,month,day")
        .option(
            DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY(),
            MultiPartKeysValueExtractor.class.getCanonicalName());
  }
  return hiveWriter.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), "dateStr");
}
Example 3
Source File: TranslationContext.java From beam with Apache License 2.0 | 5 votes |
/** Starts the pipeline. */ public void startPipeline() { try { SparkStructuredStreamingPipelineOptions options = serializablePipelineOptions.get().as(SparkStructuredStreamingPipelineOptions.class); int datasetIndex = 0; for (Dataset<?> dataset : leaves) { if (options.isStreaming()) { // TODO: deal with Beam Discarding, Accumulating and Accumulating & Retracting outputmodes // with DatastreamWriter.outputMode DataStreamWriter<?> dataStreamWriter = dataset.writeStream(); // spark sets a default checkpoint dir if not set. if (options.getCheckpointDir() != null) { dataStreamWriter = dataStreamWriter.option("checkpointLocation", options.getCheckpointDir()); } // TODO: Do not await termination here. dataStreamWriter.foreach(new NoOpForeachWriter<>()).start().awaitTermination(); } else { if (options.getTestMode()) { LOG.debug("**** dataset {} catalyst execution plans ****", ++datasetIndex); dataset.explain(true); } // apply a dummy fn just to apply foreach action that will trigger the pipeline run in // spark dataset.foreach((ForeachFunction) t -> {}); } } } catch (StreamingQueryException e) { throw new RuntimeException("Pipeline execution failed: " + e); } }