org.apache.beam.sdk.io.jdbc.JdbcIO Java Examples
The following examples show how to use org.apache.beam.sdk.io.jdbc.JdbcIO, Apache Beam's connector for reading from and writing to databases over JDBC. Each example lists its original project, source file, and license.
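Before the project-specific examples, here is a minimal, self-contained sketch of the basic JdbcIO read pattern. The PostgreSQL driver, connection settings, and "persons" table are hypothetical placeholders, and pipeline is an already-created Pipeline; everything else follows the standard Beam API.

import java.sql.ResultSet;
import org.apache.beam.sdk.coders.BigEndianIntegerCoder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.jdbc.JdbcIO;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

// Read (id, name) pairs from a hypothetical "persons" table.
PCollection<KV<Integer, String>> rows = pipeline.apply(JdbcIO.<KV<Integer, String>>read()
        .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                .create("org.postgresql.Driver", "jdbc:postgresql://localhost:5432/mydb")
                .withUsername("user")
                .withPassword("password"))
        .withQuery("SELECT id, name FROM persons")
        .withRowMapper(new JdbcIO.RowMapper<KV<Integer, String>>() {
            @Override
            public KV<Integer, String> mapRow(ResultSet resultSet) throws Exception {
                return KV.of(resultSet.getInt(1), resultSet.getString(2));
            }
        })
        // The read needs an explicit coder for the mapped element type.
        .withCoder(KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of())));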
Example #1
Source File: JDBCOutputPTransformRuntime.java From components with Apache License 2.0
@Override
public PDone expand(PCollection<IndexedRecord> input) {
    return input.apply(JdbcIO.<IndexedRecord> write()
            .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                    .create(properties.getDatasetProperties().getDatastoreProperties().getCurrentDriverClass(),
                            properties.getDatasetProperties().getDatastoreProperties().jdbcUrl.getValue())
                    .withUsername(properties.getDatasetProperties().getDatastoreProperties().userId.getValue())
                    .withPassword(properties.getDatasetProperties().getDatastoreProperties().password.getValue()))
            .withStatement(JDBCSQLBuilder.getInstance().generateSQL4Insert(
                    properties.getDatasetProperties().tableName.getValue(),
                    properties.getDatasetProperties().main.schema.getValue()))
            .withPreparedStatementSetter(new PreparedStatementSetter<IndexedRecord>() {

                @Override
                public void setParameters(IndexedRecord indexedRecord, PreparedStatement preparedStatement)
                        throws Exception {
                    List<Schema.Field> fields = indexedRecord.getSchema().getFields();
                    int index = 0;
                    for (Schema.Field f : fields) {
                        // Bind each Avro field to its positional parameter (JDBC indexes are 1-based).
                        JDBCMapping.setValue(++index, preparedStatement, f, indexedRecord.get(f.pos()));
                    }
                }
            }));
}
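For contrast with the framework plumbing above, the bare JdbcIO.write() shape is compact. A minimal sketch under assumed values: the PostgreSQL driver, connection URL, and "persons" table are hypothetical, and rows is a PCollection<KV<Integer, String>> such as the one in the read sketch near the top of this page. PreparedStatementSetter is a single-method interface, so a lambda works on Java 8+.

// Write (id, name) pairs to a hypothetical "persons" table.
rows.apply(JdbcIO.<KV<Integer, String>>write()
        .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                .create("org.postgresql.Driver", "jdbc:postgresql://localhost:5432/mydb"))
        .withStatement("INSERT INTO persons (id, name) VALUES (?, ?)")
        .withPreparedStatementSetter((element, statement) -> {
            // JDBC parameter indexes are 1-based, as in Example #1.
            statement.setInt(1, element.getKey());
            statement.setString(2, element.getValue());
        }));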
Example #2
Source File: JDBCInputPTransformRuntime.java From components with Apache License 2.0
@Override
public PCollection<IndexedRecord> expand(PBegin pBegin) {
    return (PCollection<IndexedRecord>) pBegin.apply(JdbcIO.<IndexedRecord> read()
            .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                    .create(properties.getDatasetProperties().getDatastoreProperties().getCurrentDriverClass(),
                            properties.getDatasetProperties().getDatastoreProperties().jdbcUrl.getValue())
                    .withUsername(properties.getDatasetProperties().getDatastoreProperties().userId.getValue())
                    .withPassword(properties.getDatasetProperties().getDatastoreProperties().password.getValue()))
            .withFetchSize(FETCH_SIZE)
            .withQuery(properties.getDatasetProperties().getSql())
            .withRowMapper(new JdbcIO.RowMapper<IndexedRecord>() {

                @Override
                public IndexedRecord mapRow(ResultSet resultSet) throws Exception {
                    // Initialize the converter lazily so it is created on the worker at read time.
                    if (factory == null) {
                        factory = new ResultSetStringRecordConverter();
                        factory.setSchema(defaultOutputCoder.getSchema());
                    }
                    return factory.convertToAvro(resultSet);
                }
            })
            .withCoder(getDefaultOutputCoder()));
}
Example #3
Source File: DynamicJdbcIO.java From DataflowTemplates with Apache License 2.0
private DynamicReadFn(
        DynamicDataSourceConfiguration dataSourceConfiguration,
        ValueProvider<String> query,
        JdbcIO.RowMapper<T> rowMapper) {
    this.dataSourceConfiguration = dataSourceConfiguration;
    this.query = query;
    this.rowMapper = rowMapper;
}
Example #4
Source File: JdbcConvertersTest.java From DataflowTemplates with Apache License 2.0
@Test
public void testRowMapper() throws Exception {
    JdbcIO.RowMapper<TableRow> resultSetConverters = JdbcConverters.getResultSetToTableRow();
    TableRow actualTableRow = resultSetConverters.mapRow(resultSet);

    assertThat(expectedTableRow.size(), equalTo(actualTableRow.size()));
    assertThat(actualTableRow, equalTo(expectedTableRow));
}
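Because mapRow takes a plain java.sql.ResultSet, any RowMapper can be unit-tested this way without a live database. A minimal sketch of the same idea against a mocked ResultSet, assuming Mockito on the classpath; the mapper and its "name" column are hypothetical, not part of the project above.

// Testing a RowMapper against a mocked ResultSet; Mockito and the
// "name" column are assumptions for illustration.
JdbcIO.RowMapper<String> mapper = rs -> rs.getString("name");
ResultSet rs = Mockito.mock(ResultSet.class);
Mockito.when(rs.getString("name")).thenReturn("Ada");
assertThat(mapper.mapRow(rs), equalTo("Ada"));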
Example #5
Source File: IndexerPipeline.java From dataflow-opinion-analysis with Apache License 2.0
/**
 * @param options pipeline options holding the JDBC connection settings
 * @return PTransform that reads from a JDBC source
 */
private static org.apache.beam.sdk.io.jdbc.JdbcIO.Read<InputContent> readDBRows(IndexerPipelineOptions options) {
    String query = IndexerPipelineUtils.buildJdbcSourceImportQuery(options);
    return JdbcIO.<InputContent>read()
            .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                    .create(options.getJdbcDriverClassName(), options.getJdbcSourceUrl())
                    .withUsername(options.getJdbcSourceUsername())
                    .withPassword(options.getJdbcSourcePassword()))
            .withQuery(query)
            .withRowMapper(new RowMapper<InputContent>() {
                @Override
                public InputContent mapRow(ResultSet resultSet) throws Exception {
                    return new InputContent(
                            resultSet.getString("url"),
                            resultSet.getLong("pub_time") * 1000L, // epoch seconds to milliseconds
                            resultSet.getString("title"),
                            resultSet.getString("author"),
                            resultSet.getString("language"),
                            resultSet.getString("page_text"),
                            resultSet.getString("doc_col_id"),
                            resultSet.getString("col_item_id"),
                            resultSet.getInt("skip_indexing"));
                }
            })
            .withCoder(AvroCoder.of(InputContent.class));
}
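JdbcIO.RowMapper declares a single method, mapRow, so on Java 8+ the anonymous class above can collapse into a lambda. A minimal sketch reusing the url and pub_time columns, with KV<String, Long> as a hypothetical simplification of the InputContent record:

// Lambda form of a RowMapper; the element type is a hypothetical
// simplification of InputContent.
JdbcIO.RowMapper<KV<String, Long>> mapper =
        rs -> KV.of(rs.getString("url"), rs.getLong("pub_time") * 1000L);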
Example #6
Source File: JdbcSource.java From component-runtime with Apache License 2.0
public PCollection<JsonObject> expand(final PBegin input) {
    final WorkAroundCoder workAroundCoder = new WorkAroundCoder();
    final PCollection<JsonObject> apply = input.apply(JdbcIO.<JsonObject> read()
            .withRowMapper(new RecordMapper(builder))
            .withDataSourceConfiguration(config.asBeamConfig())
            .withQuery(config.query)
            .withCoder(workAroundCoder));
    // The coder needs a reference to the collection it encodes, so it is wired up after apply().
    workAroundCoder.collection = apply;
    return apply;
}
Example #7
Source File: JdbcSource.java From component-runtime with Apache License 2.0
JdbcIO.DataSourceConfiguration asBeamConfig() {
    final JdbcIO.DataSourceConfiguration configuration = JdbcIO.DataSourceConfiguration.create(driver, url);
    if (username != null) {
        // Credentials are optional: attach them only when a username is configured.
        return configuration.withUsername(username).withPassword(password);
    }
    return configuration;
}
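Example #6 above is the call site for this helper: the returned configuration feeds directly into withDataSourceConfiguration. Note the asymmetry in the null check: the password is attached only when a username is configured, so a password without a username would be silently ignored.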
Example #8
Source File: JdbcConverters.java From DataflowTemplates with Apache License 2.0
/** Factory method for {@link ResultSetToTableRow}. */
public static JdbcIO.RowMapper<TableRow> getResultSetToTableRow() {
    return new ResultSetToTableRow();
}
Example #9
Source File: DynamicJdbcIO.java From DataflowTemplates with Apache License 2.0
@Nullable abstract JdbcIO.RowMapper<T> getRowMapper();
Example #10
Source File: DynamicJdbcIO.java From DataflowTemplates with Apache License 2.0
public DynamicRead<T> withRowMapper(JdbcIO.RowMapper<T> rowMapper) {
    checkArgument(rowMapper != null, ".withRowMapper(rowMapper) called with null rowMapper");
    return toBuilder().setRowMapper(rowMapper).build();
}
Example #11
Source File: SocialStatsPipeline.java From dataflow-opinion-analysis with Apache License 2.0
/**
 * Creates the DAG of transforms. It can be called from main()
 * as well as from the ControlPipeline.
 * @param options pipeline options
 * @return the constructed pipeline
 * @throws Exception if the options fail validation
 */
public static Pipeline createSocialStatsPipeline(IndexerPipelineOptions options) throws Exception {

    IndexerPipelineUtils.validateSocialStatsPipelineOptions(options);
    Pipeline pipeline = Pipeline.create(options);

    String query = IndexerPipelineUtils.buildJdbcSourceImportQueryForSocialStats(options);
    PCollection<WebresourceSocialCount> readCounts = pipeline.apply(
            JdbcIO.<WebresourceSocialCount>read()
                    .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                            .create(options.getJdbcDriverClassName(), options.getJdbcSourceUrl())
                            .withUsername(options.getJdbcSourceUsername())
                            .withPassword(options.getJdbcSourcePassword()))
                    .withQuery(query)
                    .withRowMapper(new RowMapper<WebresourceSocialCount>() {
                        @Override
                        public WebresourceSocialCount mapRow(ResultSet resultSet) throws Exception {
                            return new WebresourceSocialCount(
                                    resultSet.getLong("page_pub_time") * 1000L, // epoch seconds to milliseconds
                                    resultSet.getString("url"),
                                    resultSet.getString("doc_col_id"),
                                    resultSet.getString("col_item_id"),
                                    resultSet.getLong("count_time") * 1000L,
                                    resultSet.getInt("count_tw"),
                                    resultSet.getInt("count_fb"));
                        }
                    })
                    .withCoder(AvroCoder.of(WebresourceSocialCount.class)));

    // If content is to be appended (not truncated) in BigQuery, obtain the latest
    // processed social stats per page so already-seen counts can be skipped.
    PCollection<WebresourceSocialCount> countsToProcess;

    if (options.getWriteTruncate() != null && !options.getWriteTruncate()
            && options.getWrSocialCountHistoryWindowSec() != null) {
        String queryCache = IndexerPipelineUtils.buildBigQueryProcessedSocialCountsQuery(options);
        PCollection<KV<String, Long>> lastCountTimes = pipeline
                .apply("Get processed social count times", BigQueryIO.read().fromQuery(queryCache))
                .apply(ParDo.of(new GetLastCountTime()));

        final PCollectionView<Map<String, Long>> lastCountTimesSideInput =
                lastCountTimes.apply(View.<String, Long>asMap());

        countsToProcess = readCounts.apply(ParDo
                .of(new DoFn<WebresourceSocialCount, WebresourceSocialCount>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                        WebresourceSocialCount i = c.element();
                        // Look up the last processed count time for this page; emit the element
                        // only if the page is new or carries a more recent count.
                        Long lastTime = c.sideInput(lastCountTimesSideInput).get(i.webResourceHash);
                        if (lastTime == null || lastTime < i.countTime) {
                            c.output(i);
                        }
                    }
                })
                .withSideInputs(lastCountTimesSideInput));
    } else {
        countsToProcess = readCounts;
    }

    PCollection<TableRow> wrSocialCounts = countsToProcess
            .apply(ParDo.of(new CreateWrSocialCountTableRowFn()));

    // Now write to BigQuery.
    WriteDisposition dispo = options.getWriteTruncate()
            ? WriteDisposition.WRITE_TRUNCATE : WriteDisposition.WRITE_APPEND;

    wrSocialCounts.apply("Write to wrsocialcount", BigQueryIO
            .writeTableRows()
            .to(getWRSocialCountTableReference(options))
            .withSchema(getWrSocialCountSchema())
            .withWriteDisposition(dispo));

    return pipeline;
}
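Two details are worth noting in the example above: the View.asMap() side input turns the BigQuery results into a per-worker in-memory lookup, so the freshness check inside the DoFn is a map lookup rather than a join; and the * 1000L on both timestamp columns suggests the source stores epoch seconds while the pipeline works in milliseconds.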
Example #12
Source File: DynamicJdbcIO.java From DataflowTemplates with Apache License 2.0
abstract Builder<T> setRowMapper(JdbcIO.RowMapper<T> rowMapper);
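Pieced together, Examples #3, #9, #10, and #12 are fragments of an AutoValue-style builder that wraps JdbcIO for runtime-resolved connections. A heavily hedged sketch of how such a read might be assembled; only withRowMapper is shown above, so withDataSourceConfiguration, withQuery, and withCoder are assumed by analogy with JdbcIO.Read, and dataSourceConfig is a hypothetical DynamicDataSourceConfiguration.

// Hypothetical usage of DynamicJdbcIO; all builder methods other than
// withRowMapper are assumptions based on JdbcIO.Read.
PCollection<TableRow> rows = pipeline.apply(
        DynamicJdbcIO.<TableRow>read()
                .withDataSourceConfiguration(dataSourceConfig)
                .withQuery(options.getQuery())
                .withRowMapper(JdbcConverters.getResultSetToTableRow())
                .withCoder(TableRowJsonCoder.of()));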