org.apache.beam.sdk.io.jdbc.JdbcIO Java Examples
The following examples show how to use org.apache.beam.sdk.io.jdbc.JdbcIO, Apache Beam's connector for reading from and writing to databases over JDBC. Each example lists its original project, source file, and license.
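Before the project-specific examples, here is a minimal, self-contained sketch of the basic JdbcIO read pattern. The PostgreSQL driver, connection settings, and "persons" table are hypothetical placeholders, and pipeline is an already-created Pipeline; everything else follows the standard Beam API.

import java.sql.ResultSet;
import org.apache.beam.sdk.coders.BigEndianIntegerCoder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.jdbc.JdbcIO;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

// Read (id, name) pairs from a hypothetical "persons" table.
PCollection<KV<Integer, String>> rows = pipeline.apply(JdbcIO.<KV<Integer, String>>read()
        .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                .create("org.postgresql.Driver", "jdbc:postgresql://localhost:5432/mydb")
                .withUsername("user")
                .withPassword("password"))
        .withQuery("SELECT id, name FROM persons")
        .withRowMapper(new JdbcIO.RowMapper<KV<Integer, String>>() {
            @Override
            public KV<Integer, String> mapRow(ResultSet resultSet) throws Exception {
                return KV.of(resultSet.getInt(1), resultSet.getString(2));
            }
        })
        // The read needs an explicit coder for the mapped element type.
        .withCoder(KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of())));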
Example #1
Source File: JDBCOutputPTransformRuntime.java From components with Apache License 2.0
@Override
public PDone expand(PCollection<IndexedRecord> input) {
    return input.apply(JdbcIO.<IndexedRecord> write()
            .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                    .create(properties.getDatasetProperties().getDatastoreProperties().getCurrentDriverClass(),
                            properties.getDatasetProperties().getDatastoreProperties().jdbcUrl.getValue())
                    .withUsername(properties.getDatasetProperties().getDatastoreProperties().userId.getValue())
                    .withPassword(properties.getDatasetProperties().getDatastoreProperties().password.getValue()))
            .withStatement(JDBCSQLBuilder.getInstance().generateSQL4Insert(
                    properties.getDatasetProperties().tableName.getValue(),
                    properties.getDatasetProperties().main.schema.getValue()))
            .withPreparedStatementSetter(new PreparedStatementSetter<IndexedRecord>() {

                @Override
                public void setParameters(IndexedRecord indexedRecord, PreparedStatement preparedStatement)
                        throws Exception {
                    List<Schema.Field> fields = indexedRecord.getSchema().getFields();
                    int index = 0;
                    for (Schema.Field f : fields) {
                        // Bind each Avro field to its positional parameter (JDBC indexes are 1-based).
                        JDBCMapping.setValue(++index, preparedStatement, f, indexedRecord.get(f.pos()));
                    }
                }
            }));
}
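For contrast with the framework plumbing above, the bare JdbcIO.write() shape is compact. A minimal sketch under assumed values: the PostgreSQL driver, connection URL, and "persons" table are hypothetical, and rows is a PCollection<KV<Integer, String>> such as the one in the read sketch near the top of this page. PreparedStatementSetter is a single-method interface, so a lambda works on Java 8+.

// Write (id, name) pairs to a hypothetical "persons" table.
rows.apply(JdbcIO.<KV<Integer, String>>write()
        .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                .create("org.postgresql.Driver", "jdbc:postgresql://localhost:5432/mydb"))
        .withStatement("INSERT INTO persons (id, name) VALUES (?, ?)")
        .withPreparedStatementSetter((element, statement) -> {
            // JDBC parameter indexes are 1-based, as in Example #1.
            statement.setInt(1, element.getKey());
            statement.setString(2, element.getValue());
        }));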
Example #2
Source File: JDBCInputPTransformRuntime.java From components with Apache License 2.0
@Override
public PCollection<IndexedRecord> expand(PBegin pBegin) {
    return (PCollection<IndexedRecord>) pBegin.apply(JdbcIO.<IndexedRecord> read()
            .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                    .create(properties.getDatasetProperties().getDatastoreProperties().getCurrentDriverClass(),
                            properties.getDatasetProperties().getDatastoreProperties().jdbcUrl.getValue())
                    .withUsername(properties.getDatasetProperties().getDatastoreProperties().userId.getValue())
                    .withPassword(properties.getDatasetProperties().getDatastoreProperties().password.getValue()))
            .withFetchSize(FETCH_SIZE)
            .withQuery(properties.getDatasetProperties().getSql())
            .withRowMapper(new JdbcIO.RowMapper<IndexedRecord>() {

                @Override
                public IndexedRecord mapRow(ResultSet resultSet) throws Exception {
                    // Initialize the converter lazily so it is created on the worker at read time.
                    if (factory == null) {
                        factory = new ResultSetStringRecordConverter();
                        factory.setSchema(defaultOutputCoder.getSchema());
                    }
                    return factory.convertToAvro(resultSet);
                }
            })
            .withCoder(getDefaultOutputCoder()));
}
Example #3
Source File: DynamicJdbcIO.java From DataflowTemplates with Apache License 2.0
private DynamicReadFn(
        DynamicDataSourceConfiguration dataSourceConfiguration,
        ValueProvider<String> query,
        JdbcIO.RowMapper<T> rowMapper) {
    this.dataSourceConfiguration = dataSourceConfiguration;
    this.query = query;
    this.rowMapper = rowMapper;
}
Example #4
Source File: JdbcConvertersTest.java From DataflowTemplates with Apache License 2.0
@Test
public void testRowMapper() throws Exception {
    JdbcIO.RowMapper<TableRow> resultSetConverters = JdbcConverters.getResultSetToTableRow();
    TableRow actualTableRow = resultSetConverters.mapRow(resultSet);

    assertThat(expectedTableRow.size(), equalTo(actualTableRow.size()));
    assertThat(actualTableRow, equalTo(expectedTableRow));
}
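Because mapRow takes a plain java.sql.ResultSet, any RowMapper can be unit-tested this way without a live database. A minimal sketch of the same idea against a mocked ResultSet, assuming Mockito on the classpath; the mapper and its "name" column are hypothetical, not part of the project above.

// Testing a RowMapper against a mocked ResultSet; Mockito and the
// "name" column are assumptions for illustration.
JdbcIO.RowMapper<String> mapper = rs -> rs.getString("name");
ResultSet rs = Mockito.mock(ResultSet.class);
Mockito.when(rs.getString("name")).thenReturn("Ada");
assertThat(mapper.mapRow(rs), equalTo("Ada"));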
Example #5
Source File: IndexerPipeline.java From dataflow-opinion-analysis with Apache License 2.0
/**
 * @param options pipeline options holding the JDBC connection settings
 * @return PTransform that reads from a JDBC source
 */
private static org.apache.beam.sdk.io.jdbc.JdbcIO.Read<InputContent> readDBRows(IndexerPipelineOptions options) {
    String query = IndexerPipelineUtils.buildJdbcSourceImportQuery(options);
    return JdbcIO.<InputContent>read()
            .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                    .create(options.getJdbcDriverClassName(), options.getJdbcSourceUrl())
                    .withUsername(options.getJdbcSourceUsername())
                    .withPassword(options.getJdbcSourcePassword()))
            .withQuery(query)
            .withRowMapper(new RowMapper<InputContent>() {
                @Override
                public InputContent mapRow(ResultSet resultSet) throws Exception {
                    return new InputContent(
                            resultSet.getString("url"),
                            resultSet.getLong("pub_time") * 1000L, // epoch seconds to milliseconds
                            resultSet.getString("title"),
                            resultSet.getString("author"),
                            resultSet.getString("language"),
                            resultSet.getString("page_text"),
                            resultSet.getString("doc_col_id"),
                            resultSet.getString("col_item_id"),
                            resultSet.getInt("skip_indexing"));
                }
            })
            .withCoder(AvroCoder.of(InputContent.class));
}
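JdbcIO.RowMapper declares a single method, mapRow, so on Java 8+ the anonymous class above can collapse into a lambda. A minimal sketch reusing the url and pub_time columns, with KV<String, Long> as a hypothetical simplification of the InputContent record:

// Lambda form of a RowMapper; the element type is a hypothetical
// simplification of InputContent.
JdbcIO.RowMapper<KV<String, Long>> mapper =
        rs -> KV.of(rs.getString("url"), rs.getLong("pub_time") * 1000L);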
Example #6
Source File: JdbcSource.java From component-runtime with Apache License 2.0
public PCollection<JsonObject> expand(final PBegin input) {
    final WorkAroundCoder workAroundCoder = new WorkAroundCoder();
    final PCollection<JsonObject> apply = input.apply(JdbcIO.<JsonObject> read()
            .withRowMapper(new RecordMapper(builder))
            .withDataSourceConfiguration(config.asBeamConfig())
            .withQuery(config.query)
            .withCoder(workAroundCoder));
    // The coder needs a reference to the collection it encodes, so it is wired up after apply().
    workAroundCoder.collection = apply;
    return apply;
}
Example #7
Source File: JdbcSource.java From component-runtime with Apache License 2.0
JdbcIO.DataSourceConfiguration asBeamConfig() {
    final JdbcIO.DataSourceConfiguration configuration = JdbcIO.DataSourceConfiguration.create(driver, url);
    if (username != null) {
        // Credentials are optional: attach them only when a username is configured.
        return configuration.withUsername(username).withPassword(password);
    }
    return configuration;
}
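Example #6 above is the call site for this helper: the returned configuration feeds directly into withDataSourceConfiguration. Note the asymmetry in the null check: the password is attached only when a username is configured, so a password without a username would be silently ignored.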
Example #8
Source File: JdbcConverters.java From DataflowTemplates with Apache License 2.0
/** Factory method for {@link ResultSetToTableRow}. */
public static JdbcIO.RowMapper<TableRow> getResultSetToTableRow() {
    return new ResultSetToTableRow();
}
Example #9
Source File: DynamicJdbcIO.java From DataflowTemplates with Apache License 2.0
@Nullable abstract JdbcIO.RowMapper<T> getRowMapper();
Example #10
Source File: DynamicJdbcIO.java From DataflowTemplates with Apache License 2.0
public DynamicRead<T> withRowMapper(JdbcIO.RowMapper<T> rowMapper) {
    checkArgument(rowMapper != null, ".withRowMapper(rowMapper) called with null rowMapper");
    return toBuilder().setRowMapper(rowMapper).build();
}
Example #11
Source File: SocialStatsPipeline.java From dataflow-opinion-analysis with Apache License 2.0
/**
 * Creates the DAG of transforms. It can be called from main()
 * as well as from the ControlPipeline.
 * @param options pipeline options
 * @return the constructed pipeline
 * @throws Exception if the options fail validation
 */
public static Pipeline createSocialStatsPipeline(IndexerPipelineOptions options) throws Exception {

    IndexerPipelineUtils.validateSocialStatsPipelineOptions(options);
    Pipeline pipeline = Pipeline.create(options);

    String query = IndexerPipelineUtils.buildJdbcSourceImportQueryForSocialStats(options);
    PCollection<WebresourceSocialCount> readCounts = pipeline.apply(
            JdbcIO.<WebresourceSocialCount>read()
                    .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
                            .create(options.getJdbcDriverClassName(), options.getJdbcSourceUrl())
                            .withUsername(options.getJdbcSourceUsername())
                            .withPassword(options.getJdbcSourcePassword()))
                    .withQuery(query)
                    .withRowMapper(new RowMapper<WebresourceSocialCount>() {
                        @Override
                        public WebresourceSocialCount mapRow(ResultSet resultSet) throws Exception {
                            return new WebresourceSocialCount(
                                    resultSet.getLong("page_pub_time") * 1000L, // epoch seconds to milliseconds
                                    resultSet.getString("url"),
                                    resultSet.getString("doc_col_id"),
                                    resultSet.getString("col_item_id"),
                                    resultSet.getLong("count_time") * 1000L,
                                    resultSet.getInt("count_tw"),
                                    resultSet.getInt("count_fb"));
                        }
                    })
                    .withCoder(AvroCoder.of(WebresourceSocialCount.class)));

    // If content is to be appended (not truncated) in BigQuery, obtain the latest
    // processed social stats per page so already-seen counts can be skipped.
    PCollection<WebresourceSocialCount> countsToProcess;

    if (options.getWriteTruncate() != null && !options.getWriteTruncate()
            && options.getWrSocialCountHistoryWindowSec() != null) {
        String queryCache = IndexerPipelineUtils.buildBigQueryProcessedSocialCountsQuery(options);
        PCollection<KV<String, Long>> lastCountTimes = pipeline
                .apply("Get processed social count times", BigQueryIO.read().fromQuery(queryCache))
                .apply(ParDo.of(new GetLastCountTime()));

        final PCollectionView<Map<String, Long>> lastCountTimesSideInput =
                lastCountTimes.apply(View.<String, Long>asMap());

        countsToProcess = readCounts.apply(ParDo
                .of(new DoFn<WebresourceSocialCount, WebresourceSocialCount>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                        WebresourceSocialCount i = c.element();
                        // Look up the last processed count time for this page; emit the element
                        // only if the page is new or carries a more recent count.
                        Long lastTime = c.sideInput(lastCountTimesSideInput).get(i.webResourceHash);
                        if (lastTime == null || lastTime < i.countTime) {
                            c.output(i);
                        }
                    }
                })
                .withSideInputs(lastCountTimesSideInput));
    } else {
        countsToProcess = readCounts;
    }

    PCollection<TableRow> wrSocialCounts = countsToProcess
            .apply(ParDo.of(new CreateWrSocialCountTableRowFn()));

    // Now write to BigQuery.
    WriteDisposition dispo = options.getWriteTruncate()
            ? WriteDisposition.WRITE_TRUNCATE : WriteDisposition.WRITE_APPEND;

    wrSocialCounts.apply("Write to wrsocialcount", BigQueryIO
            .writeTableRows()
            .to(getWRSocialCountTableReference(options))
            .withSchema(getWrSocialCountSchema())
            .withWriteDisposition(dispo));

    return pipeline;
}
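Two details are worth noting in the example above: the View.asMap() side input turns the BigQuery results into a per-worker in-memory lookup, so the freshness check inside the DoFn is a map lookup rather than a join; and the * 1000L on both timestamp columns suggests the source stores epoch seconds while the pipeline works in milliseconds.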
Example #12
Source File: DynamicJdbcIO.java From DataflowTemplates with Apache License 2.0
abstract Builder<T> setRowMapper(JdbcIO.RowMapper<T> rowMapper);
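Pieced together, Examples #3, #9, #10, and #12 are fragments of an AutoValue-style builder that wraps JdbcIO for runtime-resolved connections. A heavily hedged sketch of how such a read might be assembled; only withRowMapper is shown above, so withDataSourceConfiguration, withQuery, and withCoder are assumed by analogy with JdbcIO.Read, and dataSourceConfig is a hypothetical DynamicDataSourceConfiguration.

// Hypothetical usage of DynamicJdbcIO; all builder methods other than
// withRowMapper are assumptions based on JdbcIO.Read.
PCollection<TableRow> rows = pipeline.apply(
        DynamicJdbcIO.<TableRow>read()
                .withDataSourceConfiguration(dataSourceConfig)
                .withQuery(options.getQuery())
                .withRowMapper(JdbcConverters.getResultSetToTableRow())
                .withCoder(TableRowJsonCoder.of()));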