org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method Java Examples
The following examples show how to use
org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method.
Each example is taken from an open source project; the source file, project, and license are noted above it.
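As a quick orientation before the examples, here is a minimal sketch (not taken from any of the projects below) of selecting a read method on a TypedRead; the table spec is a hypothetical placeholder.

import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method;

// EXPORT stages an Avro export to GCS, DIRECT_READ streams rows over the
// BigQuery Storage Read API, and DEFAULT lets the connector choose.
TypedRead<TableRow> read =
    BigQueryIO.readTableRows()
        .from("my-project:my_dataset.my_table") // hypothetical table spec
        .withMethod(Method.DIRECT_READ);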
Example #1
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildTableBasedSourceWithReadOptions() {
  TableReadOptions readOptions =
      TableReadOptions.newBuilder()
          .addSelectedFields("field1")
          .addSelectedFields("field2")
          .setRowRestriction("int_field > 5")
          .build();
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withReadOptions(readOptions);
  checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table");
  assertEquals(typedRead.getReadOptions(), readOptions);
}
Example #2
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
private void checkTypedReadTableObject(
    TypedRead typedRead, String project, String dataset, String table) {
  assertEquals(project, typedRead.getTable().getProjectId());
  assertEquals(dataset, typedRead.getTable().getDatasetId());
  assertEquals(table, typedRead.getTable().getTableId());
  assertNull(typedRead.getQuery());
  assertEquals(Method.DIRECT_READ, typedRead.getMethod());
}
Example #3
Source File: BigQueryIOPushDownIT.java From beam with Apache License 2.0
@Test
public void readUsingDirectReadMethodPushDown() {
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));

  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
          .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread_pushdown");
}
Example #4
Source File: BigQueryIOPushDownIT.java From beam with Apache License 2.0
@Test
public void readUsingDirectReadMethod() {
  List<RelOptRule> ruleList = new ArrayList<>();
  for (RuleSet x : getRuleSets()) {
    x.iterator().forEachRemaining(ruleList::add);
  }
  // Remove push-down rule
  ruleList.remove(BeamIOPushDownRule.INSTANCE);

  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(
      new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
  sqlEnv =
      BeamSqlEnv.builder(inMemoryMetaStore)
          .setPipelineOptions(PipelineOptionsFactory.create())
          .setRuleSets(new RuleSet[] {RuleSets.ofList(ruleList)})
          .build();
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));

  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
          .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread");
}
Example #5
Source File: BigQueryIOPushDownIT.java From beam with Apache License 2.0
@Test
public void readUsingDefaultMethod() {
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DEFAULT.toString()));

  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
          .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_default");
}
Example #6
Source File: BigQueryIO.java From beam with Apache License 2.0
// Note: this snippet comes from the write path; STREAMING_INSERTS and FILE_LOADS
// are values of BigQueryIO.Write.Method rather than TypedRead.Method.
private Method resolveMethod(PCollection<T> input) {
  if (getMethod() != Method.DEFAULT) {
    return getMethod();
  }
  // By default, when writing an Unbounded PCollection, we use StreamingInserts and
  // BigQuery's streaming import API.
  return (input.isBounded() == IsBounded.UNBOUNDED)
      ? Method.STREAMING_INSERTS
      : Method.FILE_LOADS;
}
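Example #6 resolves the write-side enum only when the sink's method is left at DEFAULT. As a minimal sketch (the table spec is hypothetical), setting the write method explicitly short-circuits that resolution:

// Pinning the method makes resolveMethod() return early instead of
// choosing based on whether the input PCollection is bounded.
BigQueryIO.writeTableRows()
    .to("my-project:my_dataset.my_table") // hypothetical table spec
    .withMethod(BigQueryIO.Write.Method.FILE_LOADS);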
Example #7
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildTableBasedSource() {
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table");
  checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table");
  assertTrue(typedRead.getValidate());
}
Example #8
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildTableBasedSourceWithoutValidation() {
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withoutValidation();
  checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table");
  assertFalse(typedRead.getValidate());
}
Example #9
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildTableBasedSourceWithDefaultProject() {
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("myDataset.myTable");
  checkTypedReadTableObject(typedRead, null, "myDataset", "myTable");
}
Example #10
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildTableBasedSourceWithTableReference() {
  TableReference tableReference =
      new TableReference()
          .setProjectId("foo.com:project")
          .setDatasetId("dataset")
          .setTableId("table");
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from(tableReference);
  checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table");
}
Example #11
Source File: BigQueryConverters.java From DataflowTemplates with Apache License 2.0
@Override
public PCollection<TableRow> expand(PBegin pipeline) {
  if (options().getQuery() == null) {
    LOG.info("No query provided, reading directly from: " + options().getInputTableSpec());
    return pipeline.apply(
        "ReadFromBigQuery",
        BigQueryIO.readTableRows()
            .from(options().getInputTableSpec())
            .withTemplateCompatibility()
            .withMethod(Method.DIRECT_READ)
            .withCoder(TableRowJsonCoder.of()));
  } else {
    LOG.info("Using query: " + options().getQuery());
    if (!options().getUseLegacySql()) {
      LOG.info("Using Standard SQL");
      return pipeline.apply(
          "ReadFromBigQueryWithQuery",
          BigQueryIO.readTableRows()
              .fromQuery(options().getQuery())
              .withTemplateCompatibility()
              .usingStandardSql()
              .withCoder(TableRowJsonCoder.of()));
    } else {
      LOG.info("Using Legacy SQL");
      return pipeline.apply(
          "ReadFromBigQueryWithQuery",
          BigQueryIO.readTableRows()
              .fromQuery(options().getQuery())
              .withTemplateCompatibility()
              .withCoder(TableRowJsonCoder.of()));
    }
  }
}
Example #12
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildSourceWithTableAndFlatten() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Invalid BigQueryIO.Read: Specifies a table with a result flattening preference,"
          + " which only applies to queries");
  p.apply(
      "ReadMyTable",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withoutResultFlattening());
  p.run();
}
Example #13
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildSourceWithTableAndSqlDialect() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Invalid BigQueryIO.Read: Specifies a table with a SQL dialect preference,"
          + " which only applies to queries");
  p.apply(
      "ReadMyTable",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .usingStandardSql());
  p.run();
}
Example #14
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildSourceWithReadOptionsAndSelectedFields() {
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("withReadOptions() already called");
  p.apply(
      "ReadMyTable",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withReadOptions(TableReadOptions.newBuilder().build())
          .withSelectedFields(Lists.newArrayList("field1")));
}
Example #15
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testBuildSourceWithReadOptionsAndRowRestriction() {
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("withReadOptions() already called");
  p.apply(
      "ReadMyTable",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withReadOptions(TableReadOptions.newBuilder().build())
          .withRowRestriction("field > 1"));
}
Example #16
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testDisplayData() {
  String tableSpec = "foo.com:project:dataset.table";
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from(tableSpec);
  DisplayData displayData = DisplayData.from(typedRead);
  assertThat(displayData, hasDisplayItem("table", tableSpec));
}
Example #17
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testEvaluatedDisplayData() {
  DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table");
  Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(typedRead);
  assertThat(displayData, hasItem(hasDisplayItem("table")));
}
Example #18
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testName() {
  assertEquals(
      "BigQueryIO.TypedRead",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .getName());
}
Example #19
Source File: BigQueryIOStorageReadIT.java From beam with Apache License 2.0
private void runBigQueryIOStorageReadPipeline() {
  Pipeline p = Pipeline.create(options);
  PCollection<Long> count =
      p.apply(
              "Read",
              BigQueryIO.read(TableRowParser.INSTANCE)
                  .from(options.getInputTable())
                  .withMethod(Method.DIRECT_READ))
          .apply("Count", Count.globally());
  PAssert.thatSingleton(count).isEqualTo(options.getNumRecords());
  p.run().waitUntilFinish();
}
Example #20
Source File: BigQueryIOStorageQueryIT.java From beam with Apache License 2.0
private void runBigQueryIOStorageQueryPipeline() {
  Pipeline p = Pipeline.create(options);
  PCollection<Long> count =
      p.apply(
              "Query",
              BigQueryIO.read(TableRowParser.INSTANCE)
                  .fromQuery("SELECT * FROM `" + options.getInputTable() + "`")
                  .usingStandardSql()
                  .withMethod(Method.DIRECT_READ))
          .apply("Count", Count.globally());
  PAssert.thatSingleton(count).isEqualTo(options.getNumRecords());
  p.run().waitUntilFinish();
}
Example #21
Source File: BigQueryTornadoesIT.java From beam with Apache License 2.0
@Test
public void testE2eBigQueryTornadoesWithStorageApi() throws Exception {
  BigQueryTornadoesITOptions options =
      TestPipeline.testingPipelineOptions().as(BigQueryTornadoesITOptions.class);
  options.setReadMethod(Method.DIRECT_READ);
  options.setOutput(
      String.format(
          "%s.%s",
          "BigQueryTornadoesIT", "monthly_tornadoes_storage_" + System.currentTimeMillis()));
  runE2EBigQueryTornadoesTest(options);
}
Example #22
Source File: BigQueryTable.java From beam with Apache License 2.0
@Override
public BeamSqlTableFilter constructFilter(List<RexNode> filter) {
  if (method.equals(Method.DIRECT_READ)) {
    return new BigQueryFilter(filter);
  }
  return super.constructFilter(filter);
}
Example #23
Source File: BigQueryTableProviderTest.java From beam with Apache License 2.0
@Test
public void testDefaultMethod_whenPropertiesAreNotSet() {
  Table table = fakeTable("hello");
  BigQueryTable sqlTable = (BigQueryTable) provider.buildBeamSqlTable(table);
  assertEquals(Method.DIRECT_READ, sqlTable.method);
}
Example #24
Source File: BigQueryTableProviderTest.java From beam with Apache License 2.0
@Test
public void testSelectDefaultMethodExplicitly() {
  Table table =
      fakeTableWithProperties(
          "hello", "{ " + METHOD_PROPERTY + ": " + "\"" + Method.DEFAULT.toString() + "\" }");
  BigQueryTable sqlTable = (BigQueryTable) provider.buildBeamSqlTable(table);
  assertEquals(Method.DEFAULT, sqlTable.method);
}
Example #25
Source File: BigQueryTableProviderTest.java From beam with Apache License 2.0
@Test
public void testSelectDirectReadMethod() {
  Table table =
      fakeTableWithProperties(
          "hello", "{ " + METHOD_PROPERTY + ": " + "\"" + Method.DIRECT_READ.toString() + "\" }");
  BigQueryTable sqlTable = (BigQueryTable) provider.buildBeamSqlTable(table);
  assertEquals(Method.DIRECT_READ, sqlTable.method);
}
Example #26
Source File: BigQueryTableProviderTest.java From beam with Apache License 2.0
@Test
public void testSelectExportMethod() {
  Table table =
      fakeTableWithProperties(
          "hello", "{ " + METHOD_PROPERTY + ": " + "\"" + Method.EXPORT.toString() + "\" }");
  BigQueryTable sqlTable = (BigQueryTable) provider.buildBeamSqlTable(table);
  assertEquals(Method.EXPORT, sqlTable.method);
}
Example #27
Source File: BigQueryTable.java From beam with Apache License 2.0
@Override
public PCollection<Row> buildIOReader(
    PBegin begin, BeamSqlTableFilter filters, List<String> fieldNames) {
  if (!method.equals(Method.DIRECT_READ)) {
    LOG.info("Predicate/project push-down only available for `DIRECT_READ` method, skipping.");
    return buildIOReader(begin);
  }

  final FieldAccessDescriptor resolved =
      FieldAccessDescriptor.withFieldNames(fieldNames).resolve(getSchema());
  final Schema newSchema = SelectHelpers.getOutputSchema(getSchema(), resolved);
  TypedRead<Row> typedRead = getBigQueryTypedRead(newSchema);

  if (!(filters instanceof DefaultTableFilter)) {
    BigQueryFilter bigQueryFilter = (BigQueryFilter) filters;
    if (!bigQueryFilter.getSupported().isEmpty()) {
      String rowRestriction =
          generateRowRestrictions(getSchema(), bigQueryFilter.getSupported());
      if (!rowRestriction.isEmpty()) {
        LOG.info("Pushing down the following filter: " + rowRestriction);
        typedRead = typedRead.withRowRestriction(rowRestriction);
      }
    }
  }

  if (!fieldNames.isEmpty()) {
    typedRead = typedRead.withSelectedFields(fieldNames);
  }

  return begin.apply("Read Input BQ Rows with push-down", typedRead);
}
Example #28
Source File: BigQueryHllSketchCompatibilityIT.java From beam with Apache License 2.0
private void readSketchFromBigQuery(String tableId, Long expectedCount) {
  String tableSpec = String.format("%s.%s", DATASET_ID, tableId);
  String query =
      String.format(
          "SELECT HLL_COUNT.INIT(%s) AS %s FROM %s",
          DATA_FIELD_NAME, QUERY_RESULT_FIELD_NAME, tableSpec);
  SerializableFunction<SchemaAndRecord, byte[]> parseQueryResultToByteArray =
      input ->
          // BigQuery BYTES type corresponds to Java java.nio.ByteBuffer type
          HllCount.getSketchFromByteBuffer(
              (ByteBuffer) input.getRecord().get(QUERY_RESULT_FIELD_NAME));
  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  Pipeline p = Pipeline.create(options);
  PCollection<Long> result =
      p.apply(
              BigQueryIO.read(parseQueryResultToByteArray)
                  .fromQuery(query)
                  .usingStandardSql()
                  .withMethod(Method.DIRECT_READ)
                  .withCoder(ByteArrayCoder.of()))
          .apply(HllCount.MergePartial.globally()) // no-op, only for testing MergePartial
          .apply(HllCount.Extract.globally());
  PAssert.thatSingleton(result).isEqualTo(expectedCount);
  p.run().waitUntilFinish();
}
Example #29
Source File: BigQueryTornadoesIT.java From beam with Apache License 2.0
@Test
public void testE2EBigQueryTornadoesWithExport() throws Exception {
  BigQueryTornadoesITOptions options =
      TestPipeline.testingPipelineOptions().as(BigQueryTornadoesITOptions.class);
  options.setReadMethod(Method.EXPORT);
  options.setOutput(
      String.format(
          "%s.%s", "BigQueryTornadoesIT", "monthly_tornadoes_" + System.currentTimeMillis()));
  runE2EBigQueryTornadoesTest(options);
}
Example #30
Source File: BigQueryToParquet.java From DataflowTemplates with Apache License 2.0
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
private static PipelineResult run(BigQueryToParquetOptions options) {
  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  TableReadOptions.Builder builder = TableReadOptions.newBuilder();

  /* Add fields to filter export on, if any. */
  if (options.getFields() != null) {
    builder.addAllSelectedFields(Arrays.asList(options.getFields().split(",\\s*")));
  }

  TableReadOptions tableReadOptions = builder.build();
  BigQueryStorageClient client = BigQueryStorageClientFactory.create();
  ReadSession session =
      ReadSessionFactory.create(client, options.getTableRef(), tableReadOptions);

  // Extract schema from ReadSession.
  Schema schema = getTableSchema(session);
  client.close();

  /*
   * Steps:
   *  1) Read records from BigQuery via BigQueryIO.
   *  2) Write records to Google Cloud Storage in Parquet format.
   */
  pipeline
      /*
       * Step 1: Read records via BigQueryIO using supplied schema as a PCollection of
       * {@link GenericRecord}.
       */
      .apply(
          "ReadFromBigQuery",
          BigQueryIO.read(SchemaAndRecord::getRecord)
              .from(options.getTableRef())
              .withTemplateCompatibility()
              .withMethod(Method.DIRECT_READ)
              .withCoder(AvroCoder.of(schema))
              .withReadOptions(tableReadOptions))
      /*
       * Step 2: Write records to Google Cloud Storage as one or more Parquet files
       * via {@link ParquetIO}.
       */
      .apply(
          "WriteToParquet",
          FileIO.<GenericRecord>write()
              .via(ParquetIO.sink(schema))
              .to(options.getBucket())
              .withNumShards(options.getNumShards())
              .withSuffix(FILE_SUFFIX));

  // Execute the pipeline and return the result.
  return pipeline.run();
}