com.google.api.services.bigquery.model.TableRow Java Examples
The following examples show how to use com.google.api.services.bigquery.model.TableRow.
The original project and source file are noted above each example.
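Before the examples, a minimal sketch of building and reading a TableRow may help (the class name and column names here are illustrative, not taken from any of the projects below). TableRow is a chainable key-value map: set(column, value) returns the row itself, so calls chain, and get(column) returns an untyped Object that the caller must cast or convert.

import com.google.api.services.bigquery.model.TableRow;

public class TableRowBasics {
  public static void main(String[] args) {
    // set(...) returns the TableRow, so column/value pairs can be chained.
    TableRow row = new TableRow().set("name", "gale").set("wind_speed", 45);

    // Values come back as Object; cast or convert on read.
    String name = (String) row.get("name");
    System.out.println(name + " -> " + row.get("wind_speed"));
  }
}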
Example #1
Source File: TopWikipediaSessions.java From beam with Apache License 2.0
@Override
public PCollection<String> expand(PCollection<TableRow> input) {
  return input
      .apply(ParDo.of(new ExtractUserAndTimestamp()))
      .apply(
          "SampleUsers",
          ParDo.of(
              new DoFn<String, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  if (Math.abs((long) c.element().hashCode())
                      <= Integer.MAX_VALUE * samplingThreshold) {
                    c.output(c.element());
                  }
                }
              }))
      .apply(new ComputeSessions())
      .apply("SessionsToStrings", ParDo.of(new SessionsToStringsDoFn()))
      .apply(new TopPerMonth())
      .apply("FormatOutput", ParDo.of(new FormatOutputDoFn()));
}
Example #2
Source File: ErrorConverters.java From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  FailsafeElement<String, String> failsafeElement = context.element();
  final String message = failsafeElement.getOriginalPayload();

  // Format the timestamp for insertion
  String timestamp =
      TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC));

  // Build the table row
  final TableRow failedRow =
      new TableRow()
          .set("timestamp", timestamp)
          .set("errorMessage", failsafeElement.getErrorMessage())
          .set("stacktrace", failsafeElement.getStacktrace());

  // Only set the payload if it's populated on the message.
  if (message != null) {
    failedRow
        .set("payloadString", message)
        .set("payloadBytes", message.getBytes(StandardCharsets.UTF_8));
  }

  context.output(failedRow);
}
Example #3
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteFailedJobs() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job with prefix");
  thrown.expectMessage("reached max retries");
  thrown.expectMessage("last failed job");

  p.run();
}
Example #4
Source File: FakeJobService.java From beam with Apache License 2.0
private long writeRows(
    String tableId, List<TableRow> rows, TableSchema schema, String destinationPattern)
    throws IOException {
  Schema avroSchema = BigQueryUtils.toGenericAvroSchema(tableId, schema.getFields());
  List<TableRow> rowsToWrite = Lists.newArrayList();
  int shard = 0;
  for (TableRow row : rows) {
    rowsToWrite.add(row);
    if (rowsToWrite.size() == 5) {
      writeRowsHelper(rowsToWrite, avroSchema, destinationPattern, shard++);
      rowsToWrite.clear();
    }
  }
  if (!rowsToWrite.isEmpty()) {
    writeRowsHelper(rowsToWrite, avroSchema, destinationPattern, shard++);
  }
  return shard;
}
Example #5
Source File: TemplatePipelineTest.java From gcp-batch-ingestion-bigquery with Apache License 2.0
@Test
public void test_parse_CSV_format_successfully_with_tablerow() throws Exception {
  List<String> input = new ArrayList<>();
  input.add("2018,8,13,Wikinews,English,Spanish football: Sevilla signs Aleix Vidal from FC Barcelona,12331");
  List<TableRow> output = fnTester.processBundle(input);
  Assert.assertThat(output, is(not(empty())));
  Assert.assertThat(output.get(0).get("year"), is(equalTo("2018")));
  Assert.assertThat(output.get(0).get("month"), is(equalTo("8")));
  Assert.assertThat(output.get(0).get("day"), is(equalTo("13")));
  Assert.assertThat(output.get(0).get("wikimedia_project"), is(equalTo("Wikinews")));
  Assert.assertThat(output.get(0).get("language"), is(equalTo("English")));
  Assert.assertThat(
      output.get(0).get("title"),
      is(equalTo("Spanish football: Sevilla signs Aleix Vidal from FC Barcelona")));
  Assert.assertThat(output.get(0).get("views"), is(equalTo("12331")));
}
Example #6
Source File: ExactDollarRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0
@Override
public void processElement(ProcessContext c) {
  Double dollars = c.element();
  TableRow r = new TableRow();
  r.set("dollar_turnover", dollars);
  // The timing can be:
  // EARLY: the dollar amount is not yet final
  // ON_TIME: Dataflow thinks the dollar amount is final, but late data are still possible
  // LATE: late data has arrived
  r.set("dollar_timing", c.pane().getTiming()); // EARLY, ON_TIME or LATE
  r.set("dollar_window", ((IntervalWindow) c.window()).start().getMillis() / 1000.0 / 60.0); // timestamp in fractional minutes

  LOG.info(
      "Outputting $ value {} at {} with marker {} for window {}",
      dollars.toString(),
      new Date().getTime(),
      c.pane().getTiming().toString(),
      c.window().hashCode());
  c.output(r);
}
Example #7
Source File: BigQueryIOStorageQueryTest.java From beam with Apache License 2.0
@Test
public void testQuerySourceCreateReader() throws Exception {
  BigQueryStorageQuerySource<TableRow> querySource =
      BigQueryStorageQuerySource.create(
          /* stepUuid = */ "testStepUuid",
          ValueProvider.StaticValueProvider.of("SELECT * FROM `dataset.table`"),
          /* flattenResults = */ false,
          /* useLegacySql = */ false,
          /* priority = */ QueryPriority.INTERACTIVE,
          /* location = */ "asia-northeast1",
          /* queryTempDataset = */ null,
          /* kmsKey = */ null,
          new TableRowParser(),
          TableRowJsonCoder.of(),
          fakeBigQueryServices);

  thrown.expect(UnsupportedOperationException.class);
  thrown.expectMessage("BigQuery storage source must be split before reading");
  querySource.createReader(options);
}
Example #8
Source File: BigQueryMapper.java From DataflowTemplates with Apache License 2.0
/**
 * Extracts and applies new column information to BigQuery by comparing the TableRow against the
 * BigQuery Table. Retries the supplied number of times before failing.
 *
 * @param tableId a TableId referencing the BigQuery table to be loaded to.
 * @param row a TableRow with the raw data to be loaded into BigQuery.
 * @param inputSchema The source schema lookup to be used in mapping.
 * @param retries Number of remaining retries before error is raised.
 */
private void applyMapperToTableRow(
    TableId tableId, TableRow row, Map<String, LegacySQLTypeName> inputSchema, int retries) {
  try {
    updateTableIfRequired(tableId, row, inputSchema);
  } catch (Exception e) {
    if (retries > 0) {
      LOG.info("RETRY TABLE UPDATE - enter: {}", String.valueOf(retries));
      try {
        Thread.sleep(2000);
      } catch (InterruptedException i) {
        throw e;
      }
      LOG.info("RETRY TABLE UPDATE - apply: {}", String.valueOf(retries));
      applyMapperToTableRow(tableId, row, inputSchema, retries - 1);
    } else {
      LOG.info("RETRY TABLE UPDATE - throw: {}", String.valueOf(retries));
      throw e;
    }
  }
}
Example #9
Source File: BigQueryDynamicConverters.java From DataflowTemplates with Apache License 2.0
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {
  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    /** currently all BQ data types are set to String */
    // Why do we use checkHeaderName here and not elsewhere, TODO if we add this back in
    // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
    fields.add(new TableFieldSchema().setName(header).setType("STRING"));
  }
  schema.setFields(fields);
  return schema;
}
Example #10
Source File: BigqueryMatcherTest.java From beam with Apache License 2.0
private QueryResponse createResponseContainingTestData() {
  TableCell field1 = new TableCell();
  field1.setV("abc");
  TableCell field2 = new TableCell();
  field2.setV("2");
  TableCell field3 = new TableCell();
  field3.setV("testing BigQuery matcher.");
  TableRow row = new TableRow();
  row.setF(Lists.newArrayList(field1, field2, field3));

  QueryResponse response = new QueryResponse();
  response.setJobComplete(true);
  response.setRows(Lists.newArrayList(row));
  response.setTotalRows(BigInteger.ONE);
  return response;
}
Example #11
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteUnknown() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job");

  p.run();
}
Example #12
Source File: TriggerExampleTest.java From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testTotalFlow() {
  PCollection<KV<String, Integer>> flow =
      pipeline
          .apply(Create.timestamped(TIME_STAMPED_INPUT))
          .apply(ParDo.of(new ExtractFlowInfo()));

  PCollection<TableRow> totalFlow =
      flow.apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))))
          .apply(new TotalFlow("default"));

  PCollection<String> results = totalFlow.apply(ParDo.of(new FormatResults()));

  PAssert.that(results).containsInAnyOrder(canonicalFormat(OUT_ROW_1), canonicalFormat(OUT_ROW_2));

  pipeline.run().waitUntilFinish();
}
Example #13
Source File: MergeInfoMapper.java From DataflowTemplates with Apache License 2.0
@Override
public PCollection<MergeInfo> expand(PCollection<KV<TableId, TableRow>> input) {
  return input.apply(
      MapElements.into(TypeDescriptor.of(MergeInfo.class))
          .via(
              element -> {
                return MergeInfo.create(
                    METADATA_TIMESTAMP, // TODO should be list pulled from Datastream API
                    METADATA_DELETED,
                    String.format(
                        "%s.%s", // Staging Table
                        // TODO these should possibly be passed separately
                        BigQueryConverters.formatStringTemplate(stagingDataset, element.getValue()),
                        BigQueryConverters.formatStringTemplate(stagingTable, element.getValue())),
                    String.format(
                        "%s.%s", // Replica Table
                        BigQueryConverters.formatStringTemplate(replicaDataset, element.getValue()),
                        BigQueryConverters.formatStringTemplate(replicaTable, element.getValue())),
                    ImmutableList.copyOf(element.getValue().keySet()),
                    ImmutableList.of("ID"));
              }));
}
Example #14
Source File: FakeJobService.java From beam with Apache License 2.0
private boolean validateDispositions(
    Table table, CreateDisposition createDisposition, WriteDisposition writeDisposition)
    throws InterruptedException, IOException {
  if (table == null) {
    if (createDisposition == CreateDisposition.CREATE_NEVER) {
      return false;
    }
  } else if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
    datasetService.deleteTable(table.getTableReference());
  } else if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
    List<TableRow> allRows =
        datasetService.getAllRows(
            table.getTableReference().getProjectId(),
            table.getTableReference().getDatasetId(),
            table.getTableReference().getTableId());
    if (!allRows.isEmpty()) {
      return false;
    }
  }
  return true;
}
Example #15
Source File: BigQueryInsertErrorCoderTest.java From beam with Apache License 2.0
@Test
public void testDecodeEncodeEqual() throws Exception {
  BigQueryInsertError value =
      new BigQueryInsertError(
          new TableRow().setF(Collections.singletonList(new TableCell().setV("Value"))),
          new TableDataInsertAllResponse.InsertErrors()
              .setIndex(0L)
              .setErrors(
                  Collections.singletonList(
                      new ErrorProto()
                          .setReason("a Reason")
                          .setLocation("A location")
                          .setMessage("A message")
                          .setDebugInfo("The debug info"))),
          new TableReference()
              .setProjectId("dummy-project-id")
              .setDatasetId("dummy-dataset-id")
              .setTableId("dummy-table-id"));

  CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
}
Example #16
Source File: JoinExamples.java From flink-dataflow with Apache License 2.0
@Override
public void processElement(ProcessContext c) {
  TableRow row = c.element();
  String countryCode = (String) row.get("FIPSCC");
  String countryName = (String) row.get("HumanName");
  c.output(KV.of(countryCode, countryName));
}
Example #17
Source File: BigQueryToTableIT.java From beam with Apache License 2.0
private void verifyNewTypesQueryRes(String outputTable) throws Exception {
  List<String> newTypeQueryExpectedRes =
      ImmutableList.of(
          "abc=,2000-01-01,00:00:00",
          "dec=,3000-12-31,23:59:59.990000",
          "xyw=,2011-01-01,23:59:59.999999");
  QueryResponse response =
      BQ_CLIENT.queryWithRetries(
          String.format("SELECT bytes, date, time FROM [%s];", outputTable), project);
  List<TableRow> tableRows =
      getTableRowsFromQuery(
          String.format("SELECT bytes, date, time FROM [%s];", outputTable), MAX_RETRY);
  List<String> tableResult =
      tableRows.stream()
          .map(
              row -> {
                String res = "";
                for (TableCell cell : row.getF()) {
                  if (res.isEmpty()) {
                    res = cell.getV().toString();
                  } else {
                    res = res + "," + cell.getV().toString();
                  }
                }
                return res;
              })
          .sorted()
          .collect(Collectors.toList());
  assertEquals(newTypeQueryExpectedRes, tableResult);
}
Example #18
Source File: BigQueryIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWriteWithBrokenGetTable() throws Exception {
  p.apply(Create.<TableRow>of(new TableRow().set("foo", "bar")))
      .apply(
          BigQueryIO.writeTableRows()
              .to(input -> null)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expectMessage("result of tableFunction can not be null");
  thrown.expectMessage("foo");
  p.run();
}
Example #19
Source File: FilterRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0
@Override
public void processElement(ProcessContext c) {
  TableRow ride = c.element();
  // filter rides in lower Manhattan only
  float lat = Float.parseFloat(ride.get("latitude").toString());
  float lon = Float.parseFloat(ride.get("longitude").toString());
  if (lon > -74.747 && lon < -73.969) {
    if (lat > 40.699 && lat < 40.720) {
      c.output(ride);
      LOG.info("Accepted ride lat: {} lon: {} ", lat, lon);
      return;
    }
  }
}
Example #20
Source File: BigqueryMatcher.java From beam with Apache License 2.0
private String generateHash(@Nonnull List<TableRow> rows) {
  List<HashCode> rowHashes = Lists.newArrayList();
  for (TableRow row : rows) {
    List<String> cellsInOneRow = Lists.newArrayList();
    for (TableCell cell : row.getF()) {
      cellsInOneRow.add(Objects.toString(cell.getV()));
      Collections.sort(cellsInOneRow);
    }
    rowHashes.add(Hashing.sha1().hashString(cellsInOneRow.toString(), StandardCharsets.UTF_8));
  }
  return Hashing.combineUnordered(rowHashes).toString();
}
Example #21
Source File: BigQueryTornadoesTest.java From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testFormatCounts() {
  PCollection<KV<Integer, Long>> inputs =
      p.apply(Create.of(KV.of(3, 0L), KV.of(4, Long.MAX_VALUE), KV.of(5, Long.MIN_VALUE)));
  PCollection<TableRow> result = inputs.apply(ParDo.of(new FormatCountsFn()));
  PAssert.that(result)
      .containsInAnyOrder(
          new TableRow().set("month", 3).set("tornado_count", 0),
          new TableRow().set("month", 4).set("tornado_count", Long.MAX_VALUE),
          new TableRow().set("month", 5).set("tornado_count", Long.MIN_VALUE));
  p.run().waitUntilFinish();
}
Example #22
Source File: TemplatePipeline.java From gcp-batch-ingestion-bigquery with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  if (c.element().equalsIgnoreCase(HEADER)) return;
  String[] split = c.element().split(",");
  if (split.length > 7) return;
  TableRow row = new TableRow();
  for (int i = 0; i < split.length; i++) {
    TableFieldSchema col = getTableSchema().getFields().get(i);
    row.set(col.getName(), split[i]);
  }
  c.output(row);
}
Example #23
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0
@Test
public void testEvaluatedDisplayData() {
  DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table");
  Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(typedRead);
  assertThat(displayData, hasItem(hasDisplayItem("table")));
}
Example #24
Source File: BigQueryConverters.java From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  TableRow row = context.element();
  try {
    context.output(FailsafeElement.of(row, tableRowToJson(row)));
    successCounter.inc();
  } catch (Exception e) {
    context.output(
        this.transformDeadletterOutTag,
        FailsafeElement.of(row, row.toString())
            .setErrorMessage(e.getMessage())
            .setStacktrace(Throwables.getStackTraceAsString(e)));
    failedCounter.inc();
  }
}
Example #25
Source File: BigQueryHllSketchCompatibilityIT.java From beam with Apache License 2.0
private void writeSketchToBigQuery(List<String> testData, String expectedChecksum) {
  String tableSpec = String.format("%s.%s", DATASET_ID, SKETCH_TABLE_ID);
  String query =
      String.format("SELECT HLL_COUNT.EXTRACT(%s) FROM %s", SKETCH_FIELD_NAME, tableSpec);
  TableSchema tableSchema =
      new TableSchema()
          .setFields(
              Collections.singletonList(
                  new TableFieldSchema().setName(SKETCH_FIELD_NAME).setType(SKETCH_FIELD_TYPE)));

  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(testData).withType(TypeDescriptor.of(String.class)))
      .apply(HllCount.Init.forStrings().globally())
      .apply(
          BigQueryIO.<byte[]>write()
              .to(tableSpec)
              .withSchema(tableSchema)
              .withFormatFunction(
                  sketch ->
                      // An empty sketch is represented by an empty byte array in Beam
                      // and by null in BigQuery
                      new TableRow().set(SKETCH_FIELD_NAME, sketch.length == 0 ? null : sketch))
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
  p.run().waitUntilFinish();

  // BigqueryMatcher will send a query to retrieve the estimated count and verify its
  // correctness using the checksum.
  assertThat(
      createQueryUsingStandardSql(APP_NAME, PROJECT_ID, query),
      queryResultHasChecksum(expectedChecksum));
}
Example #26
Source File: WriteResult.java From beam with Apache License 2.0
/**
 * Returns a {@link PCollection} containing the {@link TableRow}s that didn't make it to BQ.
 *
 * <p>Only use this method if you haven't enabled {@link
 * BigQueryIO.Write#withExtendedErrorInfo()}. Otherwise use {@link
 * WriteResult#getFailedInsertsWithErr()}.
 */
public PCollection<TableRow> getFailedInserts() {
  checkArgument(
      failedInsertsTag != null,
      "Cannot use getFailedInserts as this WriteResult uses extended errors"
          + " information. Use getFailedInsertsWithErr instead");
  return failedInserts;
}
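For context, here is a minimal caller-side sketch of how getFailedInserts is typically wired up. It assumes a PCollection<TableRow> named rows, an SLF4J LOG in scope, and an existing destination table; the table name, retry policy, and logging DoFn are illustrative, not part of the source file above. Failed inserts are only observable for streaming inserts with a retry policy that can give up on rows.

WriteResult writeResult =
    rows.apply(
        BigQueryIO.writeTableRows()
            .to("project-id:dataset-id.table-id")
            .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
            .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors()));

// Without withExtendedErrorInfo(), the failed rows come back as plain TableRows.
writeResult
    .getFailedInserts()
    .apply(
        "LogFailedInserts",
        ParDo.of(
            new DoFn<TableRow, Void>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                LOG.warn("Failed insert: {}", c.element());
              }
            }));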
Example #27
Source File: ErrorConverters.java From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  FailsafeElement<KV<String, String>, String> failsafeElement = context.element();
  KV<String, String> message = failsafeElement.getOriginalPayload();

  // Format the timestamp for insertion
  String timestamp =
      TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC));

  String payloadString =
      "key: "
          + (message.getKey() == null ? "" : message.getKey())
          + "value: "
          + (message.getValue() == null ? "" : message.getValue());

  byte[] payloadBytes =
      (message.getValue() == null
          ? "".getBytes(StandardCharsets.UTF_8)
          : message.getValue().getBytes(StandardCharsets.UTF_8));

  // Build the table row
  TableRow failedRow =
      new TableRow()
          .set("timestamp", timestamp)
          .set("errorMessage", failsafeElement.getErrorMessage())
          .set("stacktrace", failsafeElement.getStacktrace())
          .set("payloadString", payloadString)
          .set("payloadBytes", payloadBytes);

  context.output(failedRow);
}
Example #28
Source File: TrafficRoutes.java From beam with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) {
  RouteInfo routeInfo = c.element().getValue();
  TableRow row =
      new TableRow()
          .set("avg_speed", routeInfo.getAvgSpeed())
          .set("slowdown_event", routeInfo.getSlowdownEvent())
          .set("route", c.element().getKey())
          .set("window_timestamp", c.timestamp().toString());
  c.output(row);
}
Example #29
Source File: StreamingWriteTables.java From beam with Apache License 2.0
StreamingWriteTables<ElementT> withToTableRow(
    SerializableFunction<ElementT, TableRow> toTableRow) {
  return new StreamingWriteTables<>(
      bigQueryServices,
      retryPolicy,
      extendedErrorInfo,
      skipInvalidRows,
      ignoreUnknownValues,
      ignoreInsertIds,
      elementCoder,
      toTableRow);
}
Example #30
Source File: JoinExamples.java From beam with Apache License 2.0
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  // The following two 'apply' calls create multiple inputs to our pipeline,
  // one for each of our two input sources.
  PCollection<TableRow> eventsTable =
      p.apply(BigQueryIO.readTableRows().from(GDELT_EVENTS_TABLE));
  PCollection<TableRow> countryCodes = p.apply(BigQueryIO.readTableRows().from(COUNTRY_CODES));
  PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
  formattedResults.apply(TextIO.write().to(options.getOutput()));
  p.run().waitUntilFinish();
}