com.google.api.services.bigquery.model.TableCell Java Examples

The following examples show how to use com.google.api.services.bigquery.model.TableCell. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a tokenized DLP table row into a BigQuery {@link TableRow}, using the supplied
 * column headers as field names.
 *
 * <p>Each value is stored twice: as a top-level key/value on the row (used for the insert)
 * and as a {@link TableCell} under {@code setF} (so downstream schema inference can walk
 * the cells).
 *
 * @param tokenizedValue the de-identified row returned by DLP; values are read positionally
 * @param headers column names, parallel to the row's value list
 * @return a populated {@code TableRow} whose cell list mirrors its top-level fields
 */
private static TableRow createBqRow(Table.Row tokenizedValue, String[] headers) {
  TableRow bqRow = new TableRow();
  List<TableCell> cells = new ArrayList<>();
  // Plain indexed loop instead of forEach + AtomicInteger: headers and values are
  // positionally aligned, so a shared index is all that is needed.
  int valueCount = tokenizedValue.getValuesList().size();
  for (int i = 0; i < valueCount; i++) {
    String checkedHeaderName = checkHeaderName(headers[i]);
    String value = tokenizedValue.getValuesList().get(i).getStringValue();
    bqRow.set(checkedHeaderName, value);
    cells.add(new TableCell().set(checkedHeaderName, value));
  }
  bqRow.setF(cells);
  return bqRow;
}
 
Example #2
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0 6 votes vote down vote up
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  TableRow bqRow = destination.getValue();
  List<TableFieldSchema> fields = new ArrayList<>();
  // Each TableCell is a single-entry map whose lone key is the column header.
  // Currently every BQ column is declared as STRING.
  for (TableCell cell : bqRow.getF()) {
    String header = cell.keySet().iterator().next();
    fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
  }
  TableSchema schema = new TableSchema();
  schema.setFields(fields);
  return schema;
}
 
Example #3
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {
  TableRow bqRow = destination.getValue();
  List<TableFieldSchema> fields = new ArrayList<>();
  // Each TableCell is a single-entry map whose lone key is the column header;
  // every BQ column is currently declared as STRING.
  // Why do we use checkHeaderName here and not elsewhere, TODO if we add this back in
  // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
  for (TableCell cell : bqRow.getF()) {
    String header = cell.keySet().iterator().next();
    fields.add(new TableFieldSchema().setName(header).setType("STRING"));
  }
  TableSchema schema = new TableSchema();
  schema.setFields(fields);
  return schema;
}
 
Example #4
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a tokenized DLP table row into a BigQuery {@link TableRow}, using the supplied
 * column headers as field names.
 *
 * <p>Each value is stored twice: as a top-level key/value on the row (used for the insert)
 * and as a {@link TableCell} under {@code setF} (so downstream schema inference can walk
 * the cells).
 *
 * @param tokenizedValue the de-identified row returned by DLP; values are read positionally
 * @param headers column names, parallel to the row's value list
 * @return a populated {@code TableRow} whose cell list mirrors its top-level fields
 */
private static TableRow createBqRow(Table.Row tokenizedValue, String[] headers) {
  TableRow bqRow = new TableRow();
  List<TableCell> cells = new ArrayList<>();
  // Plain indexed loop instead of forEach + AtomicInteger: headers and values are
  // positionally aligned, so a shared index is all that is needed.
  int valueCount = tokenizedValue.getValuesList().size();
  for (int i = 0; i < valueCount; i++) {
    String checkedHeaderName = checkHeaderName(headers[i]);
    String value = tokenizedValue.getValuesList().get(i).getStringValue();
    bqRow.set(checkedHeaderName, value);
    cells.add(new TableCell().set(checkedHeaderName, value));
  }
  bqRow.setF(cells);
  return bqRow;
}
 
Example #5
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {

  TableRow bqRow = destination.getValue();
  List<TableFieldSchema> fields = new ArrayList<>();
  // Each TableCell is a single-entry map whose lone key is the column header.
  // Currently every BQ column is declared as STRING.
  for (TableCell cell : bqRow.getF()) {
    String header = cell.keySet().iterator().next();
    fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
  }
  TableSchema schema = new TableSchema();
  schema.setFields(fields);
  return schema;
}
 
Example #6
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {

  TableRow bqRow = destination.getValue();
  List<TableFieldSchema> fields = new ArrayList<>();
  // Each TableCell is a single-entry map whose lone key is the column header;
  // every BQ column is currently declared as STRING.
  // Why do we use checkHeaderName here and not elsewhere, TODO if we add this back in
  // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
  for (TableCell cell : bqRow.getF()) {
    String header = cell.keySet().iterator().next();
    fields.add(new TableFieldSchema().setName(header).setType("STRING"));
  }
  TableSchema schema = new TableSchema();
  schema.setFields(fields);
  return schema;
}
 
Example #7
Source File: BigQueryInsertErrorCoderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
@Test
public void testDecodeEncodeEqual() throws Exception {
  // Build each component of the error separately so the fixture is readable.
  TableRow row = new TableRow().setF(Collections.singletonList(new TableCell().setV("Value")));
  ErrorProto errorDetail =
      new ErrorProto()
          .setReason("a Reason")
          .setLocation("A location")
          .setMessage("A message")
          .setDebugInfo("The debug info");
  TableDataInsertAllResponse.InsertErrors insertErrors =
      new TableDataInsertAllResponse.InsertErrors()
          .setIndex(0L)
          .setErrors(Collections.singletonList(errorDetail));
  TableReference table =
      new TableReference()
          .setProjectId("dummy-project-id")
          .setDatasetId("dummy-dataset-id")
          .setTableId("dummy-table-id");

  BigQueryInsertError value = new BigQueryInsertError(row, insertErrors, table);

  // Round-tripping through the coder must yield an equal value.
  CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
}
 
Example #8
Source File: BigqueryMatcherTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** Builds a completed single-row QueryResponse with three known cell values. */
private QueryResponse createResponseContainingTestData() {
  TableRow row = new TableRow();
  row.setF(
      Lists.newArrayList(
          new TableCell().setV("abc"),
          new TableCell().setV("2"),
          new TableCell().setV("testing BigQuery matcher.")));

  QueryResponse response = new QueryResponse();
  response.setJobComplete(true);
  response.setRows(Lists.newArrayList(row));
  response.setTotalRows(BigInteger.ONE);
  return response;
}
 
Example #9
Source File: BigQueryInterpreter.java    From zeppelin with Apache License 2.0 6 votes vote down vote up
/**
 * Renders BigQuery query results as text: one TAB-separated header line with the column
 * names, followed by one TAB-separated line per result row, each terminated by NEWLINE.
 *
 * @param response completed query results, including schema and rows
 * @return the formatted table as a single string
 * @throws NullPointerException if the response is missing its schema, rows, or cell values
 *     (the symptom of a failed SQL execution); the triggering NPE is kept as the cause
 *     instead of being discarded
 */
public static String printRows(final GetQueryResultsResponse response) {
  StringBuilder msg = new StringBuilder();
  try {
    List<String> schemaNames = new ArrayList<String>();
    for (TableFieldSchema field : response.getSchema().getFields()) {
      schemaNames.add(field.getName());
    }
    msg.append(Joiner.on(TAB).join(schemaNames));
    msg.append(NEWLINE);
    for (TableRow row : response.getRows()) {
      List<String> fieldValues = new ArrayList<String>();
      for (TableCell field : row.getF()) {
        fieldValues.add(field.getV().toString());
      }
      msg.append(Joiner.on(TAB).join(fieldValues));
      msg.append(NEWLINE);
    }
    return msg.toString();
  } catch (NullPointerException ex) {
    // Preserve the original exception as the cause so the failing access is not lost
    // (the previous code rethrew a bare NPE and swallowed the stack trace).
    NullPointerException wrapped = new NullPointerException("SQL Execution returned an error!");
    wrapped.initCause(ex);
    throw wrapped;
  }
}
 
Example #10
Source File: BigqueryMatcher.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Computes an order-independent fingerprint of the given rows.
 *
 * <p>Within a row the stringified cell values are sorted before hashing, so column order does
 * not affect the result; across rows {@code Hashing.combineUnordered} makes row order
 * irrelevant as well.
 *
 * @param rows query result rows; each cell value is stringified null-safely via
 *     {@link Objects#toString(Object)}
 * @return hex string of the combined SHA-1 hashes
 */
private String generateHash(@Nonnull List<TableRow> rows) {
  List<HashCode> rowHashes = Lists.newArrayList();
  for (TableRow row : rows) {
    List<String> cellsInOneRow = Lists.newArrayList();
    for (TableCell cell : row.getF()) {
      cellsInOneRow.add(Objects.toString(cell.getV()));
    }
    // Sort once per row: the original sorted after every add, which is O(n^2 log n)
    // per row yet produces exactly the same final ordering.
    Collections.sort(cellsInOneRow);
    rowHashes.add(Hashing.sha1().hashString(cellsInOneRow.toString(), StandardCharsets.UTF_8));
  }
  return Hashing.combineUnordered(rowHashes).toString();
}
 
Example #11
Source File: BigqueryMatcher.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Formats up to totalNumRows sample rows from the response, with a trailing "..." if truncated. */
private String formatRows(int totalNumRows) {
  StringBuilder samples = new StringBuilder();
  List<TableRow> rows = response.getRows();
  int sampleCount = Math.min(totalNumRows, rows.size());
  for (int rowIdx = 0; rowIdx < sampleCount; rowIdx++) {
    samples.append(String.format("%n\t\t"));
    // Each cell is left-justified in a 10-character column.
    for (TableCell field : rows.get(rowIdx).getF()) {
      samples.append(String.format("%-10s", field.getV()));
    }
  }
  if (rows.size() > totalNumRows) {
    samples.append(String.format("%n\t\t..."));
  }
  return samples.toString();
}
 
Example #12
Source File: BigQueryUtilTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Builds a TableRow whose cells hold the given values, one cell per argument, in order. */
private TableRow rawRow(Object... args) {
  List<TableCell> cells = new ArrayList<>(args.length);
  for (Object value : args) {
    TableCell cell = new TableCell();
    cell.setV(value);
    cells.add(cell);
  }
  return new TableRow().setF(cells);
}
 
Example #13
Source File: BigQueryToTableIT.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the bytes/date/time columns read back from {@code outputTable} match the
 * expected values, independent of row order (results are sorted before comparison).
 *
 * @param outputTable table reference interpolated into the legacy-SQL FROM clause
 * @throws Exception if the query cannot be executed within the retry budget
 */
private void verifyNewTypesQueryRes(String outputTable) throws Exception {
  List<String> newTypeQueryExpectedRes =
      ImmutableList.of(
          "abc=,2000-01-01,00:00:00",
          "dec=,3000-12-31,23:59:59.990000",
          "xyw=,2011-01-01,23:59:59.999999");
  // The previous version also issued the identical query via BQ_CLIENT.queryWithRetries and
  // never read the response — that duplicate round-trip has been removed.
  List<TableRow> tableRows =
      getTableRowsFromQuery(
          String.format("SELECT bytes, date, time FROM [%s];", outputTable), MAX_RETRY);
  List<String> tableResult =
      tableRows.stream()
          .map(
              row -> {
                // Join the row's cell values with commas, e.g. "abc=,2000-01-01,00:00:00".
                StringBuilder res = new StringBuilder();
                for (TableCell cell : row.getF()) {
                  if (res.length() > 0) {
                    res.append(",");
                  }
                  res.append(cell.getV().toString());
                }
                return res.toString();
              })
          .sorted()
          .collect(Collectors.toList());
  assertEquals(newTypeQueryExpectedRes, tableResult);
}
 
Example #14
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the query results for the given job as an ImmutableTable, row-keyed by row number
 * (indexed from 1), column-keyed by the TableFieldSchema for that field, and with the value
 * object as the cell value.  Note that null values will not actually be null (since we're using
 * ImmutableTable) but they can be checked for using Data.isNull().
 *
 * <p>This table is fully materialized in memory (not lazily loaded), so it should not be used
 * with queries expected to return large results.
 */
private ImmutableTable<Integer, TableFieldSchema, Object> getQueryResults(Job job) {
  try {
    ImmutableTable.Builder<Integer, TableFieldSchema, Object> builder =
        new ImmutableTable.Builder<>();
    String pageToken = null;
    int rowNumber = 1;
    while (true) {
      // Fetch one page of results; pageToken is null on the first iteration.
      GetQueryResultsResponse queryResults = bigquery.jobs()
            .getQueryResults(getProjectId(), job.getJobReference().getJobId())
            .setPageToken(pageToken)
            .execute();
      // If the job isn't complete yet, retry; getQueryResults() waits for up to 10 seconds on
      // each invocation so this will effectively poll for completion.
      // NOTE(review): there is no retry cap or timeout here — a job that never completes
      // would poll forever; confirm callers rely on server-side job limits.
      if (queryResults.getJobComplete()) {
        // Pair schema fields with row cells positionally; iteration stops at the shorter
        // of the two lists.
        List<TableFieldSchema> schemaFields = queryResults.getSchema().getFields();
        for (TableRow row : queryResults.getRows()) {
          Iterator<TableFieldSchema> fieldIterator = schemaFields.iterator();
          Iterator<TableCell> cellIterator = row.getF().iterator();
          while (fieldIterator.hasNext() && cellIterator.hasNext()) {
            builder.put(rowNumber, fieldIterator.next(), cellIterator.next().getV());
          }
          rowNumber++;
        }
        // A null page token means the final page has been consumed.
        pageToken = queryResults.getPageToken();
        if (pageToken == null) {
          break;
        }
      }
    }
    return builder.build();
  } catch (IOException e) {
    // Surface transport-level failures as the project's domain-specific exception.
    throw BigqueryJobFailureException.create(e);
  }
}
 
Example #15
Source File: S3Import.java    From dlp-dataflow-deidentification with Apache License 2.0 4 votes vote down vote up
/**
 * Inspects the element's text content with Cloud DLP and emits one BigQuery row per finding.
 *
 * <p>Oversized payloads (above DLP_PAYLOAD_LIMIT) and any thrown exception are routed to the
 * apiResponseFailedElements side output instead of failing the bundle.
 */
@ProcessElement
public void processElement(ProcessContext c) throws IOException {

  // NOTE(review): a DLP client is created (and closed) per element, which is expensive;
  // consider moving client setup to @Setup — TODO confirm lifecycle constraints.
  try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
    if (!c.element().getValue().isEmpty()) {
      ContentItem contentItem =
          ContentItem.newBuilder().setValue(c.element().getValue()).build();
      this.requestBuilder.setItem(contentItem);

      // Reject requests that exceed DLP's payload limit up front.
      if (this.requestBuilder.build().getSerializedSize() > DLP_PAYLOAD_LIMIT) {
        String errorMessage =
            String.format(
                "Payload Size %s Exceeded Batch Size %s",
                this.requestBuilder.build().getSerializedSize(), DLP_PAYLOAD_LIMIT);
        c.output(apiResponseFailedElements, errorMessage);
      } else {

        InspectContentResponse response =
            dlpServiceClient.inspectContent(this.requestBuilder.build());

        // Single UTC timestamp shared by all findings from this element.
        String timestamp =
            TIMESTAMP_FORMATTER.print(Instant.now().toDateTime(DateTimeZone.UTC));

        response
            .getResult()
            .getFindingsList()
            .forEach(
                finding -> {
                  // Each value is stored both as a top-level row field and as a TableCell
                  // under setF, so downstream schema inference can walk the cells.
                  List<TableCell> cells = new ArrayList<>();
                  TableRow row = new TableRow();

                  cells.add(new TableCell().set("file_name", c.element().getKey()));
                  row.set("file_name", c.element().getKey());

                  cells.add(new TableCell().set("inspection_timestamp", timestamp));
                  row.set("inspection_timestamp", timestamp);

                  cells.add(new TableCell().set("infoType", finding.getInfoType().getName()));
                  row.set("infoType", finding.getInfoType().getName());

                  cells.add(new TableCell().set("likelihood", finding.getLikelihood().name()));
                  row.set("likelihood", finding.getLikelihood().name());

                  row.setF(cells);

                  c.output(apiResponseSuccessElements, KV.of(BQ_TABLE_NAME, row));
                });

        numberOfBytesInspected.inc(contentItem.getSerializedSize());
        // Any initialization errors reported by the API are forwarded as failures.
        response
            .findInitializationErrors()
            .forEach(
                error -> {
                  c.output(apiResponseFailedElements, error.toString());
                });
      }
    }

  } catch (Exception e) {

    // Broad catch is deliberate: any failure becomes a failed-element record rather
    // than crashing the pipeline bundle.
    c.output(apiResponseFailedElements, e.toString());
  }
}
 
Example #16
Source File: S3Import.java    From dlp-dataflow-deidentification with Apache License 2.0 4 votes vote down vote up
/**
 * Infers a BigQuery TableSchema from the TableCells attached to the destination row.
 *
 * <p>Field types are guessed per-cell via Util.typeCheck on the value's string form;
 * RECORD-typed values are further split into nested fields.
 */
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {

  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  // When TableRow is created in earlier steps, setF() was
  // used to setup TableCells so that Table Schema can be constructed

  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {

    // Each TableCell is a single-entry map; its lone key is the column header.
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    String type = Util.typeCheck(object.get(header).toString());
    LOG.debug("Type {}, header {}, value {}", type, header, object.get(header).toString());
    if (type.equals("RECORD")) {
      // NOTE(review): this assumes the nested value's toString() looks like
      // "{k1=v1, k2=v2}" — substring(1) strips a leading character from each token and
      // the trailing character from each value. Values containing ',' or '=' would break
      // this parsing; TODO confirm upstream values are constrained.
      String keyValuePair = object.get(header).toString();
      String[] records = keyValuePair.split(",");
      List<TableFieldSchema> nestedFields = new ArrayList<TableFieldSchema>();

      for (int j = 0; j < records.length; j++) {
        String[] element = records[j].substring(1).split("=");
        String elementValue = element[1].substring(0, element[1].length() - 1);
        String elementType = Util.typeCheck(elementValue.trim());
        LOG.debug(
            "element header {} , element type {}, element Value {}",
            element[0],
            elementType,
            elementValue);
        nestedFields.add(new TableFieldSchema().setName(element[0]).setType(elementType));
      }
      fields.add(new TableFieldSchema().setName(header).setType(type).setFields(nestedFields));

    } else {
      fields.add(new TableFieldSchema().setName(header).setType(type));
    }
  }
  schema.setFields(fields);
  return schema;
}