Example #1
 * Creates ReadSession for schema extraction.
 * @param client BigQueryStorage client used to create ReadSession.
 * @param tableString String that represents table to export from.
 * @param tableReadOptions TableReadOptions that specify any fields in the table to filter on.
 * @return session ReadSession object that contains the schema for the export.
// NOTE(review): this excerpt is truncated. Several statements below have lost their right-hand
// sides or arguments, and the closing braces are absent; restore them from the upstream file
// before relying on this code.
static ReadSession create(
    BigQueryStorageClient client, String tableString, TableReadOptions tableReadOptions) {
  // Parse tableString into a TableReference so the project id can be read from it.
  TableReference tableReference = BigQueryHelpers.parseTableSpec(tableString);
  // "projects/<projectId>" string derived from the parsed table reference.
  String parentProjectId = "projects/" + tableReference.getProjectId();

  // NOTE(review): the initializer for storageTableRef is missing from this excerpt.
  TableReferenceProto.TableReference storageTableRef =

  // NOTE(review): the initializer for builder is missing from this excerpt.
  CreateReadSessionRequest.Builder builder =
  try {
    // NOTE(review): the arguments to createReadSession are missing from this excerpt.
    return client.createReadSession(;
  } catch (InvalidArgumentException iae) {
    // Log the message, then rethrow unchecked with the original exception kept as the cause.
    LOG.error("Error creating ReadSession: " + iae.getMessage());
    throw new RuntimeException(iae);
Example #2
// Obtains a row count for the table named by bqLocation and wraps it in BeamTableStatistics.
// Returns BeamTableStatistics.BOUNDED_UNKNOWN when the count is null, and also after an
// IOException or InterruptedException has been logged.
// NOTE(review): this excerpt is truncated; the call that produces rowCount and the closing
// braces are missing.
private static BeamTableStatistics getRowCountFromBQ(PipelineOptions o, String bqLocation) {
  try {
    // NOTE(review): only the trailing argument of the initializer survives on the next line.
    BigInteger rowCount =
  , BigQueryHelpers.parseTableSpec(bqLocation));

    // A null count is treated as "size unknown" rather than as an error.
    if (rowCount == null) {
      return BeamTableStatistics.BOUNDED_UNKNOWN;

    return BeamTableStatistics.createBoundedTableStatistics(rowCount.doubleValue());

  } catch (IOException | InterruptedException e) {
    // The failure is logged with its stack trace and not rethrown.
    // NOTE(review): no Thread.currentThread().interrupt() is visible for the
    // InterruptedException case; confirm against the full source.
    LOG.warn("Could not get the row count for the table " + bqLocation, e);

  return BeamTableStatistics.BOUNDED_UNKNOWN;
Example #3
// Rebuilds a TableRow -> InsertErrors-list map from this.insertErrors, whose keys and values
// are strings that are parsed back with BigQueryHelpers.fromJsonString.
// NOTE(review): this excerpt is truncated; some statements and all closing braces are missing.
Map<TableRow, List<TableDataInsertAllResponse.InsertErrors>> getInsertErrors() {
  // NOTE(review): the initializer for parsedInsertErrors is missing from this excerpt.
  Map<TableRow, List<TableDataInsertAllResponse.InsertErrors>> parsedInsertErrors =
  // The iteration over this.insertErrors happens while holding the monitor on `tables`.
  synchronized (tables) {
    for (Map.Entry<String, List<String>> entry : this.insertErrors.entrySet()) {
      // The entry key is a JSON string that deserializes to the TableRow.
      TableRow tableRow = BigQueryHelpers.fromJsonString(entry.getKey(), TableRow.class);
      List<TableDataInsertAllResponse.InsertErrors> allErrors = Lists.newArrayList();
      for (String errorsString : entry.getValue()) {
        // NOTE(review): the start of this statement is missing; only its trailing arguments
        // remain. Presumably each errorsString is parsed and appended to allErrors; verify.
                errorsString, TableDataInsertAllResponse.InsertErrors.class));
      parsedInsertErrors.put(tableRow, allErrors);
  return parsedInsertErrors;
Example #4
 * Cause a given {@link TableRow} object to fail when it's inserted. The errors in the list will
 * be returned on subsequent retries, and the insert will succeed when the errors run out.
// Registers, per TableRow, a list of InsertErrors in this.insertErrors. Rows are stored under
// their JSON string form (BigQueryHelpers.toJsonString) with a list of strings as the value.
// NOTE(review): this excerpt is truncated; the inner loop body and all closing braces are
// missing.
public void failOnInsert(
    Map<TableRow, List<TableDataInsertAllResponse.InsertErrors>> insertErrors) {
  // Updates to this.insertErrors happen while holding the monitor on `tables`.
  synchronized (tables) {
    for (Map.Entry<TableRow, List<TableDataInsertAllResponse.InsertErrors>> entry :
        insertErrors.entrySet()) {
      List<String> errorStrings = Lists.newArrayList();
      for (TableDataInsertAllResponse.InsertErrors errors : entry.getValue()) {
        // NOTE(review): loop body missing from this excerpt. Presumably each `errors` value is
        // serialized and added to errorStrings; verify against the full source.
      this.insertErrors.put(BigQueryHelpers.toJsonString(entry.getKey()), errorStrings);
Example #5
// Packs a table (as its JSON string) together with its rows into one KV, encodes that KV with
// a String/List<TableRow> KvCoder, and returns the encoded bytes as a Base64 string.
public static String encodeQueryResult(Table table, List<TableRow> rows) throws IOException {
  final KvCoder<String, List<TableRow>> payloadCoder =
      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(TableRowJsonCoder.of()));
  final String tableJson = BigQueryHelpers.toJsonString(table);
  final KV<String, List<TableRow>> tableAndRows = KV.of(tableJson, rows);
  final ByteArrayOutputStream encodedBytes = new ByteArrayOutputStream();
  payloadCoder.encode(tableAndRows, encodedBytes);
  final byte[] payload = encodedBytes.toByteArray();
  return Base64.encodeBase64String(payload);
Example #6
// Reverses the Base64 + KvCoder encoding of a query result: the decoded key is parsed from JSON
// into a Table and paired with the decoded row list.
public static KV<Table, List<TableRow>> decodeQueryResult(String queryResult) throws IOException {
  final KvCoder<String, List<TableRow>> payloadCoder =
      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(TableRowJsonCoder.of()));
  final byte[] rawPayload = Base64.decodeBase64(queryResult);
  final KV<String, List<TableRow>> decoded =
      payloadCoder.decode(new ByteArrayInputStream(rawPayload));
  // The key carries the table as JSON; the value is the row list, passed through unchanged.
  return KV.of(BigQueryHelpers.fromJsonString(decoded.getKey(), Table.class), decoded.getValue());
Example #7
 * Return the appropriate table destination instance for the given document type and other
 * attributes.
// NOTE(review): this excerpt is truncated. Closing braces are absent throughout and several
// builder chains / lambda bodies below are cut off; restore them from the upstream file.
public TableDestination getTableDestination(Map<String, String> attributes) {
  // Work on a private copy so the caller's map is not modified by the put() calls below.
  attributes = new HashMap<>(attributes);

  // We coerce all docType and namespace names to be snake_case and to remove invalid
  // characters; these transformations MUST match with the transformations applied by the
  // jsonschema-transpiler and mozilla-schema-generator when creating table schemas in BigQuery.
  final String namespace = attributes.get(Attribute.DOCUMENT_NAMESPACE);
  final String docType = attributes.get(Attribute.DOCUMENT_TYPE);
  if (namespace != null) {
    attributes.put(Attribute.DOCUMENT_NAMESPACE, getAndCacheNormalizedName(namespace));
  if (docType != null) {
    attributes.put(Attribute.DOCUMENT_TYPE, getAndCacheNormalizedName(docType));

  // Only letters, numbers, and underscores are allowed in BigQuery dataset and table names,
  // but some doc types and namespaces contain '-', so we convert to '_'; we don't pass all
  // values through getAndCacheBqName to avoid expensive regex operations and polluting the
  // cache of transformed field names.
  // NOTE(review): String.replaceAll treats its first argument as a regular expression; a
  // literal replace('-', '_') would avoid the regex machinery the comment above mentions.
  attributes = Maps.transformValues(attributes, v -> v.replaceAll("-", "_"));

  // Fill the configured template's variables from the (normalized) attributes.
  final String tableSpec = StringSubstitutor.replace(tableSpecTemplate.get(), attributes);

  // Send to error collection if incomplete tableSpec; $ is not a valid char in tableSpecs.
  if (tableSpec.contains("$")) {
    throw new IllegalArgumentException("Element did not contain all the attributes needed to"
        + " fill out variables in the configured BigQuery output template: "
        + tableSpecTemplate.get());

  // Destination carries time partitioning on partitioningField and clustering on
  // clusteringFields; the second constructor argument is passed as null.
  final TableDestination tableDestination = new TableDestination(tableSpec, null,
      new TimePartitioning().setField(partitioningField.get()),
      new Clustering().setFields(clusteringFields.get()));
  final TableReference ref = BigQueryHelpers.parseTableSpec(tableSpec);
  // NOTE(review): the rest of this builder chain is missing from this excerpt.
  final DatasetReference datasetRef = new DatasetReference().setProjectId(ref.getProjectId())

  // The BigQuery service handle is created on first use.
  if (bqService == null) {
    // NOTE(review): the rest of this builder chain is missing from this excerpt.
    bqService = BigQueryOptions.newBuilder().setProjectId(ref.getProjectId())

  // Get and cache a listing of table names for this dataset.
  Set<String> tablesInDataset;
  if (tableListingCache == null) {
    // We need to be very careful about settings for the cache here. We have had significant
    // issues in the past due to exceeding limits on BigQuery API requests; see
    // NOTE(review): the reference that followed "see" and the rest of the cache builder chain
    // are missing from this excerpt; only the 10-minute expireAfterWrite setting is visible.
    tableListingCache = CacheBuilder.newBuilder().expireAfterWrite(Duration.ofMinutes(10))
  try {
    // Cache lookup keyed by datasetRef; the loader lists the dataset's tables on a miss.
    tablesInDataset = tableListingCache.get(datasetRef, () -> {
      Set<String> tableSet = new HashSet<>();
      Dataset dataset = bqService.getDataset(ref.getDatasetId());
      // A null dataset leaves tableSet empty, which is reported as an error further down.
      if (dataset != null) {
        // NOTE(review): the forEach lambda body is missing from this excerpt.
        dataset.list().iterateAll().forEach(t -> {
      return tableSet;
  } catch (ExecutionException e) {
    // Rethrow unchecked, wrapping the loader's underlying cause rather than the wrapper.
    throw new UncheckedExecutionException(e.getCause());

  // Send to error collection if dataset or table doesn't exist so BigQueryIO doesn't throw a
  // pipeline execution exception.
  if (tablesInDataset.isEmpty()) {
    // NOTE(review): the two concatenated literals yield a doubled space ("has no  tables").
    throw new IllegalArgumentException("Resolved destination dataset does not exist or has no "
        + " tables for tableSpec " + tableSpec);
  } else if (!tablesInDataset.contains(ref.getTableId())) {
    throw new IllegalArgumentException("Resolved destination table does not exist: " + tableSpec);

  return tableDestination;
Example #8
// Builds a TableId for specKey: resolves the table destination for (dataset, specKey), parses
// its table spec, and copies the project, dataset and table ids across.
private TableId generateTableId(String specKey) {
  final String tableSpec =
      BigQuerySinkHelpers.getTableDestination(dataset, specKey).getTableSpec();
  final TableReference parsedRef = BigQueryHelpers.parseTableSpec(tableSpec);
  final String projectId = parsedRef.getProjectId();
  final String datasetId = parsedRef.getDatasetId();
  final String tableId = parsedRef.getTableId();
  return TableId.of(projectId, datasetId, tableId);
Example #9
// Writes a TableSchema to outStream by converting it to a JSON string and delegating the
// actual byte encoding to stringCoder.
public void encode(TableSchema value, OutputStream outStream)
    throws CoderException, IOException {
  final String schemaJson = BigQueryHelpers.toJsonString(value);
  stringCoder.encode(schemaJson, outStream);
Example #10
// Reads a string from inStream via stringCoder and parses it as JSON into a TableSchema.
public TableSchema decode(InputStream inStream) throws CoderException, IOException {
  final String schemaJson = stringCoder.decode(inStream);
  return BigQueryHelpers.fromJsonString(schemaJson, TableSchema.class);
Example #11
// Executes a load job configuration against datasetService: validates the dispositions
// against the existing table, reads rows from the job's source files, inserts them into the
// destination table, and reports the job state ("FAILED" or "DONE").
// NOTE(review): this excerpt is truncated; some statements and all closing braces are missing.
private JobStatus runLoadJob(JobReference jobRef, JobConfigurationLoad load)
    throws InterruptedException, IOException {
  TableReference destination = load.getDestinationTable();
  TableSchema schema = load.getSchema();
  // checkArgument rejects a load configuration that carries no schema.
  checkArgument(schema != null, "No schema specified");
  // Source files are looked up by the job's (projectId, jobId) pair.
  List<ResourceId> sourceFiles = filesForLoadJobs.get(jobRef.getProjectId(), jobRef.getJobId());
  WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
  CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());

  Table existingTable = datasetService.getTable(destination);
  // A disposition mismatch yields a FAILED status with an empty ErrorProto instead of a throw.
  if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
    return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
  if (existingTable == null) {
    // NOTE(review): the initializer for strippedDestination is missing from this excerpt.
    TableReference strippedDestination =
    // Build a table description from the load's schema, then copy over the optional time
    // partitioning and clustering settings when the load config provides them.
    // NOTE(review): no call that registers this new Table with datasetService is visible in
    // this excerpt; confirm against the full source.
    existingTable = new Table().setTableReference(strippedDestination).setSchema(schema);
    if (load.getTimePartitioning() != null) {
      existingTable = existingTable.setTimePartitioning(load.getTimePartitioning());
    if (load.getClustering() != null) {
      existingTable = existingTable.setClustering(load.getClustering());

  // Collect rows from every source file according to the declared source format.
  List<TableRow> rows = Lists.newArrayList();
  for (ResourceId filename : sourceFiles) {
    // NOTE(review): the body of the NEWLINE_DELIMITED_JSON branch is missing from this excerpt.
    // NOTE(review): getSourceFormat() is dereferenced without a null check.
    if (load.getSourceFormat().equals("NEWLINE_DELIMITED_JSON")) {
    } else if (load.getSourceFormat().equals("AVRO")) {
      rows.addAll(readAvroTableRows(filename.toString(), schema));

  // Third argument is passed as null here.
  datasetService.insertAll(destination, rows, null);
  return new JobStatus().setState("DONE");
Example #12
// Inserts rowList into a table container, consulting the errors returned by getInsertErrors()
// and retryPolicy to decide per row whether to insert it or report it as failed. Returns the
// sum of the values returned by tableContainer.addRow for the inserted rows.
// NOTE(review): this excerpt is truncated; some statements and all closing braces are missing.
public <T> long insertAll(
    TableReference ref,
    List<ValueInSingleWindow<TableRow>> rowList,
    @Nullable List<String> insertIdList,
    InsertRetryPolicy retryPolicy,
    List<ValueInSingleWindow<T>> failedInserts,
    ErrorContainer<T> errorContainer,
    boolean skipInvalidRows,
    boolean ignoreUnknownValues,
    boolean ignoreInsertIds)
    throws IOException, InterruptedException {
  // The error map is fetched before entering the synchronized block below.
  Map<TableRow, List<TableDataInsertAllResponse.InsertErrors>> insertErrors = getInsertErrors();
  synchronized (tables) {
    // When insert ids are to be ignored, drop the list so every row is added with a null id.
    if (ignoreInsertIds) {
      insertIdList = null;

    // If ids are supplied there must be exactly one per row.
    if (insertIdList != null) {
      assertEquals(rowList.size(), insertIdList.size());

    long dataSize = 0;
    // NOTE(review): the initializer for tableContainer is missing from this excerpt.
    TableContainer tableContainer =
    for (int i = 0; i < rowList.size(); ++i) {
      TableRow row = rowList.get(i).getValue();
      // Errors registered for this exact row, or null when there are none.
      List<TableDataInsertAllResponse.InsertErrors> allErrors = insertErrors.get(row);
      boolean shouldInsert = true;
      if (allErrors != null) {
        for (TableDataInsertAllResponse.InsertErrors errors : allErrors) {
          // The row is not inserted once the policy declines to retry one of its errors.
          if (!retryPolicy.shouldRetry(new Context(errors))) {
            shouldInsert = false;
      if (shouldInsert) {
        if (insertIdList == null) {
          dataSize += tableContainer.addRow(row, null);
        } else {
          dataSize += tableContainer.addRow(row, insertIdList.get(i));
      } else {
        // NOTE(review): the start of this call is missing; the surviving arguments pass
        // failedInserts, the last entry of allErrors, ref and the original windowed row.
            failedInserts, allErrors.get(allErrors.size() - 1), ref, rowList.get(i));
    return dataSize;