com.google.api.services.bigquery.model.DatasetReference Java Examples

The following examples show how to use com.google.api.services.bigquery.model.DatasetReference. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BqExtractOperatorFactory.java    From digdag with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the BigQuery extract job configuration from this operator's parameters.
 *
 * @param projectId default project id handed to {@code tableReference} when resolving the source table
 * @return a job configuration whose extract section has been populated
 */
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationExtract extract = new JobConfigurationExtract();

    // "destination" is read as a list first; if that raises ConfigException it is read again
    // as a single string.
    try {
        extract.setDestinationUris(params.getList("destination", String.class));
    }
    catch (ConfigException ignore) {
        extract.setDestinationUri(params.get("destination", String.class));
    }

    // The source table is taken from "_command" and resolved against the optional "dataset".
    Optional<DatasetReference> datasetDefault = params.getOptional("dataset", String.class)
            .transform(name -> Bq.datasetReference(name));
    String commandTable = params.get("_command", String.class);
    extract.setSourceTable(tableReference(projectId, datasetDefault, commandTable));

    // Each remaining setting is applied only when the parameter is present.
    params.getOptional("print_header", boolean.class).transform(extract::setPrintHeader);
    params.getOptional("field_delimiter", String.class).transform(extract::setFieldDelimiter);
    params.getOptional("destination_format", String.class).transform(extract::setDestinationFormat);
    params.getOptional("compression", String.class).transform(extract::setCompression);

    JobConfiguration job = new JobConfiguration();
    job.setExtract(extract);
    return job;
}
 
Example #2
Source File: CheckedBigquery.java    From nomulus with Apache License 2.0 6 votes vote down vote up
/**
 * Ensures the dataset exists by trying to create it. Note that it's not appreciably cheaper
 * to check for dataset existence than it is to try to create it and check for exceptions.
 *
 * @param bigquery client used to issue the dataset insert
 * @param projectId project the dataset is inserted into
 * @param datasetId id of the dataset to create
 * @throws IOException if the insert fails and the failure reason is not "duplicate"
 */
// Note that these are not static so they can be mocked for testing.
private void ensureDataset(Bigquery bigquery, String projectId, String datasetId)
    throws IOException {
  try {
    bigquery.datasets()
        .insert(projectId,
            new Dataset().setDatasetReference(
                new DatasetReference()
                    .setProjectId(projectId)
                    .setDatasetId(datasetId)))
        .execute();
  } catch (IOException e) {
    // Swallow errors about a duplicate dataset, and throw any other ones. The literal is the
    // receiver of equals() so that a null reason rethrows the original exception instead of
    // masking it with a NullPointerException.
    if (!"duplicate".equals(BigqueryJobFailureException.create(e).getReason())) {
      throw e;
    }
  }
}
 
Example #3
Source File: BqDdlOperatorFactory.java    From digdag with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link Table} from a structured table configuration.
 *
 * @param defaultProjectId project id used when the configuration does not name a project
 * @param defaultDataset dataset whose id is used when the configuration does not name a dataset
 * @param config table configuration to convert
 * @return the populated table
 * @throws ConfigException if neither the configuration nor the default supplies a dataset id
 */
private Table table(String defaultProjectId, Optional<DatasetReference> defaultDataset, TableConfig config)
{
    // The configured dataset wins; otherwise fall back to the default dataset's id.
    Optional<String> resolvedDataset = config.dataset().or(defaultDataset.transform(DatasetReference::getDatasetId));
    if (!resolvedDataset.isPresent()) {
        throw new ConfigException("Bad table reference or configuration: Missing 'dataset'");
    }

    TableReference reference = new TableReference()
            .setProjectId(config.project().or(defaultProjectId))
            .setDatasetId(resolvedDataset.get())
            .setTableId(config.id());

    // Absent optional settings are passed through as null.
    Table result = new Table();
    result.setTableReference(reference);
    result.setSchema(config.schema().orNull());
    result.setFriendlyName(config.friendly_name().orNull());
    result.setExpirationTime(config.expiration_time()
            .transform(p -> p.getTimestamp().toInstant(request.getTimeZone()).toEpochMilli()).orNull());
    result.setTimePartitioning(config.time_partitioning().orNull());
    result.setView(config.view().orNull());
    return result;
}
 
Example #4
Source File: BqDdlOperatorFactory.java    From digdag with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link Table} from a JSON node that is either a textual table reference or a
 * table configuration object.
 *
 * @param defaultProjectId project id used when the node does not name a project
 * @param defaultDataset dataset used when the node does not name a dataset
 * @param node textual reference or configuration object
 * @return the resulting table
 * @throws ConfigException if a non-textual node cannot be read as a {@code TableConfig}
 */
private Table table(String defaultProjectId, Optional<DatasetReference> defaultDataset, JsonNode node)
{
    // A textual node carries only a table reference.
    if (node.isTextual()) {
        Table referenceOnly = new Table();
        referenceOnly.setTableReference(Bq.tableReference(defaultProjectId, defaultDataset, node.asText()));
        return referenceOnly;
    }

    // Anything else is parsed as a full table configuration.
    TableConfig parsed;
    try {
        parsed = objectMapper.readValue(node.traverse(), TableConfig.class);
    }
    catch (IOException e) {
        throw new ConfigException("Invalid table reference or configuration: " + node, e);
    }
    return table(defaultProjectId, defaultDataset, parsed);
}
 
Example #5
Source File: ExampleUtils.java    From deployment-examples with MIT License 5 votes vote down vote up
/**
 * Makes sure the given BigQuery dataset and table exist, creating whichever one is missing.
 *
 * @param projectId project containing the dataset
 * @param datasetId dataset to look up or create
 * @param tableId table to look up or create
 * @param schema schema used when creating the table and compared against an existing table
 * @throws IOException if a BigQuery request or schema pretty-printing fails
 * @throws RuntimeException if the table exists and its schema does not equal {@code schema}
 */
private void setupBigQueryTable(
    String projectId, String datasetId, String tableId, TableSchema schema) throws IOException {
  // Build the client on first use.
  if (bigQueryClient == null) {
    bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build();
  }

  // A null lookup result is treated as "dataset does not exist".
  Datasets datasets = bigQueryClient.datasets();
  boolean datasetMissing = executeNullIfNotFound(datasets.get(projectId, datasetId)) == null;
  if (datasetMissing) {
    DatasetReference datasetRef =
        new DatasetReference().setProjectId(projectId).setDatasetId(datasetId);
    datasets.insert(projectId, new Dataset().setDatasetReference(datasetRef)).execute();
  }

  Tables tables = bigQueryClient.tables();
  Table existing = executeNullIfNotFound(tables.get(projectId, datasetId, tableId));
  if (existing != null) {
    // An existing table is only acceptable when its schema matches the expected one.
    if (!existing.getSchema().equals(schema)) {
      throw new RuntimeException(
          "Table exists and schemas do not match, expecting: "
              + schema.toPrettyString()
              + ", actual: "
              + existing.getSchema().toPrettyString());
    }
    return;
  }

  TableReference tableRef =
      new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId);
  Table toCreate = new Table().setSchema(schema).setTableReference(tableRef);
  tables.insert(projectId, datasetId, toCreate).execute();
}
 
Example #6
Source File: GcpUtil.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a dataset with the given id and asserts that it exists afterwards.
 *
 * @param bq BigQuery client
 * @param projectId project in which the dataset is created
 * @param datasetId id of the dataset to create
 * @return the dataset returned by the delegate {@code createDataset} overload
 */
static Dataset createDataset(Bigquery bq, String projectId, String datasetId)
        throws IOException, RetryExecutor.RetryGiveupException
{
    // Only the dataset id is set on the reference; the project is passed to the overload.
    DatasetReference reference = new DatasetReference();
    reference.setDatasetId(datasetId);
    Dataset request = new Dataset();
    request.setDatasetReference(reference);

    Dataset created = createDataset(bq, projectId, request);
    assertThat(datasetExists(bq, projectId, datasetId), is(true));
    return created;
}
 
Example #7
Source File: BigQueryIT.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Integration test for the BigQuery load workflow: uploads a small CSV object to GCS, creates
 * the output dataset, runs the {@code load} workflow and checks that the destination table
 * can be fetched.
 *
 * <p>The test is skipped through {@code assumeThat} when {@code GCS_TEST_BUCKET} is null or
 * empty.
 */
@Test
public void testLoad()
        throws Exception
{
    assumeThat(GCS_TEST_BUCKET, not(isEmptyOrNullString()));

    // Create source data object: a two-line CSV ("a,b" and "c,d") stored under GCS_PREFIX
    String objectName = GCS_PREFIX + "test.csv";
    byte[] data = Joiner.on('\n').join("a,b", "c,d").getBytes(UTF_8);
    InputStreamContent content = new InputStreamContent("text/csv", new ByteArrayInputStream(data))
            .setLength(data.length);
    StorageObject metadata = new StorageObject().setName(objectName);
    retryExecutor.run(() -> gcs.objects()
            .insert(GCS_TEST_BUCKET, metadata, content)
            .execute());

    // Create output dataset, named from BQ_TAG
    String datasetId = BQ_TAG + "_load_test";
    Dataset dataset = new Dataset().setDatasetReference(new DatasetReference()
            .setProjectId(gcpProjectId)
            .setDatasetId(datasetId));
    retryExecutor.run(() -> bq.datasets().insert(gcpProjectId, dataset)
            .execute());

    // Run load: push the workflow with the source object and target table as parameters,
    // then wait up to five minutes for the attempt to succeed
    String tableId = "data";
    addWorkflow(projectDir, "acceptance/bigquery/load.dig");
    Id attemptId = pushAndStart(server.endpoint(), projectDir, "load", ImmutableMap.of(
            "source_bucket", GCS_TEST_BUCKET,
            "source_object", objectName,
            "target_dataset", datasetId,
            "target_table", tableId,
            "outfile", outfile.toString()));
    expect(Duration.ofMinutes(5), attemptSuccess(server.endpoint(), attemptId));
    assertThat(Files.exists(outfile), is(true));

    // Check that destination table was created
    Table destinationTable = retryExecutor.run(() -> bq.tables().get(gcpProjectId, datasetId, tableId).execute());
    assertThat(destinationTable.getTableReference().getTableId(), is(tableId));
}
 
Example #8
Source File: BqOperatorFactory.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the BigQuery query job configuration from this operator's parameters.
 *
 * @param projectId default project id handed to {@code tableReference} when resolving the
 *     destination table
 * @return a job configuration whose query section has been populated
 */
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationQuery queryConfig = new JobConfigurationQuery();
    queryConfig.setQuery(query);

    // Legacy SQL is off unless "use_legacy_sql" says otherwise.
    queryConfig.setUseLegacySql(params.get("use_legacy_sql", boolean.class, false));

    // Scalar options are applied only when the parameter is present.
    params.getOptional("allow_large_results", boolean.class).transform(queryConfig::setAllowLargeResults);
    params.getOptional("use_query_cache", Boolean.class).transform(queryConfig::setUseQueryCache);
    params.getOptional("create_disposition", String.class).transform(queryConfig::setCreateDisposition);
    params.getOptional("write_disposition", String.class).transform(queryConfig::setWriteDisposition);
    params.getOptional("flatten_results", Boolean.class).transform(queryConfig::setFlattenResults);
    params.getOptional("maximum_billing_tier", Integer.class).transform(queryConfig::setMaximumBillingTier);
    params.getOptional("priority", String.class).transform(queryConfig::setPriority);

    // Structured options are deserialized through explicit type references.
    params.getOptional("table_definitions", new TypeReference<Map<String, ExternalDataConfiguration>>() {})
            .transform(queryConfig::setTableDefinitions);
    params.getOptional("user_defined_function_resources", new TypeReference<List<UserDefinedFunctionResource>>() {})
            .transform(queryConfig::setUserDefinedFunctionResources);

    // The optional "dataset" is both the query's default dataset and the fallback used when
    // resolving the destination table.
    Optional<DatasetReference> datasetDefault = params.getOptional("dataset", String.class)
            .transform(name -> Bq.datasetReference(name));
    if (datasetDefault.isPresent()) {
        queryConfig.setDefaultDataset(datasetDefault.get());
    }

    params.getOptional("destination_table", String.class)
            .transform(spec -> queryConfig.setDestinationTable(tableReference(projectId, datasetDefault, spec)));

    JobConfiguration job = new JobConfiguration();
    job.setQuery(queryConfig);
    return job;
}
 
Example #9
Source File: Bq.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a dataset reference string into a {@link DatasetReference}.
 *
 * @param defaultProjectId project id used when the string does not contain a project part
 * @param s dataset reference to parse
 * @return the parsed reference; its project id is null when neither the string nor the
 *     default supplies one
 * @throws IllegalArgumentException if {@code s} does not match {@code DATASET_REFERENCE_PATTERN}
 */
static DatasetReference datasetReference(Optional<String> defaultProjectId, String s)
{
    Matcher parsed = DATASET_REFERENCE_PATTERN.matcher(s);
    if (parsed.matches()) {
        // A project named in the reference takes precedence over the default.
        Optional<String> project = Optional.fromNullable(parsed.group("project")).or(defaultProjectId);
        DatasetReference reference = new DatasetReference();
        reference.setProjectId(project.orNull());
        reference.setDatasetId(parsed.group("dataset"));
        return reference;
    }
    throw new IllegalArgumentException("Bad dataset reference: " + s);
}
 
Example #10
Source File: Bq.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a table reference string into a {@link TableReference}, filling in a missing
 * project or dataset from the supplied defaults.
 *
 * <p>Project precedence: the project in {@code s}, then the default dataset's project (when
 * non-null), then {@code defaultProjectId}. Dataset precedence: the dataset in {@code s},
 * then the default dataset's id.
 *
 * @param defaultProjectId last-resort project id
 * @param defaultDataset optional dataset providing fallback project and dataset ids
 * @param s table reference to parse
 * @return the resolved table reference
 * @throws IllegalArgumentException if {@code s} does not match {@code TABLE_REFERENCE_PATTERN}
 *     or no dataset can be determined
 */
static TableReference tableReference(String defaultProjectId, Optional<DatasetReference> defaultDataset, String s)
{
    Matcher parsed = TABLE_REFERENCE_PATTERN.matcher(s);
    if (!parsed.matches()) {
        throw new IllegalArgumentException("Bad table reference: " + s);
    }

    String projectId = parsed.group("project");
    if (projectId == null) {
        String datasetProject = defaultDataset.isPresent() ? defaultDataset.get().getProjectId() : null;
        projectId = (datasetProject != null) ? datasetProject : defaultProjectId;
    }

    Optional<String> datasetId = Optional.fromNullable(parsed.group("dataset"))
            .or(defaultDataset.transform(DatasetReference::getDatasetId));

    String tableId = parsed.group("table");

    if (!datasetId.isPresent()) {
        throw new IllegalArgumentException("Bad table reference. Either configure 'dataset' or include dataset name in table reference: " + s);
    }

    TableReference reference = new TableReference();
    reference.setProjectId(projectId);
    reference.setDatasetId(datasetId.get());
    reference.setTableId(tableId);
    return reference;
}
 
Example #11
Source File: BqDdlOperatorFactory.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link Dataset} from a dataset configuration.
 *
 * @param defaultProjectId project id used when the configuration does not name a project
 * @param config dataset configuration to convert
 * @return the populated dataset; absent optional settings are set to null
 */
private Dataset dataset(String defaultProjectId, DatasetConfig config)
{
    DatasetReference reference = new DatasetReference()
            .setProjectId(config.project().or(defaultProjectId))
            .setDatasetId(config.id());

    Dataset result = new Dataset();
    result.setDatasetReference(reference);
    result.setFriendlyName(config.friendly_name().orNull());
    // The configured expiration is converted to milliseconds.
    result.setDefaultTableExpirationMs(config.default_table_expiration().transform(d -> d.getDuration().toMillis()).orNull());
    result.setLocation(config.location().orNull());
    result.setAccess(config.access().orNull());
    result.setLabels(config.labels().orNull());
    return result;
}
 
Example #12
Source File: BqDdlOperatorFactory.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an operation that deletes the dataset named by a textual JSON node.
 *
 * @param config textual node holding the dataset reference
 * @return an operation that resolves the reference against the supplied project id and
 *     deletes that dataset
 * @throws ConfigException if {@code config} is not a textual node
 */
private BqOperation deleteDataset(JsonNode config)
{
    if (config.isTextual()) {
        // The reference is parsed when the operation runs, using the project id supplied then.
        return (bq, projectId) -> {
            DatasetReference target = datasetReference(projectId, config.asText());
            bq.deleteDataset(target.getProjectId(), target.getDatasetId());
        };
    }
    throw new ConfigException("Bad dataset reference: " + config);
}
 
Example #13
Source File: BqLoadOperatorFactory.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the BigQuery load job configuration from this operator's parameters.
 *
 * @param projectId default project id handed to {@code tableReference} when resolving the
 *     destination table
 * @return a job configuration whose load section has been populated
 */
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationLoad load = new JobConfigurationLoad();
    load.setSourceUris(sourceUris(params));

    // A schema is set only when the "schema" parameter exists.
    if (params.has("schema")) {
        load.setSchema(tableSchema(params));
    }

    // "destination_table" is required and resolved against the optional "dataset".
    Optional<DatasetReference> datasetDefault = params.getOptional("dataset", String.class)
            .transform(name -> Bq.datasetReference(name));

    String targetTable = params.get("destination_table", String.class);
    load.setDestinationTable(tableReference(projectId, datasetDefault, targetTable));

    // Optional settings are applied only when present.
    params.getOptional("create_disposition", String.class).transform(load::setCreateDisposition);
    params.getOptional("write_disposition", String.class).transform(load::setWriteDisposition);

    params.getOptional("source_format", String.class).transform(load::setSourceFormat);
    params.getOptional("field_delimiter", String.class).transform(load::setFieldDelimiter);
    params.getOptional("skip_leading_rows", int.class).transform(load::setSkipLeadingRows);
    params.getOptional("encoding", String.class).transform(load::setEncoding);
    params.getOptional("quote", String.class).transform(load::setQuote);
    params.getOptional("max_bad_records", int.class).transform(load::setMaxBadRecords);
    params.getOptional("allow_quoted_newlines", boolean.class).transform(load::setAllowQuotedNewlines);
    params.getOptional("allow_jagged_rows", boolean.class).transform(load::setAllowJaggedRows);
    params.getOptional("ignore_unknown_values", boolean.class).transform(load::setIgnoreUnknownValues);
    // List-valued settings are always set, using whatever getListOrEmpty returns.
    Optional.of(params.getListOrEmpty("projection_fields", String.class)).transform(load::setProjectionFields);
    params.getOptional("autodetect", boolean.class).transform(load::setAutodetect);
    Optional.of(params.getListOrEmpty("schema_update_options", String.class)).transform(load::setSchemaUpdateOptions);

    JobConfiguration job = new JobConfiguration();
    job.setLoad(load);
    return job;
}
 
Example #14
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a dataset with the given name unless {@code checkDatasetExists} reports that it is
 * already there.
 *
 * @param datasetName id of the dataset to create
 * @return true if the dataset was created by this call, false if it already existed
 * @throws IOException if the insert request fails
 */
public boolean createDatasetIfNeeded(String datasetName) throws IOException {
  if (checkDatasetExists(datasetName)) {
    return false;
  }
  bigquery
      .datasets()
      .insert(
          getProjectId(),
          new Dataset()
              .setDatasetReference(
                  new DatasetReference().setProjectId(getProjectId()).setDatasetId(datasetName)))
      .execute();
  logger.atInfo().log("Created dataset: %s:%s\n", getProjectId(), datasetName);
  return true;
}
 
Example #15
Source File: FakeDatasetService.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link Dataset} carrying only a reference to the requested project and dataset,
 * after checking under the {@code tables} lock that an entry exists for them.
 *
 * @param projectId project to look up
 * @param datasetId dataset to look up
 * @return a dataset whose reference holds the given ids
 */
@Override
public Dataset getDataset(String projectId, String datasetId)
    throws IOException, InterruptedException {
  synchronized (tables) {
    Map<String, TableContainer> tablesInDataset = tables.get(projectId, datasetId);
    if (tablesInDataset == null) {
      // NOTE(review): the message says "table" although a dataset is being looked up.
      throwNotFound(
          "Tried to get a dataset %s:%s, but no such table was set", projectId, datasetId);
    }
    DatasetReference reference = new DatasetReference();
    reference.setDatasetId(datasetId);
    reference.setProjectId(projectId);
    return new Dataset().setDatasetReference(reference);
  }
}
 
Example #16
Source File: ExampleUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Makes sure the given BigQuery dataset and table exist, creating whichever one is missing.
 *
 * @param projectId project containing the dataset
 * @param datasetId dataset to look up or create
 * @param tableId table to look up or create
 * @param schema schema used when creating the table and compared against an existing table
 * @throws IOException if a BigQuery request or schema pretty-printing fails
 * @throws RuntimeException if the table exists and its schema does not equal {@code schema}
 */
private void setupBigQueryTable(
    String projectId, String datasetId, String tableId, TableSchema schema) throws IOException {
  // Build the client on first use.
  if (bigQueryClient == null) {
    bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build();
  }

  // A null lookup result is treated as "dataset does not exist".
  Datasets datasets = bigQueryClient.datasets();
  boolean datasetMissing = executeNullIfNotFound(datasets.get(projectId, datasetId)) == null;
  if (datasetMissing) {
    DatasetReference datasetRef =
        new DatasetReference().setProjectId(projectId).setDatasetId(datasetId);
    datasets.insert(projectId, new Dataset().setDatasetReference(datasetRef)).execute();
  }

  Tables tables = bigQueryClient.tables();
  Table existing = executeNullIfNotFound(tables.get(projectId, datasetId, tableId));
  if (existing != null) {
    // An existing table is only acceptable when its schema matches the expected one.
    if (!existing.getSchema().equals(schema)) {
      throw new RuntimeException(
          "Table exists and schemas do not match, expecting: "
              + schema.toPrettyString()
              + ", actual: "
              + existing.getSchema().toPrettyString());
    }
    return;
  }

  TableReference tableRef =
      new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId);
  Table toCreate = new Table().setSchema(schema).setTableReference(tableRef);
  tables.insert(projectId, datasetId, toCreate).execute();
}
 
Example #17
Source File: Bq.java    From digdag with Apache License 2.0 4 votes vote down vote up
/**
 * Parses a dataset reference string without supplying a default project.
 *
 * @param s dataset reference to parse
 * @return the result of the two-argument overload called with an absent default project
 */
static DatasetReference datasetReference(String s)
{
    Optional<String> noDefaultProject = Optional.absent();
    return datasetReference(noDefaultProject, s);
}
 
Example #18
Source File: Bq.java    From digdag with Apache License 2.0 4 votes vote down vote up
/**
 * Parses a dataset reference string, supplying a default project id.
 *
 * @param defaultProjectId default project id; wrapped with {@code Optional.of}
 * @param s dataset reference to parse
 * @return the result of the {@code Optional}-based overload
 */
static DatasetReference datasetReference(String defaultProjectId, String s)
{
    Optional<String> fallbackProject = Optional.of(defaultProjectId);
    return datasetReference(fallbackProject, s);
}
 
Example #19
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a reference to this connection's dataset, built from {@code getProjectId()} and
 * {@code getDatasetId()}, so that SQL code does not have to name the dataset itself.
 */
public DatasetReference getDataset() {
  DatasetReference reference = new DatasetReference();
  reference.setProjectId(getProjectId());
  reference.setDatasetId(getDatasetId());
  return reference;
}
 
Example #20
Source File: BigQueryIT.java    From digdag with Apache License 2.0 4 votes vote down vote up
/**
 * Integration test for the BigQuery extract workflow: creates a dataset and a two-column
 * table, inserts two rows, runs the {@code extract} workflow and checks that the destination
 * GCS object exists under the expected name.
 *
 * <p>The test is skipped through {@code assumeThat} when {@code GCS_TEST_BUCKET} is null or
 * empty. The object's content is downloaded at the end but no assertion on the downloaded
 * bytes is made in this method.
 */
@Test
public void testExtract()
        throws Exception
{
    assumeThat(GCS_TEST_BUCKET, not(isEmptyOrNullString()));

    // Create source table: dataset first, then a table with STRING columns "foo" and "bar"
    String tableId = "data";
    String datasetId = BQ_TAG + "_extract_test";
    Dataset dataset = new Dataset().setDatasetReference(new DatasetReference()
            .setProjectId(gcpProjectId)
            .setDatasetId(datasetId));
    retryExecutor.run(() -> bq.datasets().insert(gcpProjectId, dataset)
            .execute());
    // The table reference sets no dataset id; the dataset is passed to insert() below.
    Table table = new Table().setTableReference(new TableReference()
            .setProjectId(gcpProjectId)
            .setTableId(tableId))
            .setSchema(new TableSchema()
                    .setFields(ImmutableList.of(
                            new TableFieldSchema().setName("foo").setType("STRING"),
                            new TableFieldSchema().setName("bar").setType("STRING")
                    )));
    retryExecutor.run(() -> bq.tables().insert(gcpProjectId, datasetId, table)
            .execute());

    // Populate source table with two rows: (a, b) and (c, d)
    TableDataInsertAllRequest content = new TableDataInsertAllRequest()
            .setRows(ImmutableList.of(
                    new TableDataInsertAllRequest.Rows().setJson(ImmutableMap.of(
                            "foo", "a",
                            "bar", "b")),
                    new TableDataInsertAllRequest.Rows().setJson(ImmutableMap.of(
                            "foo", "c",
                            "bar", "d"))));
    retryExecutor.run(() -> bq.tabledata().insertAll(gcpProjectId, datasetId, tableId, content)
            .execute());

    // Run extract: push the workflow with source table and destination object as parameters,
    // then wait up to five minutes for the attempt to succeed
    String objectName = GCS_PREFIX + "test.csv";
    addWorkflow(projectDir, "acceptance/bigquery/extract.dig");
    Id attemptId = pushAndStart(server.endpoint(), projectDir, "extract", ImmutableMap.of(
            "src_dataset", datasetId,
            "src_table", tableId,
            "dst_bucket", GCS_TEST_BUCKET,
            "dst_object", objectName,
            "outfile", outfile.toString()));
    expect(Duration.ofMinutes(5), attemptSuccess(server.endpoint(), attemptId));
    assertThat(Files.exists(outfile), is(true));

    // Check that destination file was created
    StorageObject metadata = retryExecutor.run(() -> gcs.objects().get(GCS_TEST_BUCKET, objectName)
            .execute());
    assertThat(metadata.getName(), is(objectName));
    // Download the object's content into an in-memory buffer; IOException is rethrown
    // through Throwables.propagate.
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    retryExecutor.run(() -> {
        try {
            gcs.objects().get(GCS_TEST_BUCKET, objectName)
                    .executeMediaAndDownloadTo(data);
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    });
}
 
Example #21
Source File: KeyByBigQueryTableDestination.java    From gcp-ingestion with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Return the appropriate table destination instance for the given document type and other
 * attributes.
 *
 * <p>The attributes are copied, the namespace and document type are normalized, every '-' in
 * an attribute value is replaced by '_', and the result is substituted into the configured
 * table spec template. The resolved dataset's table listing is then fetched (and cached per
 * dataset) to verify that the destination table exists.
 *
 * @param attributes message attributes used to fill the table spec template; not modified
 * @return the destination for the resolved table spec
 * @throws IllegalArgumentException if the template still contains '$' after substitution, if
 *     the resolved dataset has no tables (or does not exist), or if the resolved table is not
 *     in the dataset's listing
 * @throws UncheckedExecutionException if loading the table listing fails
 */
public TableDestination getTableDestination(Map<String, String> attributes) {
  attributes = new HashMap<>(attributes);

  // We coerce all docType and namespace names to be snake_case and to remove invalid
  // characters; these transformations MUST match with the transformations applied by the
  // jsonschema-transpiler and mozilla-schema-generator when creating table schemas in BigQuery.
  final String namespace = attributes.get(Attribute.DOCUMENT_NAMESPACE);
  final String docType = attributes.get(Attribute.DOCUMENT_TYPE);
  if (namespace != null) {
    attributes.put(Attribute.DOCUMENT_NAMESPACE, getAndCacheNormalizedName(namespace));
  }
  if (docType != null) {
    attributes.put(Attribute.DOCUMENT_TYPE, getAndCacheNormalizedName(docType));
  }

  // Only letters, numbers, and underscores are allowed in BigQuery dataset and table names,
  // but some doc types and namespaces contain '-', so we convert to '_'; we don't pass all
  // values through getAndCacheBqName to avoid expensive regex operations and polluting the
  // cache of transformed field names.
  // transformValues returns a lazy view, so the function runs on every lookup; the char-based
  // String.replace gives the same result as replaceAll("-", "_") without the regex engine.
  attributes = Maps.transformValues(attributes, v -> v.replace('-', '_'));

  final String tableSpec = StringSubstitutor.replace(tableSpecTemplate.get(), attributes);

  // Send to error collection if incomplete tableSpec; $ is not a valid char in tableSpecs.
  if (tableSpec.contains("$")) {
    throw new IllegalArgumentException("Element did not contain all the attributes needed to"
        + " fill out variables in the configured BigQuery output template: "
        + tableSpecTemplate.get());
  }

  final TableDestination tableDestination = new TableDestination(tableSpec, null,
      new TimePartitioning().setField(partitioningField.get()),
      new Clustering().setFields(clusteringFields.get()));
  final TableReference ref = BigQueryHelpers.parseTableSpec(tableSpec);
  // The dataset reference is the key of the table-listing cache below.
  final DatasetReference datasetRef = new DatasetReference().setProjectId(ref.getProjectId())
      .setDatasetId(ref.getDatasetId());

  // The BigQuery service is created on first use, for the project of the first resolved table.
  if (bqService == null) {
    bqService = BigQueryOptions.newBuilder().setProjectId(ref.getProjectId())
        .setRetrySettings(RETRY_SETTINGS).build().getService();
  }

  // Get and cache a listing of table names for this dataset.
  Set<String> tablesInDataset;
  if (tableListingCache == null) {
    // We need to be very careful about settings for the cache here. We have had significant
    // issues in the past due to exceeding limits on BigQuery API requests; see
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1623000
    tableListingCache = CacheBuilder.newBuilder().expireAfterWrite(Duration.ofMinutes(10))
        .build();
  }
  try {
    tablesInDataset = tableListingCache.get(datasetRef, () -> {
      // A missing dataset yields an empty set, which is rejected below.
      Set<String> tableSet = new HashSet<>();
      Dataset dataset = bqService.getDataset(ref.getDatasetId());
      if (dataset != null) {
        dataset.list().iterateAll().forEach(t -> {
          tableSet.add(t.getTableId().getTable());
        });
      }
      return tableSet;
    });
  } catch (ExecutionException e) {
    throw new UncheckedExecutionException(e.getCause());
  }

  // Send to error collection if dataset or table doesn't exist so BigQueryIO doesn't throw a
  // pipeline execution exception.
  if (tablesInDataset.isEmpty()) {
    throw new IllegalArgumentException("Resolved destination dataset does not exist or has no "
        + " tables for tableSpec " + tableSpec);
  } else if (!tablesInDataset.contains(ref.getTableId())) {
    throw new IllegalArgumentException("Resolved destination table does not exist: " + tableSpec);
  }

  return tableDestination;
}
 
Example #22
Source File: AbstractBigQueryIoIntegrationTestBase.java    From hadoop-connectors with Apache License 2.0 4 votes vote down vote up
/**
 * Per-test fixture: initializes mocks, raises log levels, resolves the project id, creates a
 * temporary BigQuery dataset and GCS bucket with names unique to this run, and stubs the
 * mocked task-attempt and job contexts to return the test configuration and fake ids.
 *
 * @throws IOException if creating the dataset or the bucket fails
 * @throws GeneralSecurityException declared for the BigQuery client setup calls
 */
@Before
public void setUp()
    throws IOException, GeneralSecurityException {
  MockitoAnnotations.initMocks(this);

  // Log the classes under test at FINE level.
  LoggerConfig.getConfig(GsonBigQueryInputFormat.class).setLevel(Level.FINE);
  LoggerConfig.getConfig(BigQueryUtils.class).setLevel(Level.FINE);
  LoggerConfig.getConfig(GsonRecordReader.class).setLevel(Level.FINE);

  bucketHelper = new TestBucketHelper("bq_integration_test");
  // A unique per-setUp String to avoid collisions between test runs.
  String testId = bucketHelper.getUniqueBucketPrefix();

  // The project id comes from the test configuration, falling back to the environment variable.
  projectIdValue = TestConfiguration.getInstance().getProjectId();
  if (Strings.isNullOrEmpty(projectIdValue)) {
    projectIdValue = System.getenv(BIGQUERY_PROJECT_ID_ENVVARNAME);
  }

  checkArgument(
      !Strings.isNullOrEmpty(projectIdValue), "Must provide %s", BIGQUERY_PROJECT_ID_ENVVARNAME);
  testDataset = testId + "_dataset";
  testBucket = testId + "_bucket";

  // We have to create the output dataset ourselves.
  // TODO(user): Extract dataset creation into a library which is also used by
  // BigQueryOutputCommitter.
  Dataset outputDataset = new Dataset();
  DatasetReference datasetReference = new DatasetReference();
  datasetReference.setProjectId(projectIdValue);
  datasetReference.setDatasetId(testDataset);

  config = getConfigForGcsFromBigquerySettings(projectIdValue);
  BigQueryFactory factory = new BigQueryFactory();
  bigqueryInstance = factory.getBigQuery(config);

  Bigquery.Datasets datasets = bigqueryInstance.datasets();
  outputDataset.setDatasetReference(datasetReference);
  logger.atInfo().log(
      "Creating temporary dataset '%s' for project '%s'", testDataset, projectIdValue);
  datasets.insert(projectIdValue, outputDataset).execute();

  // Create the temporary bucket through the Hadoop FileSystem for its gs:// path.
  Path toCreate = new Path(String.format("gs://%s", testBucket));
  FileSystem fs = toCreate.getFileSystem(config);
  logger.atInfo().log("Creating temporary test bucket '%s'", toCreate);
  fs.mkdirs(toCreate);

  // Since the TaskAttemptContext and JobContexts are mostly used just to access a
  // "Configuration" object, we'll mock the two contexts to just return our fake configuration
  // object with which we'll provide the settings we want to test.
  config.clear();
  setConfigForGcsFromBigquerySettings();

  when(mockTaskAttemptContext.getConfiguration())
      .thenReturn(config);
  when(mockJobContext.getConfiguration())
      .thenReturn(config);

  // Have a realistic-looking fake TaskAttemptID.
  int taskNumber = 3;
  int taskAttempt = 2;
  int jobNumber = 42;
  String jobIdString = "jobid" + System.currentTimeMillis();
  JobID jobId = new JobID(jobIdString, jobNumber);
  TaskAttemptID taskAttemptId =
      new TaskAttemptID(new TaskID(jobId, false, taskNumber), taskAttempt);
  when(mockTaskAttemptContext.getTaskAttemptID())
      .thenReturn(taskAttemptId);
  when(mockJobContext.getJobID()).thenReturn(jobId);

  // The table name embeds the job id string, which is built from the current time.
  testTable = testId + "_table_" + jobIdString;
}