com.google.api.services.bigquery.model.JobConfigurationExtract Java Examples

The following examples show how to use com.google.api.services.bigquery.model.JobConfigurationExtract. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: FakeJobService.java From beam with Apache License 2.0

6 votes

/**
 * Registers a new extract job with this fake service, leaving it in the {@code PENDING} state.
 *
 * <p>Only extraction to AVRO is accepted; any other destination format is rejected before the
 * job is recorded. The job is stored under the project id and job id taken from {@code jobRef}.
 *
 * @param jobRef reference supplying the project id and job id the job is stored under
 * @param extractConfig extract configuration attached to the stored job
 * @throws IOException declared by the overridden method; NOTE(review): presumably raised by
 *     {@code verifyUniqueJobId} for a duplicate id — confirm against that helper
 */
@Override
public void startExtractJob(JobReference jobRef, JobConfigurationExtract extractConfig)
    throws IOException {
  checkArgument(
      "AVRO".equals(extractConfig.getDestinationFormat()), "Only extract to AVRO is supported");
  // All bookkeeping happens while holding the lock on the shared job table.
  synchronized (allJobs) {
    verifyUniqueJobId(jobRef.getJobId());
    ++numExtractJobCalls;

    Job job = new Job();
    job.setJobReference(jobRef);
    job.setConfiguration(new JobConfiguration().setExtract(extractConfig));
    // The resource kind is exactly "bigquery#job"; the previous literal carried a stray
    // leading space.
    job.setKind("bigquery#job");
    job.setStatus(new JobStatus().setState("PENDING"));
    allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
  }
}

Example #2

Source File: FakeJobService.java From beam with Apache License 2.0

6 votes

/**
 * Performs an extract job: reads all rows of the configured source table, writes them to every
 * destination URI, and records the per-destination file counts in the job's statistics.
 *
 * @param job the job whose statistics are updated
 * @param extract the extract configuration naming the source table and destination URIs
 * @return a status whose state is {@code "DONE"}
 */
private JobStatus runExtractJob(Job job, JobConfigurationExtract extract)
    throws InterruptedException, IOException {
  TableReference table = extract.getSourceTable();
  String tableId = table.getTableId();

  // Fetch the data first, then the schema, before writing anything out.
  List<TableRow> tableRows =
      datasetService.getAllRows(table.getProjectId(), table.getDatasetId(), tableId);
  TableSchema tableSchema = datasetService.getTable(table).getSchema();

  // One file-count entry per destination URI, in the order the URIs are listed.
  List<Long> fileCounts = Lists.newArrayList();
  for (String uri : extract.getDestinationUris()) {
    fileCounts.add(writeRows(tableId, tableRows, tableSchema, uri));
  }

  JobStatistics4 extractStats = new JobStatistics4().setDestinationUriFileCounts(fileCounts);
  JobStatistics statistics = new JobStatistics().setExtract(extractStats);
  job.setStatistics(statistics);

  return new JobStatus().setState("DONE");
}

Example #3

Source File: BigqueryConnection.java From nomulus with Apache License 2.0

6 votes

/**
 * Launches an asynchronous extract of the given source table to a GCS file path, written in the
 * requested destination format and optionally preceded by a header row.
 *
 * <p>The source must be of type {@code TableType.TABLE}. The returned future holds the
 * destination GCS URI on success.
 */
private ListenableFuture<String> extractTable(
    DestinationTable sourceTable,
    String destinationUri,
    DestinationFormat destinationFormat,
    boolean printHeader) {
  checkArgument(sourceTable.type == TableType.TABLE);

  // Describe the extraction: where from, in what format, where to, and whether to emit headers.
  JobConfigurationExtract extractConfig = new JobConfigurationExtract();
  extractConfig.setSourceTable(sourceTable.getTableReference());
  extractConfig.setDestinationFormat(destinationFormat.toString());
  extractConfig.setDestinationUris(ImmutableList.of(destinationUri));
  extractConfig.setPrintHeader(printHeader);

  JobConfiguration jobConfig = new JobConfiguration().setExtract(extractConfig);
  Job extractJob = new Job().setConfiguration(jobConfig);

  return runJobToCompletion(extractJob, destinationUri);
}

Example #4

Source File: BqExtractOperatorFactory.java From digdag with Apache License 2.0

6 votes

/**
 * Builds the extract job configuration from this operator's parameters.
 *
 * <p>The {@code destination} parameter may be either a list of URIs or a single URI; both forms
 * are stored in the {@code destinationUris} list. The source table is read from the
 * {@code _command} parameter and resolved against the optional {@code dataset} parameter.
 * The optional {@code print_header}, {@code field_delimiter}, {@code destination_format} and
 * {@code compression} parameters are applied only when present.
 *
 * @param projectId project used when resolving the source table reference
 * @return a job configuration whose extract section is populated from the parameters
 */
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationExtract cfg = new JobConfigurationExtract();

    try {
        cfg.setDestinationUris(params.getList("destination", String.class));
    }
    catch (ConfigException ignored) {
        // "destination" could not be read as a list, so read it as a single URI instead.
        // It is still stored through destinationUris: the singular destinationUri field is
        // deprecated in the BigQuery API in favour of a one-element list.
        cfg.setDestinationUris(
                java.util.Collections.singletonList(params.get("destination", String.class)));
    }

    Optional<DatasetReference> defaultDataset = params.getOptional("dataset", String.class)
            .transform(Bq::datasetReference);
    String sourceTable = params.get("_command", String.class);
    cfg.setSourceTable(tableReference(projectId, defaultDataset, sourceTable));

    // transform() is used purely for its side effect here: each setter runs only when the
    // corresponding optional parameter is present.
    params.getOptional("print_header", boolean.class).transform(cfg::setPrintHeader);
    params.getOptional("field_delimiter", String.class).transform(cfg::setFieldDelimiter);
    params.getOptional("destination_format", String.class).transform(cfg::setDestinationFormat);
    params.getOptional("compression", String.class).transform(cfg::setCompression);

    return new JobConfiguration()
            .setExtract(cfg);
}

Example #5

Source File: BigQueryServicesImpl.java From beam with Apache License 2.0

5 votes

/**
 * {@inheritDoc}
 *
 * <p>The RPC is attempted up to {@code MAX_RPC_RETRIES} times, stopping at the first success.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public void startExtractJob(JobReference jobRef, JobConfigurationExtract extractConfig)
    throws InterruptedException, IOException {
  // Wrap the extract settings in a job bound to the caller-supplied reference.
  JobConfiguration configuration = new JobConfiguration().setExtract(extractConfig);
  Job extractJob = new Job();
  extractJob.setJobReference(jobRef);
  extractJob.setConfiguration(configuration);

  startJob(extractJob, errorExtractor, client);
}

Example #6

Source File: BigQuerySourceBase.java From beam with Apache License 2.0

5 votes

/**
 * Runs an AVRO extract of {@code table} into {@code extractDestinationDir} and waits for it to
 * finish.
 *
 * @return the extracted file paths, as reported by {@code BigQueryIO.getExtractFilePaths}
 * @throws IOException if the polled job does not end in {@code Status.SUCCEEDED}
 */
private List<ResourceId> executeExtract(
    String jobId,
    TableReference table,
    JobService jobService,
    String executingProject,
    String extractDestinationDir,
    String bqLocation)
    throws InterruptedException, IOException {

  JobReference reference = new JobReference();
  reference.setProjectId(executingProject);
  reference.setLocation(bqLocation);
  reference.setJobId(jobId);

  JobConfigurationExtract config = new JobConfigurationExtract();
  config.setSourceTable(table);
  config.setDestinationFormat("AVRO");
  config.setDestinationUris(
      ImmutableList.of(BigQueryIO.getExtractDestinationUri(extractDestinationDir)));

  LOG.info("Starting BigQuery extract job: {}", jobId);
  jobService.startExtractJob(reference, config);
  Job finishedJob = jobService.pollJob(reference, JOB_POLL_MAX_RETRIES);

  // Happy path first: a succeeded job yields its output files.
  boolean succeeded = BigQueryHelpers.parseStatus(finishedJob) == Status.SUCCEEDED;
  if (succeeded) {
    LOG.info("BigQuery extract job completed: {}", jobId);
    return BigQueryIO.getExtractFilePaths(extractDestinationDir, finishedJob);
  }

  // Anything else is reported with the job id and a readable status.
  String failedJobId = finishedJob.getJobReference().getJobId();
  throw new IOException(
      String.format(
          "Extract job %s failed, status: %s.",
          failedJobId,
          BigQueryHelpers.statusToPrettyString(finishedJob.getStatus())));
}

Example #7

Source File: AbstractExportToCloudStorage.java From hadoop-connectors with Apache License 2.0

5 votes

/**
 * Submits the extract job that exports {@code tableToExport} to the configured export paths and
 * remembers the reference of the job that was inserted or found.
 *
 * @throws IOException if the job cannot be inserted; the original failure is kept as the cause
 */
@Override
public void beginExport() throws IOException {
  // Source table, destination paths and output format of the extraction.
  JobConfigurationExtract extraction = new JobConfigurationExtract();
  extraction.setSourceTable(tableToExport.getTableReference());
  extraction.setDestinationUris(getExportPaths());
  extraction.set(DESTINATION_FORMAT_KEY, fileFormat.getFormatIdentifier());

  JobConfiguration jobConfig = new JobConfiguration().setExtract(extraction);

  JobReference reference =
      bigQueryHelper.createJobReference(
          projectId, "exporttocloudstorage", tableToExport.getLocation());

  Job exportJob = new Job().setConfiguration(jobConfig).setJobReference(reference);

  // Submit the job, adding the table name to any I/O failure.
  try {
    Job response = bigQueryHelper.insertJobOrFetchDuplicate(projectId, exportJob);
    logger.atFine().log("Got response '%s'", response);
    exportJobReference = response.getJobReference();
  } catch (IOException e) {
    throw new IOException(
        String.format(
            "Error while exporting table %s",
            BigQueryStrings.toString(tableToExport.getTableReference())),
        e);
  }
}

Example #8

Source File: BigQueryServices.java From beam with Apache License 2.0

4 votes

/**
 * Start a BigQuery extract job.
 *
 * @param jobRef reference for the job to start
 * @param extractConfig extract configuration for the job
 * @throws InterruptedException declared for implementations; NOTE(review): presumably raised
 *     when the calling thread is interrupted while starting the job — confirm per implementation
 * @throws IOException declared for implementations; the exact failure conditions are defined by
 *     each implementation
 */
void startExtractJob(JobReference jobRef, JobConfigurationExtract extractConfig)
    throws InterruptedException, IOException;

Example #9

Source File: BigQueryHelper.java From hadoop-connectors with Apache License 2.0

4 votes

/**
 * Exports a BigQuery table to GCS as newline-delimited JSON, optionally polling until the export
 * job completes.
 *
 * @param projectId the project on whose behalf to perform the export.
 * @param tableRef the table to export.
 * @param gcsPaths the GCS paths to export to.
 * @param awaitCompletion if true, block and poll until the job completes; otherwise return as
 *     soon as the job has been successfully dispatched.
 * @throws IOException on IO error.
 * @throws InterruptedException on interrupt.
 */
public void exportBigQueryToGcs(
    String projectId, TableReference tableRef, List<String> gcsPaths, boolean awaitCompletion)
    throws IOException, InterruptedException {
  logger.atFine().log(
      "exportBigQueryToGcs(bigquery, '%s', '%s', '%s', '%s')",
      projectId, BigQueryStrings.toString(tableRef), gcsPaths, awaitCompletion);

  String firstPath = gcsPaths.isEmpty() ? "(empty)" : gcsPaths.get(0);
  logger.atInfo().log(
      "Exporting table '%s' to %s paths; path[0] is '%s'; awaitCompletion: %s",
      BigQueryStrings.toString(tableRef), gcsPaths.size(), firstPath, awaitCompletion);

  // Source table, destination paths and output format of the extraction.
  JobConfigurationExtract extraction = new JobConfigurationExtract();
  extraction.setSourceTable(tableRef);
  extraction.setDestinationUris(gcsPaths);
  extraction.set("destinationFormat", "NEWLINE_DELIMITED_JSON");

  JobConfiguration jobConfig = new JobConfiguration().setExtract(extraction);

  // The job reference uses the location of the table being exported.
  JobReference reference =
      createJobReference(
          projectId, "direct-bigqueryhelper-export", getTable(tableRef).getLocation());

  Job exportJob = new Job().setConfiguration(jobConfig).setJobReference(reference);

  // Insert and run job.
  insertJobOrFetchDuplicate(projectId, exportJob);

  if (!awaitCompletion) {
    return;
  }

  // Poll until job is complete.
  BigQueryUtils.waitForJobCompletion(service, projectId, reference, () -> {});
}