com.google.cloud.bigquery.Dataset Java Examples

The following examples show how to use com.google.cloud.bigquery.Dataset. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 6 votes vote down vote up
/**
 * Example of creating a dataset.
 *
 * <p>Returns the created dataset, or {@code null} when the create call fails with a
 * {@code BigQueryException}.
 */
// [TARGET create(DatasetInfo, DatasetOption...)]
// [VARIABLE "my_dataset_name"]
public Dataset createDataset(String datasetName) {
  // [START bigquery_create_dataset]
  DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetName).build();
  Dataset dataset;
  try {
    dataset = bigquery.create(datasetInfo);
    // the dataset was created
  } catch (BigQueryException e) {
    // the dataset was not created
    dataset = null;
  }
  // [END bigquery_create_dataset]
  return dataset;
}
 
Example #2
Source File: DatasetSnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/**
 * Example of reloading a dataset.
 *
 * <p>Returns whatever {@code reload()} yields, which is {@code null} when the dataset was not
 * found.
 */
// [TARGET reload(DatasetOption...)]
public Dataset reloadDataset() {
  // [START ]
  Dataset refreshed = dataset.reload();
  if (refreshed == null) {
    // The dataset was not found
  }
  // [END ]
  return refreshed;
}
 
Example #3
Source File: ITBigQuerySnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises the create, get, update, list and delete dataset snippets against
 * {@code OTHER_DATASET}, polling both listing variants until the new dataset is visible.
 */
@Test
public void testCreateGetAndDeleteDataset() throws InterruptedException {
  DatasetId datasetId = DatasetId.of(bigquery.getOptions().getProjectId(), OTHER_DATASET);
  String projectId = datasetId.getProject();

  // Create, then read back and update the dataset through the snippets.
  assertNotNull(bigquerySnippets.createDataset(OTHER_DATASET));
  assertEquals(datasetId, bigquerySnippets.getDataset(OTHER_DATASET).getDatasetId());
  assertNotNull(bigquerySnippets.updateDataset(OTHER_DATASET, "new description"));
  assertEquals(
      "new description",
      bigquerySnippets.getDatasetFromId(projectId, OTHER_DATASET).getDescription());

  // Poll the default-project listing, pausing 500 ms between attempts, until the dataset shows.
  while (true) {
    Set<DatasetId> listed =
        Sets.newHashSet(
            Iterators.transform(
                bigquerySnippets.listDatasets().iterateAll().iterator(), TO_DATASET_ID_FUNCTION));
    if (listed.contains(datasetId)) {
      break;
    }
    Thread.sleep(500);
  }

  // Same polling, this time through the listing that takes an explicit project id.
  while (true) {
    Set<DatasetId> listed =
        Sets.newHashSet(
            Iterators.transform(
                bigquerySnippets.listDatasets(projectId).iterateAll().iterator(),
                TO_DATASET_ID_FUNCTION));
    if (listed.contains(datasetId)) {
      break;
    }
    Thread.sleep(500);
  }

  // The first delete removes the dataset; the second one must report nothing left to delete.
  assertTrue(bigquerySnippets.deleteDataset(OTHER_DATASET));
  assertFalse(bigquerySnippets.deleteDatasetFromId(projectId, OTHER_DATASET));
}
 
Example #4
Source File: ITDatasetInfoSnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/** Checks that the expiration snippet changes the dataset's default table lifetime. */
@Test
public void testUpdateDatasetExpiration() throws InterruptedException {
  // The snippet hands back the lifetime as it was before the update.
  Long beforeExpiration =
      datasetInfoSnippets.updateDatasetExpiration(bigquery.getDataset(DATASET));
  // Fetch the dataset again to observe the stored value.
  Long afterExpiration = bigquery.getDataset(DATASET).getDefaultTableLifetime();
  assertNotEquals(beforeExpiration, afterExpiration);
}
 
Example #5
Source File: ITDatasetInfoSnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/** Checks that the access snippet leaves the dataset with exactly one more ACL entry. */
@Test
public void testUpdateDatasetAccess() throws InterruptedException {
  // The snippet hands back the ACL list as it was before the update.
  List<Acl> beforeAcls = datasetInfoSnippets.updateDatasetAccess(bigquery.getDataset(DATASET));
  // Fetch the dataset again to observe the stored ACLs.
  List<Acl> afterAcls = bigquery.getDataset(DATASET).getAcl();
  assertEquals(beforeAcls.size() + 1, afterAcls.size());
}
 
Example #6
Source File: ITDatasetSnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/** Checks that the reload snippet observes a friendly name set through a separate update. */
@Test
public void testReload() {
  assertNull(dataset.getFriendlyName());

  // Push a friendly name to the service without touching the local dataset handle.
  dataset.toBuilder().setFriendlyName(FRIENDLY_NAME).build().update();

  assertEquals(FRIENDLY_NAME, datasetSnippets.reloadDataset().getFriendlyName());
}
 
Example #7
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/** Example of getting a dataset identified by project id and dataset name. */
// [TARGET getDataset(DatasetId, DatasetOption...)]
// [VARIABLE "my_project_id"]
// [VARIABLE "my_dataset_name"]
public Dataset getDatasetFromId(String projectId, String datasetName) {
  // [START bigquery_get_dataset]
  Dataset dataset = bigquery.getDataset(DatasetId.of(projectId, datasetName));
  // [END bigquery_get_dataset]
  return dataset;
}
 
Example #8
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/** Example of getting a dataset by name only. */
// [TARGET getDataset(String, DatasetOption...)]
// [VARIABLE "my_dataset"]
public Dataset getDataset(String datasetName) {
  // [START ]
  Dataset found = bigquery.getDataset(datasetName);
  // [END ]
  return found;
}
 
Example #9
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/**
 * Example of listing datasets in a project, specifying the page size.
 *
 * <p>Walks every listed dataset once before returning the first page.
 */
// [TARGET listDatasets(String, DatasetListOption...)]
// [VARIABLE "my_project_id"]
public Page<Dataset> listDatasets(String projectId) {
  // [START bigquery_list_datasets]
  // List datasets in a specified project
  Page<Dataset> datasets = bigquery.listDatasets(projectId, DatasetListOption.pageSize(100));
  datasets
      .iterateAll()
      .forEach(
          dataset -> {
            // do something with the dataset
          });
  // [END bigquery_list_datasets]
  return datasets;
}
 
Example #10
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/**
 * Example of listing datasets, specifying the page size.
 *
 * <p>Walks every listed dataset once before returning the first page.
 */
// [TARGET listDatasets(DatasetListOption...)]
public Page<Dataset> listDatasets() {
  // [START bigquery_list_datasets]
  // List datasets in the default project
  Page<Dataset> datasets = bigquery.listDatasets(DatasetListOption.pageSize(100));
  datasets
      .iterateAll()
      .forEach(
          dataset -> {
            // do something with the dataset
          });
  // [END bigquery_list_datasets]
  return datasets;
}
 
Example #11
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/**
 * Example of updating a dataset by changing its description.
 *
 * <p>Fetches the current dataset, rebuilds it with the new description and submits the update.
 */
// [TARGET update(DatasetInfo, DatasetOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "some_new_description"]
public Dataset updateDataset(String datasetName, String newDescription) {
  // [START bigquery_update_dataset_description]
  Dataset updated =
      bigquery.update(
          bigquery.getDataset(datasetName).toBuilder().setDescription(newDescription).build());
  // [END bigquery_update_dataset_description]
  return updated;
}
 
Example #12
Source File: DatasetSnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/** Example of updating a dataset by giving it a new friendly name. */
// [TARGET update(DatasetOption...)]
// [VARIABLE "my_friendly_name"]
public Dataset updateDataset(String friendlyName) {
  // [START ]
  Dataset updatedDataset = dataset.toBuilder().setFriendlyName(friendlyName).build().update();
  // [END ]
  return updatedDataset;
}
 
Example #13
Source File: BigQueryDatasetRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the dataset names of the configured project into a set.
 *
 * @return the dataset ids (name part only) of every dataset listed for the project
 */
@Override
public Set<String> listDatasets() throws IOException {
    BigQuery bigquery = BigQueryConnection.createClient(properties.getDatastoreProperties());
    Page<Dataset> firstPage = bigquery.listDatasets(
            properties.getDatastoreProperties().projectName.getValue(),
            BigQuery.DatasetListOption.pageSize(100));
    Set<String> names = new HashSet<>();
    // iterateAll() walks every listed dataset, not only the first page.
    for (Dataset dataset : firstPage.iterateAll()) {
        names.add(dataset.getDatasetId().getDataset());
    }
    return names;
}
 
Example #14
Source File: BigQueryOutputTest.java    From flo with Apache License 2.0 5 votes vote down vote up
/** Publishing must surface the BigQueryException thrown while waiting for the job. */
@Test(expected = BigQueryException.class)
public void shouldFailWhenJobTerminatesExceptionally() throws InterruptedException {
  final Dataset existingDataset = mock(Dataset.class);
  when(bigQuery.getDataset(DATASET_ID)).thenReturn(existingDataset);
  when(bigQuery.create(any(JobInfo.class))).thenReturn(job);

  // Waiting for the job blows up with a BigQuery failure.
  final BigQueryException failure = new BigQueryException(mock(IOException.class));
  doThrow(failure).when(job).waitFor(any(RetryOption.class));

  BigQueryOutput.create(() -> floBigQueryClient, TABLE_ID).provide(null).publish();
}
 
Example #15
Source File: BigQueryOutputTest.java    From flo with Apache License 2.0 5 votes vote down vote up
/** Publishing must fail when waiting for the job yields {@code null}. */
@Test(expected = RuntimeException.class)
public void shouldFailWhenJobDisappears() throws InterruptedException {
  final Dataset existingDataset = mock(Dataset.class);
  when(bigQuery.getDataset(DATASET_ID)).thenReturn(existingDataset);
  when(bigQuery.create(any(JobInfo.class))).thenReturn(job);

  // A null result from waitFor is how this test models a job that is gone.
  when(job.waitFor(any(RetryOption.class))).thenReturn(null);

  BigQueryOutput.create(() -> floBigQueryClient, TABLE_ID).provide(null).publish();
}
 
Example #16
Source File: BigQueryOutputTest.java    From flo with Apache License 2.0 5 votes vote down vote up
/** Publishing must fail when the finished job reports an error in its status. */
@Test(expected = RuntimeException.class)
public void shouldFailWhenJobTerminatesWithError() throws InterruptedException {
  final Dataset existingDataset = mock(Dataset.class);
  when(bigQuery.getDataset(DATASET_ID)).thenReturn(existingDataset);
  when(bigQuery.create(any(JobInfo.class))).thenReturn(job);
  when(job.waitFor(any(RetryOption.class))).thenReturn(job);

  // The completed job carries a status whose error is set.
  final JobStatus failedStatus = mock(JobStatus.class);
  when(job.getStatus()).thenReturn(failedStatus);
  when(failedStatus.getError()).thenReturn(new BigQueryError("", "", "job error"));

  BigQueryOutput.create(() -> floBigQueryClient, TABLE_ID).provide(null).publish();
}
 
Example #17
Source File: BigQueryOutputTest.java    From flo with Apache License 2.0 5 votes vote down vote up
/** A lookup must yield the table id when both the dataset and the table are found. */
@Test
public void shouldReturnTableIdWhenExists() {
  final Dataset existingDataset = mock(Dataset.class);
  final Table existingTable = mock(Table.class);
  when(bigQuery.getDataset(DATASET_ID)).thenReturn(existingDataset);
  when(bigQuery.getTable(TABLE_ID)).thenReturn(existingTable);

  final TableId tableId =
      BigQueryOutput.create(() -> floBigQueryClient, TABLE_ID).lookup(null).get();

  assertThat(tableId, is(TABLE_ID));
}
 
Example #18
Source File: QuickStartIT.java    From java-docs-samples with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixture: obtains the default BigQuery service, creates the test dataset when the
 * lookup finds none, and redirects {@code System.out} into an in-memory buffer.
 */
@Before
public void setUp() {
  bigquery = BigQueryOptions.getDefaultInstance().getService();
  if (bigquery.getDataset(datasetName) == null) {
    // Only the side effect matters here; the returned handle was never used.
    bigquery.create(DatasetInfo.newBuilder(datasetName).build());
  }
  // Route stdout through `bout` so it is captured in memory.
  bout = new ByteArrayOutputStream();
  out = new PrintStream(bout);
  System.setOut(out);
}
 
Example #19
Source File: BQClient.java    From beast with Apache License 2.0 5 votes vote down vote up
/**
 * Brings the dataset and table identified by {@code tableID} in line with the configuration.
 *
 * <p>The dataset is created when missing, or updated when its labels differ from the configured
 * ones. The table is created when missing, or updated when {@code shouldUpdateTable} says the
 * schema change warrants it; table updates are timed and counted through {@code statsClient}.
 *
 * @param tableInfo desired table definition, used for both create and update
 */
private void upsertDatasetAndTable(TableInfo tableInfo) {
    Dataset dataSet = bigquery.getDataset(tableID.getDataset());
    // Check existence on the handle already fetched. Looking the dataset up a second time cost an
    // extra remote call and would throw a NullPointerException if that lookup returned null.
    if (dataSet == null || !dataSet.exists()) {
        bigquery.create(
                Dataset.newBuilder(tableID.getDataset())
                        .setLabels(bqConfig.getDatasetLabels())
                        .build()
        );
        log.info("Successfully CREATED bigquery DATASET: {}", tableID.getDataset());
    } else if (!dataSet.getLabels().equals(bqConfig.getDatasetLabels())) {
        // Dataset exists but its labels drifted from the configuration.
        bigquery.update(
                Dataset.newBuilder(tableID.getDataset())
                        .setLabels(bqConfig.getDatasetLabels())
                        .build()
        );
        log.info("Successfully UPDATED bigquery DATASET: {} with labels", tableID.getDataset());
    }

    Table table = bigquery.getTable(tableID);
    if (table == null || !table.exists()) {
        bigquery.create(tableInfo);
        log.info("Successfully CREATED bigquery TABLE: {}", tableID.getTable());
    } else {
        Schema existingSchema = table.getDefinition().getSchema();
        Schema updatedSchema = tableInfo.getDefinition().getSchema();

        if (shouldUpdateTable(tableInfo, table, existingSchema, updatedSchema)) {
            // Start the clock just before the update; timeIt is handed this start instant.
            Instant start = Instant.now();
            bigquery.update(tableInfo);
            log.info("Successfully UPDATED bigquery TABLE: {}", tableID.getTable());
            statsClient.timeIt("bq.upsert.table.time," + statsClient.getBqTags(), start);
            statsClient.increment("bq.upsert.table.count," + statsClient.getBqTags());
        } else {
            log.info("Skipping bigquery table update, since proto schema hasn't changed");
        }
    }
}
 
Example #20
Source File: BigQueryConnector.java    From coolretailer with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the BigQuery client bean and logs the name of every dataset it can list.
 */
@Bean
public BigQuery getInstance() throws IOException {
	// projectId needs to be set explicitly even if it's there in the json key!!
	BigQuery bigQuery = BigQueryOptions.newBuilder()
			.setProjectId(porjectIdProvider.getProjectId())
			.setCredentials(credentialsProvider.getCredentials())
			.build()
			.getService();

	// Use the client.
	LOGGER.info("Datasets:");
	bigQuery.listDatasets().iterateAll()
			.forEach(dataset -> LOGGER.info(dataset.getDatasetId().getDataset()));
	return bigQuery;
}
 
Example #21
Source File: BigQueryClient.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Records a mapping from the lower-cased form of the dataset's id to its original id, unless a
 * mapping for that lower-cased id is already present, and hands the dataset back unchanged.
 */
private Dataset addDataSetMappingIfNeeded(Dataset dataset)
{
    DatasetId originalId = dataset.getDatasetId();
    String lowerCasedName = originalId.getDataset().toLowerCase(ENGLISH);
    datasetIds.putIfAbsent(DatasetId.of(originalId.getProject(), lowerCasedName), originalId);
    return dataset;
}
 
Example #22
Source File: DatasetSnippets.java    From google-cloud-java with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the snippet holder around the given dataset.
 *
 * @param dataset stored in the {@code dataset} field
 */
public DatasetSnippets(Dataset dataset) {
  this.dataset = dataset;
}
 
Example #23
Source File: BigQueryClient.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Lists the datasets of the given project, passing each one through
 * {@code addDataSetMappingIfNeeded} as it is iterated.
 *
 * <p>The listing request is issued when this method is called; the returned {@code Iterable}
 * can be traversed more than once.
 *
 * @param projectId project whose datasets are listed
 * @return a lazily transformed view over all listed datasets
 */
Iterable<Dataset> listDatasets(String projectId)
{
    // Hold on to the Iterable, not a single Iterator. Capturing one Iterator made the returned
    // Iterable silently empty on every traversal after the first.
    final Iterable<Dataset> datasets = bigQuery.listDatasets(projectId).iterateAll();
    return () -> Iterators.transform(datasets.iterator(), this::addDataSetMappingIfNeeded);
}
 
Example #24
Source File: BigQueryMocking.java    From flo with Apache License 2.0 4 votes vote down vote up
/** Returns a stub dataset for the given id with its location fixed to "test". */
@Override
public DatasetInfo getDataset(DatasetId datasetId) {
  // TODO: make the location mockable?
  final DatasetInfo stub = Dataset.newBuilder(datasetId).setLocation("test").build();
  return stub;
}
 
Example #25
Source File: BigQueryExample.java    From google-cloud-java with Apache License 2.0 4 votes vote down vote up
/** Prints every dataset listed by the given client to standard output, one per line. */
@Override
public void run(BigQuery bigquery, Void arg) {
  bigquery.listDatasets().iterateAll().forEach(dataset -> System.out.println(dataset));
}
 
Example #26
Source File: KeyByBigQueryTableDestination.java    From gcp-ingestion with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Return the appropriate table destination instance for the given document type and other
 * attributes.
 *
 * <p>The table spec is built by substituting the normalized attributes into the configured
 * template; the resolved dataset and table are then checked against a cached table listing.
 *
 * @param attributes values used to fill the table spec template; the passed map is copied, not
 *     modified
 * @return the destination for the resolved table spec
 * @throws IllegalArgumentException if the resolved spec still contains {@code $}, if the table
 *     listing for the dataset is empty, or if the table is not in that listing
 */
public TableDestination getTableDestination(Map<String, String> attributes) {
  attributes = new HashMap<>(attributes);

  // We coerce all docType and namespace names to be snake_case and to remove invalid
  // characters; these transformations MUST match with the transformations applied by the
  // jsonschema-transpiler and mozilla-schema-generator when creating table schemas in BigQuery.
  final String namespace = attributes.get(Attribute.DOCUMENT_NAMESPACE);
  final String docType = attributes.get(Attribute.DOCUMENT_TYPE);
  if (namespace != null) {
    attributes.put(Attribute.DOCUMENT_NAMESPACE, getAndCacheNormalizedName(namespace));
  }
  if (docType != null) {
    attributes.put(Attribute.DOCUMENT_TYPE, getAndCacheNormalizedName(docType));
  }

  // Only letters, numbers, and underscores are allowed in BigQuery dataset and table names,
  // but some doc types and namespaces contain '-', so we convert to '_'; we don't pass all
  // values through getAndCacheBqName to avoid expensive regex operations and polluting the
  // cache of transformed field names.
  // The char-based replace keeps this step regex-free, as the note above intends; the result
  // is identical to replaceAll("-", "_").
  attributes = Maps.transformValues(attributes, v -> v.replace('-', '_'));

  final String tableSpec = StringSubstitutor.replace(tableSpecTemplate.get(), attributes);

  // Send to error collection if incomplete tableSpec; $ is not a valid char in tableSpecs.
  if (tableSpec.contains("$")) {
    throw new IllegalArgumentException("Element did not contain all the attributes needed to"
        + " fill out variables in the configured BigQuery output template: "
        + tableSpecTemplate.get());
  }

  final TableDestination tableDestination = new TableDestination(tableSpec, null,
      new TimePartitioning().setField(partitioningField.get()),
      new Clustering().setFields(clusteringFields.get()));
  final TableReference ref = BigQueryHelpers.parseTableSpec(tableSpec);
  final DatasetReference datasetRef = new DatasetReference().setProjectId(ref.getProjectId())
      .setDatasetId(ref.getDatasetId());

  // The client is created on first use, with the project of the first table spec resolved.
  if (bqService == null) {
    bqService = BigQueryOptions.newBuilder().setProjectId(ref.getProjectId())
        .setRetrySettings(RETRY_SETTINGS).build().getService();
  }

  // Get and cache a listing of table names for this dataset.
  Set<String> tablesInDataset;
  if (tableListingCache == null) {
    // We need to be very careful about settings for the cache here. We have had significant
    // issues in the past due to exceeding limits on BigQuery API requests; see
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1623000
    tableListingCache = CacheBuilder.newBuilder().expireAfterWrite(Duration.ofMinutes(10))
        .build();
  }
  try {
    // A missing dataset is cached as an empty set, the same as a dataset with no tables.
    tablesInDataset = tableListingCache.get(datasetRef, () -> {
      Set<String> tableSet = new HashSet<>();
      Dataset dataset = bqService.getDataset(ref.getDatasetId());
      if (dataset != null) {
        dataset.list().iterateAll().forEach(t -> {
          tableSet.add(t.getTableId().getTable());
        });
      }
      return tableSet;
    });
  } catch (ExecutionException e) {
    throw new UncheckedExecutionException(e.getCause());
  }

  // Send to error collection if dataset or table doesn't exist so BigQueryIO doesn't throw a
  // pipeline execution exception.
  if (tablesInDataset.isEmpty()) {
    throw new IllegalArgumentException("Resolved destination dataset does not exist or has no "
        + " tables for tableSpec " + tableSpec);
  } else if (!tablesInDataset.contains(ref.getTableId())) {
    throw new IllegalArgumentException("Resolved destination table does not exist: " + tableSpec);
  }

  return tableDestination;
}
 
Example #27
Source File: ITBigQuerySnippets.java    From google-cloud-java with Apache License 2.0 4 votes vote down vote up
/** Maps a dataset to the id returned by its {@code getDatasetId()}. */
@Override
public DatasetId apply(Dataset dataset) {
  return dataset.getDatasetId();
}