com.google.api.services.bigquery.model.DatasetReference Java Examples
The following examples show how to use
com.google.api.services.bigquery.model.DatasetReference.
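For orientation before the project examples: a DatasetReference is a plain value object pairing a project ID with a dataset ID, built through fluent setters. A minimal sketch (the identifiers are placeholders, not taken from any example below):

import com.google.api.services.bigquery.model.Dataset;
import com.google.api.services.bigquery.model.DatasetReference;

public class DatasetReferenceSketch {
    public static void main(String[] args) {
        // Build a reference to a hypothetical my-project:my_dataset.
        DatasetReference ref = new DatasetReference()
                .setProjectId("my-project")
                .setDatasetId("my_dataset");

        // A Dataset resource carries its identity through this reference.
        Dataset dataset = new Dataset().setDatasetReference(ref);
        System.out.println(dataset.getDatasetReference().getDatasetId()); // my_dataset
    }
}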
Example #1
Source File: BqExtractOperatorFactory.java, from digdag (Apache License 2.0)
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationExtract cfg = new JobConfigurationExtract();

    try {
        cfg.setDestinationUris(params.getList("destination", String.class));
    }
    catch (ConfigException ignore) {
        cfg.setDestinationUri(params.get("destination", String.class));
    }

    Optional<DatasetReference> defaultDataset = params.getOptional("dataset", String.class)
            .transform(Bq::datasetReference);

    String sourceTable = params.get("_command", String.class);
    cfg.setSourceTable(tableReference(projectId, defaultDataset, sourceTable));

    params.getOptional("print_header", boolean.class).transform(cfg::setPrintHeader);
    params.getOptional("field_delimiter", String.class).transform(cfg::setFieldDelimiter);
    params.getOptional("destination_format", String.class).transform(cfg::setDestinationFormat);
    params.getOptional("compression", String.class).transform(cfg::setCompression);

    return new JobConfiguration()
            .setExtract(cfg);
}
Example #2
Source File: CheckedBigquery.java, from nomulus (Apache License 2.0)
/**
 * Ensures the dataset exists by trying to create it. Note that it's not appreciably cheaper
 * to check for dataset existence than it is to try to create it and check for exceptions.
 */
// Note that these are not static so they can be mocked for testing.
private void ensureDataset(Bigquery bigquery, String projectId, String datasetId)
    throws IOException {
  try {
    bigquery.datasets()
        .insert(projectId,
            new Dataset().setDatasetReference(
                new DatasetReference()
                    .setProjectId(projectId)
                    .setDatasetId(datasetId)))
        .execute();
  } catch (IOException e) {
    // Swallow errors about a duplicate dataset, and throw any other ones.
    if (!BigqueryJobFailureException.create(e).getReason().equals("duplicate")) {
      throw e;
    }
  }
}
Example #3
Source File: BqDdlOperatorFactory.java, from digdag (Apache License 2.0)
private Table table(String defaultProjectId, Optional<DatasetReference> defaultDataset, TableConfig config)
{
    Optional<String> datasetId = config.dataset().or(defaultDataset.transform(DatasetReference::getDatasetId));
    if (!datasetId.isPresent()) {
        throw new ConfigException("Bad table reference or configuration: Missing 'dataset'");
    }
    return new Table()
            .setTableReference(new TableReference()
                    .setProjectId(config.project().or(defaultProjectId))
                    .setDatasetId(datasetId.get())
                    .setTableId(config.id()))
            .setSchema(config.schema().orNull())
            .setFriendlyName(config.friendly_name().orNull())
            .setExpirationTime(config.expiration_time()
                    .transform(p -> p.getTimestamp().toInstant(request.getTimeZone()).toEpochMilli()).orNull())
            .setTimePartitioning(config.time_partitioning().orNull())
            .setView(config.view().orNull());
}
Example #4
Source File: BqDdlOperatorFactory.java, from digdag (Apache License 2.0)
private Table table(String defaultProjectId, Optional<DatasetReference> defaultDataset, JsonNode node)
{
    if (node.isTextual()) {
        return new Table()
                .setTableReference(Bq.tableReference(defaultProjectId, defaultDataset, node.asText()));
    }
    else {
        TableConfig config;
        try {
            config = objectMapper.readValue(node.traverse(), TableConfig.class);
        }
        catch (IOException e) {
            throw new ConfigException("Invalid table reference or configuration: " + node, e);
        }
        return table(defaultProjectId, defaultDataset, config);
    }
}
Example #5
Source File: ExampleUtils.java, from deployment-examples (MIT License)
private void setupBigQueryTable(
    String projectId, String datasetId, String tableId, TableSchema schema) throws IOException {
  if (bigQueryClient == null) {
    bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build();
  }

  Datasets datasetService = bigQueryClient.datasets();
  if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) {
    Dataset newDataset =
        new Dataset()
            .setDatasetReference(
                new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
    datasetService.insert(projectId, newDataset).execute();
  }

  Tables tableService = bigQueryClient.tables();
  Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId));
  if (table == null) {
    Table newTable =
        new Table()
            .setSchema(schema)
            .setTableReference(
                new TableReference()
                    .setProjectId(projectId)
                    .setDatasetId(datasetId)
                    .setTableId(tableId));
    tableService.insert(projectId, datasetId, newTable).execute();
  } else if (!table.getSchema().equals(schema)) {
    throw new RuntimeException(
        "Table exists and schemas do not match, expecting: "
            + schema.toPrettyString()
            + ", actual: "
            + table.getSchema().toPrettyString());
  }
}
Example #6
Source File: GcpUtil.java, from digdag (Apache License 2.0)
static Dataset createDataset(Bigquery bq, String projectId, String datasetId)
        throws IOException, RetryExecutor.RetryGiveupException
{
    // The project ID is passed separately to datasets().insert(), so the
    // reference itself only needs the dataset ID.
    Dataset dataset = new Dataset()
            .setDatasetReference(new DatasetReference()
                    .setDatasetId(datasetId));
    Dataset created = createDataset(bq, projectId, dataset);
    assertThat(datasetExists(bq, projectId, datasetId), is(true));
    return created;
}
Example #7
Source File: BigQueryIT.java, from digdag (Apache License 2.0)
@Test
public void testLoad()
        throws Exception
{
    assumeThat(GCS_TEST_BUCKET, not(isEmptyOrNullString()));

    // Create source data object
    String objectName = GCS_PREFIX + "test.csv";
    byte[] data = Joiner.on('\n').join("a,b", "c,d").getBytes(UTF_8);
    InputStreamContent content = new InputStreamContent("text/csv", new ByteArrayInputStream(data))
            .setLength(data.length);
    StorageObject metadata = new StorageObject().setName(objectName);
    retryExecutor.run(() -> gcs.objects()
            .insert(GCS_TEST_BUCKET, metadata, content)
            .execute());

    // Create output dataset
    String datasetId = BQ_TAG + "_load_test";
    Dataset dataset = new Dataset().setDatasetReference(new DatasetReference()
            .setProjectId(gcpProjectId)
            .setDatasetId(datasetId));
    retryExecutor.run(() -> bq.datasets().insert(gcpProjectId, dataset)
            .execute());

    // Run load
    String tableId = "data";
    addWorkflow(projectDir, "acceptance/bigquery/load.dig");
    Id attemptId = pushAndStart(server.endpoint(), projectDir, "load", ImmutableMap.of(
            "source_bucket", GCS_TEST_BUCKET,
            "source_object", objectName,
            "target_dataset", datasetId,
            "target_table", tableId,
            "outfile", outfile.toString()));
    expect(Duration.ofMinutes(5), attemptSuccess(server.endpoint(), attemptId));
    assertThat(Files.exists(outfile), is(true));

    // Check that destination table was created
    Table destinationTable = retryExecutor.run(() -> bq.tables().get(gcpProjectId, datasetId, tableId).execute());
    assertThat(destinationTable.getTableReference().getTableId(), is(tableId));
}
Example #8
Source File: BqOperatorFactory.java, from digdag (Apache License 2.0)
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationQuery cfg = new JobConfigurationQuery()
            .setQuery(query);

    cfg.setUseLegacySql(params.get("use_legacy_sql", boolean.class, false));

    params.getOptional("allow_large_results", boolean.class).transform(cfg::setAllowLargeResults);
    params.getOptional("use_query_cache", Boolean.class).transform(cfg::setUseQueryCache);
    params.getOptional("create_disposition", String.class).transform(cfg::setCreateDisposition);
    params.getOptional("write_disposition", String.class).transform(cfg::setWriteDisposition);
    params.getOptional("flatten_results", Boolean.class).transform(cfg::setFlattenResults);
    params.getOptional("maximum_billing_tier", Integer.class).transform(cfg::setMaximumBillingTier);
    params.getOptional("priority", String.class).transform(cfg::setPriority);

    params.getOptional("table_definitions", new TypeReference<Map<String, ExternalDataConfiguration>>() {})
            .transform(cfg::setTableDefinitions);

    params.getOptional("user_defined_function_resources", new TypeReference<List<UserDefinedFunctionResource>>() {})
            .transform(cfg::setUserDefinedFunctionResources);

    Optional<DatasetReference> defaultDataset = params.getOptional("dataset", String.class)
            .transform(Bq::datasetReference);
    defaultDataset.transform(cfg::setDefaultDataset);

    params.getOptional("destination_table", String.class)
            .transform(s -> cfg.setDestinationTable(tableReference(projectId, defaultDataset, s)));

    return new JobConfiguration()
            .setQuery(cfg);
}
Example #9
Source File: Bq.java, from digdag (Apache License 2.0)
static DatasetReference datasetReference(Optional<String> defaultProjectId, String s)
{
    Matcher matcher = DATASET_REFERENCE_PATTERN.matcher(s);
    if (!matcher.matches()) {
        throw new IllegalArgumentException("Bad dataset reference: " + s);
    }
    return new DatasetReference()
            .setProjectId(Optional.fromNullable(matcher.group("project")).or(defaultProjectId).orNull())
            .setDatasetId(matcher.group("dataset"));
}
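DATASET_REFERENCE_PATTERN is defined elsewhere in Bq.java and is not part of this excerpt. Judging from the named groups consumed above, a compatible pattern would accept an optional project: prefix; the following is a hypothetical reconstruction, not the actual digdag source:

// Assumption: accepts "dataset" or "project:dataset", exposing the named
// groups "project" and "dataset" that datasetReference() reads.
private static final Pattern DATASET_REFERENCE_PATTERN =
        Pattern.compile("^(?:(?<project>[^:.]+):)?(?<dataset>[^:.]+)$");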
Example #10
Source File: Bq.java, from digdag (Apache License 2.0)
static TableReference tableReference(String defaultProjectId, Optional<DatasetReference> defaultDataset, String s)
{
    Matcher matcher = TABLE_REFERENCE_PATTERN.matcher(s);
    if (!matcher.matches()) {
        throw new IllegalArgumentException("Bad table reference: " + s);
    }

    String project = matcher.group("project");
    if (project == null) {
        if (defaultDataset.isPresent() && defaultDataset.get().getProjectId() != null) {
            project = defaultDataset.get().getProjectId();
        }
        else {
            project = defaultProjectId;
        }
    }

    Optional<String> dataset = Optional.fromNullable(matcher.group("dataset"))
            .or(defaultDataset.transform(DatasetReference::getDatasetId));

    String table = matcher.group("table");

    if (!dataset.isPresent()) {
        throw new IllegalArgumentException(
                "Bad table reference. Either configure 'dataset' or include dataset name in table reference: " + s);
    }

    return new TableReference()
            .setProjectId(project)
            .setDatasetId(dataset.get())
            .setTableId(table);
}
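As a usage note (with hypothetical identifiers), the resolution order above means the project ID is taken from the table string itself, then from the default dataset, then from the default project:

// Hypothetical: a default dataset reference pointing at shared-project:shared_ds.
Optional<DatasetReference> defaultDataset = Optional.of(new DatasetReference()
        .setProjectId("shared-project")
        .setDatasetId("shared_ds"));

// "other_ds.events" names no project, so the project resolves to
// "shared-project"; had defaultDataset been absent, to "fallback-project".
TableReference ref = tableReference("fallback-project", defaultDataset, "other_ds.events");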
Example #11
Source File: BqDdlOperatorFactory.java, from digdag (Apache License 2.0)
private Dataset dataset(String defaultProjectId, DatasetConfig config)
{
    return new Dataset()
            .setDatasetReference(new DatasetReference()
                    .setProjectId(config.project().or(defaultProjectId))
                    .setDatasetId(config.id()))
            .setFriendlyName(config.friendly_name().orNull())
            .setDefaultTableExpirationMs(config.default_table_expiration()
                    .transform(d -> d.getDuration().toMillis()).orNull())
            .setLocation(config.location().orNull())
            .setAccess(config.access().orNull())
            .setLabels(config.labels().orNull());
}
Example #12
Source File: BqDdlOperatorFactory.java, from digdag (Apache License 2.0)
private BqOperation deleteDataset(JsonNode config)
{
    if (!config.isTextual()) {
        throw new ConfigException("Bad dataset reference: " + config);
    }
    return (bq, projectId) -> {
        DatasetReference r = datasetReference(projectId, config.asText());
        bq.deleteDataset(r.getProjectId(), r.getDatasetId());
    };
}
Example #13
Source File: BqLoadOperatorFactory.java, from digdag (Apache License 2.0)
@Override
protected JobConfiguration jobConfiguration(String projectId)
{
    JobConfigurationLoad cfg = new JobConfigurationLoad()
            .setSourceUris(sourceUris(params));

    if (params.has("schema")) {
        cfg.setSchema(tableSchema(params));
    }

    Optional<DatasetReference> defaultDataset = params.getOptional("dataset", String.class)
            .transform(Bq::datasetReference);

    String destinationTable = params.get("destination_table", String.class);
    cfg.setDestinationTable(tableReference(projectId, defaultDataset, destinationTable));

    params.getOptional("create_disposition", String.class).transform(cfg::setCreateDisposition);
    params.getOptional("write_disposition", String.class).transform(cfg::setWriteDisposition);
    params.getOptional("source_format", String.class).transform(cfg::setSourceFormat);
    params.getOptional("field_delimiter", String.class).transform(cfg::setFieldDelimiter);
    params.getOptional("skip_leading_rows", int.class).transform(cfg::setSkipLeadingRows);
    params.getOptional("encoding", String.class).transform(cfg::setEncoding);
    params.getOptional("quote", String.class).transform(cfg::setQuote);
    params.getOptional("max_bad_records", int.class).transform(cfg::setMaxBadRecords);
    params.getOptional("allow_quoted_newlines", boolean.class).transform(cfg::setAllowQuotedNewlines);
    params.getOptional("allow_jagged_rows", boolean.class).transform(cfg::setAllowJaggedRows);
    params.getOptional("ignore_unknown_values", boolean.class).transform(cfg::setIgnoreUnknownValues);
    Optional.of(params.getListOrEmpty("projection_fields", String.class)).transform(cfg::setProjectionFields);
    params.getOptional("autodetect", boolean.class).transform(cfg::setAutodetect);
    Optional.of(params.getListOrEmpty("schema_update_options", String.class)).transform(cfg::setSchemaUpdateOptions);

    return new JobConfiguration()
            .setLoad(cfg);
}
Example #14
Source File: BigqueryConnection.java, from nomulus (Apache License 2.0)
/**
 * Helper that creates a dataset with this name if it doesn't already exist, and returns true
 * if creation took place.
 */
public boolean createDatasetIfNeeded(String datasetName) throws IOException {
  if (!checkDatasetExists(datasetName)) {
    bigquery.datasets()
        .insert(getProjectId(),
            new Dataset().setDatasetReference(new DatasetReference()
                .setProjectId(getProjectId())
                .setDatasetId(datasetName)))
        .execute();
    logger.atInfo().log("Created dataset: %s:%s\n", getProjectId(), datasetName);
    return true;
  }
  return false;
}
Example #15
Source File: FakeDatasetService.java, from beam (Apache License 2.0)
@Override
public Dataset getDataset(String projectId, String datasetId)
    throws IOException, InterruptedException {
  synchronized (tables) {
    Map<String, TableContainer> dataset = tables.get(projectId, datasetId);
    if (dataset == null) {
      // The lookup is for a dataset, so the not-found message names a dataset.
      throwNotFound(
          "Tried to get a dataset %s:%s, but no such dataset was set", projectId, datasetId);
    }
    return new Dataset()
        .setDatasetReference(
            new DatasetReference().setDatasetId(datasetId).setProjectId(projectId));
  }
}
Example #16
Source File: ExampleUtils.java, from beam (Apache License 2.0)
private void setupBigQueryTable(
    String projectId, String datasetId, String tableId, TableSchema schema) throws IOException {
  if (bigQueryClient == null) {
    bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build();
  }

  Datasets datasetService = bigQueryClient.datasets();
  if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) {
    Dataset newDataset =
        new Dataset()
            .setDatasetReference(
                new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
    datasetService.insert(projectId, newDataset).execute();
  }

  Tables tableService = bigQueryClient.tables();
  Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId));
  if (table == null) {
    Table newTable =
        new Table()
            .setSchema(schema)
            .setTableReference(
                new TableReference()
                    .setProjectId(projectId)
                    .setDatasetId(datasetId)
                    .setTableId(tableId));
    tableService.insert(projectId, datasetId, newTable).execute();
  } else if (!table.getSchema().equals(schema)) {
    throw new RuntimeException(
        "Table exists and schemas do not match, expecting: "
            + schema.toPrettyString()
            + ", actual: "
            + table.getSchema().toPrettyString());
  }
}
Example #17
Source File: Bq.java, from digdag (Apache License 2.0)
static DatasetReference datasetReference(String s)
{
    return datasetReference(Optional.absent(), s);
}
Example #18
Source File: Bq.java, from digdag (Apache License 2.0)
static DatasetReference datasetReference(String defaultProjectId, String s)
{
    return datasetReference(Optional.of(defaultProjectId), s);
}
Example #19
Source File: BigqueryConnection.java, from nomulus (Apache License 2.0)
/** Returns dataset reference that can be used to avoid having to specify dataset in SQL code. */
public DatasetReference getDataset() {
  return new DatasetReference()
      .setProjectId(getProjectId())
      .setDatasetId(getDatasetId());
}
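One common way such a reference is consumed (a sketch assuming a BigqueryConnection instance named conn; Example #8 shows digdag doing the same via cfg::setDefaultDataset) is as the default dataset of a query configuration, so that unqualified table names in the SQL resolve against it:

// Sketch: "my_table" is a hypothetical table in conn's dataset.
JobConfigurationQuery queryConfig = new JobConfigurationQuery()
    .setQuery("SELECT COUNT(*) FROM my_table")
    .setDefaultDataset(conn.getDataset());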
Example #20
Source File: BigQueryIT.java, from digdag (Apache License 2.0)
@Test
public void testExtract()
        throws Exception
{
    assumeThat(GCS_TEST_BUCKET, not(isEmptyOrNullString()));

    // Create source table
    String tableId = "data";
    String datasetId = BQ_TAG + "_extract_test";
    Dataset dataset = new Dataset().setDatasetReference(new DatasetReference()
            .setProjectId(gcpProjectId)
            .setDatasetId(datasetId));
    retryExecutor.run(() -> bq.datasets().insert(gcpProjectId, dataset)
            .execute());
    Table table = new Table()
            .setTableReference(new TableReference()
                    .setProjectId(gcpProjectId)
                    .setTableId(tableId))
            .setSchema(new TableSchema()
                    .setFields(ImmutableList.of(
                            new TableFieldSchema().setName("foo").setType("STRING"),
                            new TableFieldSchema().setName("bar").setType("STRING")
                    )));
    retryExecutor.run(() -> bq.tables().insert(gcpProjectId, datasetId, table)
            .execute());

    // Populate source table
    TableDataInsertAllRequest content = new TableDataInsertAllRequest()
            .setRows(ImmutableList.of(
                    new TableDataInsertAllRequest.Rows().setJson(ImmutableMap.of(
                            "foo", "a",
                            "bar", "b")),
                    new TableDataInsertAllRequest.Rows().setJson(ImmutableMap.of(
                            "foo", "c",
                            "bar", "d"))));
    retryExecutor.run(() -> bq.tabledata().insertAll(gcpProjectId, datasetId, tableId, content)
            .execute());

    // Run extract
    String objectName = GCS_PREFIX + "test.csv";
    addWorkflow(projectDir, "acceptance/bigquery/extract.dig");
    Id attemptId = pushAndStart(server.endpoint(), projectDir, "extract", ImmutableMap.of(
            "src_dataset", datasetId,
            "src_table", tableId,
            "dst_bucket", GCS_TEST_BUCKET,
            "dst_object", objectName,
            "outfile", outfile.toString()));
    expect(Duration.ofMinutes(5), attemptSuccess(server.endpoint(), attemptId));
    assertThat(Files.exists(outfile), is(true));

    // Check that destination file was created
    StorageObject metadata = retryExecutor.run(() -> gcs.objects().get(GCS_TEST_BUCKET, objectName)
            .execute());
    assertThat(metadata.getName(), is(objectName));
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    retryExecutor.run(() -> {
        try {
            gcs.objects().get(GCS_TEST_BUCKET, objectName)
                    .executeMediaAndDownloadTo(data);
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    });
}
Example #21
Source File: KeyByBigQueryTableDestination.java, from gcp-ingestion (Mozilla Public License 2.0)
/**
 * Return the appropriate table destination instance for the given document type and other
 * attributes.
 */
public TableDestination getTableDestination(Map<String, String> attributes) {
  attributes = new HashMap<>(attributes);

  // We coerce all docType and namespace names to be snake_case and to remove invalid
  // characters; these transformations MUST match with the transformations applied by the
  // jsonschema-transpiler and mozilla-schema-generator when creating table schemas in BigQuery.
  final String namespace = attributes.get(Attribute.DOCUMENT_NAMESPACE);
  final String docType = attributes.get(Attribute.DOCUMENT_TYPE);
  if (namespace != null) {
    attributes.put(Attribute.DOCUMENT_NAMESPACE, getAndCacheNormalizedName(namespace));
  }
  if (docType != null) {
    attributes.put(Attribute.DOCUMENT_TYPE, getAndCacheNormalizedName(docType));
  }

  // Only letters, numbers, and underscores are allowed in BigQuery dataset and table names,
  // but some doc types and namespaces contain '-', so we convert to '_'; we don't pass all
  // values through getAndCacheBqName to avoid expensive regex operations and polluting the
  // cache of transformed field names.
  attributes = Maps.transformValues(attributes, v -> v.replaceAll("-", "_"));

  final String tableSpec = StringSubstitutor.replace(tableSpecTemplate.get(), attributes);

  // Send to error collection if incomplete tableSpec; $ is not a valid char in tableSpecs.
  if (tableSpec.contains("$")) {
    throw new IllegalArgumentException("Element did not contain all the attributes needed to"
        + " fill out variables in the configured BigQuery output template: "
        + tableSpecTemplate.get());
  }

  final TableDestination tableDestination = new TableDestination(tableSpec, null,
      new TimePartitioning().setField(partitioningField.get()),
      new Clustering().setFields(clusteringFields.get()));
  final TableReference ref = BigQueryHelpers.parseTableSpec(tableSpec);
  final DatasetReference datasetRef = new DatasetReference().setProjectId(ref.getProjectId())
      .setDatasetId(ref.getDatasetId());

  if (bqService == null) {
    bqService = BigQueryOptions.newBuilder().setProjectId(ref.getProjectId())
        .setRetrySettings(RETRY_SETTINGS).build().getService();
  }

  // Get and cache a listing of table names for this dataset.
  Set<String> tablesInDataset;
  if (tableListingCache == null) {
    // We need to be very careful about settings for the cache here. We have had significant
    // issues in the past due to exceeding limits on BigQuery API requests; see
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1623000
    tableListingCache = CacheBuilder.newBuilder().expireAfterWrite(Duration.ofMinutes(10))
        .build();
  }
  try {
    tablesInDataset = tableListingCache.get(datasetRef, () -> {
      Set<String> tableSet = new HashSet<>();
      Dataset dataset = bqService.getDataset(ref.getDatasetId());
      if (dataset != null) {
        dataset.list().iterateAll().forEach(t -> {
          tableSet.add(t.getTableId().getTable());
        });
      }
      return tableSet;
    });
  } catch (ExecutionException e) {
    throw new UncheckedExecutionException(e.getCause());
  }

  // Send to error collection if dataset or table doesn't exist so BigQueryIO doesn't throw a
  // pipeline execution exception.
  if (tablesInDataset.isEmpty()) {
    throw new IllegalArgumentException("Resolved destination dataset does not exist or has no "
        + " tables for tableSpec " + tableSpec);
  } else if (!tablesInDataset.contains(ref.getTableId())) {
    throw new IllegalArgumentException("Resolved destination table does not exist: " + tableSpec);
  }

  return tableDestination;
}
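The tableSpec template above is filled with Apache Commons Text's StringSubstitutor, which replaces ${...} placeholders from the attribute map and leaves unmatched placeholders intact, which is what the '$' check catches. A toy illustration with a hypothetical template and attributes (not Mozilla's defaults):

import java.util.Map;
import org.apache.commons.text.StringSubstitutor;

// Yields "my-project:telemetry.main_v4" when both attributes are present; if
// one is missing, the "${...}" stays in place and the '$' check above trips.
Map<String, String> attrs = Map.of("document_namespace", "telemetry", "document_type", "main");
String spec = StringSubstitutor.replace("my-project:${document_namespace}.${document_type}_v4", attrs);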
Example #22
Source File: AbstractBigQueryIoIntegrationTestBase.java, from hadoop-connectors (Apache License 2.0)
@Before
public void setUp() throws IOException, GeneralSecurityException {
  MockitoAnnotations.initMocks(this);

  LoggerConfig.getConfig(GsonBigQueryInputFormat.class).setLevel(Level.FINE);
  LoggerConfig.getConfig(BigQueryUtils.class).setLevel(Level.FINE);
  LoggerConfig.getConfig(GsonRecordReader.class).setLevel(Level.FINE);

  bucketHelper = new TestBucketHelper("bq_integration_test");
  // A unique per-setUp String to avoid collisions between test runs.
  String testId = bucketHelper.getUniqueBucketPrefix();

  projectIdValue = TestConfiguration.getInstance().getProjectId();
  if (Strings.isNullOrEmpty(projectIdValue)) {
    projectIdValue = System.getenv(BIGQUERY_PROJECT_ID_ENVVARNAME);
  }
  checkArgument(
      !Strings.isNullOrEmpty(projectIdValue), "Must provide %s", BIGQUERY_PROJECT_ID_ENVVARNAME);
  testDataset = testId + "_dataset";
  testBucket = testId + "_bucket";

  // We have to create the output dataset ourselves.
  // TODO(user): Extract dataset creation into a library which is also used by
  // BigQueryOutputCommitter.
  Dataset outputDataset = new Dataset();
  DatasetReference datasetReference = new DatasetReference();
  datasetReference.setProjectId(projectIdValue);
  datasetReference.setDatasetId(testDataset);

  config = getConfigForGcsFromBigquerySettings(projectIdValue);
  BigQueryFactory factory = new BigQueryFactory();
  bigqueryInstance = factory.getBigQuery(config);

  Bigquery.Datasets datasets = bigqueryInstance.datasets();
  outputDataset.setDatasetReference(datasetReference);
  logger.atInfo().log(
      "Creating temporary dataset '%s' for project '%s'", testDataset, projectIdValue);
  datasets.insert(projectIdValue, outputDataset).execute();

  Path toCreate = new Path(String.format("gs://%s", testBucket));
  FileSystem fs = toCreate.getFileSystem(config);
  logger.atInfo().log("Creating temporary test bucket '%s'", toCreate);
  fs.mkdirs(toCreate);

  // Since the TaskAttemptContext and JobContexts are mostly used just to access a
  // "Configuration" object, we'll mock the two contexts to just return our fake configuration
  // object with which we'll provide the settings we want to test.
  config.clear();
  setConfigForGcsFromBigquerySettings();

  when(mockTaskAttemptContext.getConfiguration()).thenReturn(config);
  when(mockJobContext.getConfiguration()).thenReturn(config);

  // Have a realistic-looking fake TaskAttemptID.
  int taskNumber = 3;
  int taskAttempt = 2;
  int jobNumber = 42;
  String jobIdString = "jobid" + System.currentTimeMillis();
  JobID jobId = new JobID(jobIdString, jobNumber);
  TaskAttemptID taskAttemptId =
      new TaskAttemptID(new TaskID(jobId, false, taskNumber), taskAttempt);
  when(mockTaskAttemptContext.getTaskAttemptID()).thenReturn(taskAttemptId);
  when(mockJobContext.getJobID()).thenReturn(jobId);

  testTable = testId + "_table_" + jobIdString;
}