Python google.cloud.bigquery.DatasetReference() Examples
The following are 30 code examples of google.cloud.bigquery.DatasetReference(), collected from open-source projects.
The original project and source file for each example are noted above it.
You may also want to check out the other available functions and classes of the google.cloud.bigquery module.
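As a quick orientation before the examples, here is a minimal sketch (not taken from any of the projects below; the project, dataset, and table names are placeholders) showing how a DatasetReference is constructed and used to derive a TableReference:

from google.cloud import bigquery

# Build a reference to a dataset from a project ID and dataset ID.
# No API call is made until a client actually uses the reference.
dataset_ref = bigquery.DatasetReference("my-project", "my_dataset")

# Derive a TableReference from the dataset reference.
table_ref = dataset_ref.table("my_table")

print(dataset_ref.project)     # "my-project"
print(dataset_ref.dataset_id)  # "my_dataset"
print(table_ref.table_id)      # "my_table"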
Example #1
Source File: main.py From professional-services with Apache License 2.0 | 7 votes |
def execute_transformation_query(bq_client):
    """Executes transformation query to a new destination table.

    Args:
        bq_client: Object representing a reference to a BigQuery Client
    """
    dataset_ref = bq_client.get_dataset(bigquery.DatasetReference(
        project=config.config_vars['billing_project_id'],
        dataset_id=config.config_vars['output_dataset_id']))
    table_ref = dataset_ref.table(config.config_vars['output_table_name'])
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition().WRITE_TRUNCATE
    job_config.time_partitioning = bigquery.TimePartitioning(
        field='usage_start_time',
        expiration_ms=None)
    sql = file_to_string(config.config_vars['sql_file_path'])
    sql = sql.format(**config.config_vars)
    logging.info('Attempting query on all dates...')

    # Execute Query
    query_job = bq_client.query(
        sql,
        job_config=job_config)

    query_job.result()  # Waits for the query to finish
    logging.info('Transformation query complete. All partitions are updated.')
Example #2
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_copy_table(self):
    # If we create a new table to copy from, the test won't work
    # because the new rows will be stored in the streaming buffer,
    # and copy jobs don't read the streaming buffer.
    # We could wait for the streaming buffer to empty, but that could
    # take minutes. Instead we copy a small public table.
    source_dataset = DatasetReference("bigquery-public-data", "samples")
    source_ref = source_dataset.table("shakespeare")
    dest_dataset = self.temp_dataset(_make_dataset_id("copy_table"))
    dest_ref = dest_dataset.table("destination_table")
    job_config = bigquery.CopyJobConfig()
    job = Config.CLIENT.copy_table(source_ref, dest_ref, job_config=job_config)
    job.result()

    dest_table = Config.CLIENT.get_table(dest_ref)
    self.to_delete.insert(0, dest_table)
    # Just check that we got some rows.
    got_rows = self._fetch_single_page(dest_table)
    self.assertTrue(len(got_rows) > 0)
Example #3
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_extract_table(self):
    local_id = unique_resource_id()
    bucket_name = "bq_extract_test" + local_id
    source_blob_name = "person_ages.csv"
    dataset_id = _make_dataset_id("load_gcs_then_extract")
    table_id = "test_table"
    project = Config.CLIENT.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)
    table = Table(table_ref)
    self.to_delete.insert(0, table)
    bucket = self._create_bucket(bucket_name)
    self._load_table_for_extract_table(bucket, source_blob_name, table_ref, ROWS)
    destination_blob_name = "person_ages_out.csv"
    destination = bucket.blob(destination_blob_name)
    destination_uri = "gs://{}/person_ages_out.csv".format(bucket_name)

    job = Config.CLIENT.extract_table(table_ref, destination_uri)
    job.result(timeout=100)

    self.to_delete.insert(0, destination)
    got_bytes = retry_storage_errors(destination.download_as_string)()
    got = got_bytes.decode("utf-8")
    self.assertIn("Bharney Rhubble", got)
Example #4
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_get_table_w_public_dataset(self):
    public = "bigquery-public-data"
    dataset_id = "samples"
    table_id = "shakespeare"
    table_ref = DatasetReference(public, dataset_id).table(table_id)

    # Get table with reference.
    table = Config.CLIENT.get_table(table_ref)
    self.assertEqual(table.table_id, table_id)
    self.assertEqual(table.dataset_id, dataset_id)
    self.assertEqual(table.project, public)
    schema_names = [field.name for field in table.schema]
    self.assertEqual(schema_names, ["word", "word_count", "corpus", "corpus_date"])

    # Get table with string.
    table = Config.CLIENT.get_table("{}.{}.{}".format(public, dataset_id, table_id))
    self.assertEqual(table.table_id, table_id)
    self.assertEqual(table.dataset_id, dataset_id)
    self.assertEqual(table.project, public)
Example #5
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def tearDown(self):
    def _still_in_use(bad_request):
        return any(
            error["reason"] == "resourceInUse" for error in bad_request._errors
        )

    retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
    retry_storage_errors_conflict = RetryErrors(
        (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable)
    )
    for doomed in self.to_delete:
        if isinstance(doomed, storage.Bucket):
            retry_storage_errors_conflict(doomed.delete)(force=True)
        elif isinstance(doomed, (Dataset, bigquery.DatasetReference)):
            retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True)
        elif isinstance(doomed, (Table, bigquery.TableReference)):
            retry_in_use(Config.CLIENT.delete_table)(doomed)
        else:
            doomed.delete()
Example #6
Source File: snippets.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_list_rows_as_dataframe(client):
    # [START bigquery_list_rows_dataframe]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    project = "bigquery-public-data"
    dataset_id = "samples"

    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    table = client.get_table(table_ref)

    df = client.list_rows(table).to_dataframe()
    # [END bigquery_list_rows_dataframe]
    assert isinstance(df, pandas.DataFrame)
    assert len(list(df)) == len(table.schema)  # verify the number of columns
    assert len(df) == table.num_rows  # verify the number of rows
Example #7
Source File: bigquery_test.py From loaner with Apache License 2.0 | 5 votes |
def setUp(self):
    super(BigQueryClientTest, self).setUp()
    bq_patcher = mock.patch.object(gcloud_bq, 'Client', autospec=True)
    self.addCleanup(bq_patcher.stop)
    self.bq_mock = bq_patcher.start()
    self.dataset_ref = mock.Mock(spec=gcloud_bq.DatasetReference)
    self.table = mock.Mock(spec=gcloud_bq.Table)
    self.table.schema = []
    self.dataset_ref.table.return_value = self.table
    with mock.patch.object(
            bigquery.BigQueryClient, '__init__', return_value=None):
        self.client = bigquery.BigQueryClient()
        self.client._client = self.bq_mock()
        self.client._dataset_ref = self.dataset_ref
        self.client._client.insert_rows.return_value = None
        self.client._client.get_table.return_value = self.table
    self.nested_schema = [
        gcloud_bq.SchemaField('nested_string_attribute', 'STRING', 'NULLABLE')]
    self.entity_schema = [
        gcloud_bq.SchemaField('string_attribute', 'STRING', 'NULLABLE'),
        gcloud_bq.SchemaField('integer_attribute', 'INTEGER', 'NULLABLE'),
        gcloud_bq.SchemaField('boolean_attribute', 'BOOLEAN', 'NULLABLE'),
        gcloud_bq.SchemaField(
            'nested_attribute', 'RECORD', 'NULLABLE', fields=self.nested_schema)
    ]

    test_device = device_model.Device(
        serial_number='abc123', chrome_device_id='123123')
    test_device.put()
    test_row = bigquery_row_model.BigQueryRow.add(
        test_device, datetime.datetime.utcnow(),
        loanertest.USER_EMAIL, 'Enroll', 'This is a test')
    self.test_row_dict = test_row.to_json_dict()
    self.test_table = [(self.test_row_dict['ndb_key'],
                        self.test_row_dict['timestamp'],
                        self.test_row_dict['actor'],
                        self.test_row_dict['method'],
                        self.test_row_dict['summary'],
                        self.test_row_dict['entity'])]
Example #8
Source File: helper.py From professional-services with Apache License 2.0 | 5 votes |
def execute_query(bq_client: bigquery.Client,
                  env_vars: {},
                  query_path: object,
                  output_table_name: str,
                  time_partition: bool) -> None:
    """Executes transformation query to a new destination table.

    Args:
        bq_client: bigquery.Client object
        env_vars: Dictionary of key: value, where value is environment variable
        query_path: Object representing location of SQL query to execute
        output_table_name: String representing name of table that holds output
        time_partition: Boolean indicating whether to time-partition output
    """
    dataset_ref = bq_client.get_dataset(
        bigquery.DatasetReference(project=bq_client.project,
                                  dataset_id=env_vars['corrected_dataset_id']))
    table_ref = dataset_ref.table(output_table_name)
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition().WRITE_TRUNCATE

    # Time Partitioning table is only needed for final output query
    if time_partition:
        job_config.time_partitioning = bigquery.TimePartitioning(
            field='usage_start_time', expiration_ms=None)

    logging.info('Attempting query...')

    # Execute Query
    query_job = bq_client.query(query=render_template(query_path, env_vars),
                                job_config=job_config)

    query_job.result()  # Waits for the query to finish
Example #9
Source File: mock_bigquery_client.py From ethereum-etl-airflow with MIT License | 5 votes |
def dataset(self, dataset_id, project=None):
    if project is None:
        project = PROJECT
    return DatasetReference(project, dataset_id)
Example #10
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def _table_exists(t):
    try:
        tr = DatasetReference(t.project, t.dataset_id).table(t.table_id)
        Config.CLIENT.get_table(tr)
        return True
    except NotFound:
        return False
Example #11
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def _dataset_exists(ds):
    try:
        Config.CLIENT.get_dataset(DatasetReference(ds.project, ds.dataset_id))
        return True
    except NotFound:
        return False
Example #12
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def temp_dataset(self, dataset_id, location=None):
    project = Config.CLIENT.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = Dataset(dataset_ref)
    if location:
        dataset.location = location
    dataset = retry_403(Config.CLIENT.create_dataset)(dataset)
    self.to_delete.append(dataset)
    return dataset
Example #13
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_query_w_wrong_config(self):
    from google.cloud.bigquery.job import LoadJobConfig

    good_query = "SELECT 1;"
    rows = list(Config.CLIENT.query("SELECT 1;").result())
    assert rows[0][0] == 1

    project = Config.CLIENT.project
    dataset_ref = bigquery.DatasetReference(project, "dset")
    bad_config = LoadJobConfig()
    bad_config.destination = dataset_ref.table("tbl")
    with self.assertRaises(Exception):
        Config.CLIENT.query(good_query, job_config=bad_config).result()
Example #14
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_list_partitions(self):
    table_ref = DatasetReference(
        "bigquery-public-data", "ethereum_blockchain"
    ).table("blocks")
    all_rows = Config.CLIENT.list_partitions(table_ref)
    self.assertIn("20180801", all_rows)
    self.assertGreater(len(all_rows), 1000)
Example #15
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_delete_dataset_with_string(self):
    dataset_id = _make_dataset_id("delete_table_true")
    project = Config.CLIENT.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref))
    self.assertTrue(_dataset_exists(dataset_ref))
    Config.CLIENT.delete_dataset(dataset_id)
    self.assertFalse(_dataset_exists(dataset_ref))
Example #16
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_get_dataset(self):
    dataset_id = _make_dataset_id("get_dataset")
    client = Config.CLIENT
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset_arg = Dataset(dataset_ref)
    dataset_arg.friendly_name = "Friendly"
    dataset_arg.description = "Description"
    dataset = retry_403(client.create_dataset)(dataset_arg)
    self.to_delete.append(dataset)
    dataset_ref = bigquery.DatasetReference(project, dataset_id)

    # Get with a reference.
    got = client.get_dataset(dataset_ref)
    self.assertEqual(got.friendly_name, "Friendly")
    self.assertEqual(got.description, "Description")

    # Get with a string.
    got = client.get_dataset(dataset_id)
    self.assertEqual(got.friendly_name, "Friendly")
    self.assertEqual(got.description, "Description")

    # Get with a fully-qualified string.
    got = client.get_dataset("{}.{}".format(client.project, dataset_id))
    self.assertEqual(got.friendly_name, "Friendly")
    self.assertEqual(got.description, "Description")
Example #17
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_extract_table_compressed(client, to_delete):
    bucket_name = "extract_shakespeare_compress_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)

    project = "bigquery-public-data"
    dataset_id = "samples"

    # [START bigquery_extract_table_compressed]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv.gz")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    job_config = bigquery.job.ExtractJobConfig()
    job_config.compression = bigquery.Compression.GZIP

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        # Location must match that of the source table.
        location="US",
        job_config=job_config,
    )  # API request
    extract_job.result()  # Waits for job to complete.
    # [END bigquery_extract_table_compressed]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv.gz")
    assert blob.exists
    assert blob.size > 0
    to_delete.insert(0, blob)
Example #18
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_extract_table_json(client, to_delete):
    bucket_name = "extract_shakespeare_json_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)

    project = "bigquery-public-data"
    dataset_id = "samples"

    # [START bigquery_extract_table_json]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.json")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    job_config = bigquery.job.ExtractJobConfig()
    job_config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        job_config=job_config,
        # Location must match that of the source table.
        location="US",
    )  # API request
    extract_job.result()  # Waits for job to complete.
    # [END bigquery_extract_table_json]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.json")
    assert blob.exists
    assert blob.size > 0
    to_delete.insert(0, blob)
Example #19
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_extract_table(client, to_delete):
    bucket_name = "extract_shakespeare_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)

    # [START bigquery_extract_table]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'
    project = "bigquery-public-data"
    dataset_id = "samples"
    table_id = "shakespeare"

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        # Location must match that of the source table.
        location="US",
    )  # API request
    extract_job.result()  # Waits for job to complete.

    print(
        "Exported {}:{}.{} to {}".format(project, dataset_id, table_id, destination_uri)
    )
    # [END bigquery_extract_table]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv")
    assert blob.exists
    assert blob.size > 0
    to_delete.insert(0, blob)
Example #20
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_update_table_cmek(client, to_delete):
    """Patch a table's metadata."""
    dataset_id = "update_table_cmek_{}".format(_millis())
    table_id = "update_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table = bigquery.Table(dataset.table(table_id))
    original_kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=original_kms_key_name
    )
    table = client.create_table(table)

    # [START bigquery_update_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    assert table.encryption_configuration.kms_key_name == original_kms_key_name

    # Set a new encryption key to use for the destination.
    # TODO: Replace this key with a key you have created in KMS.
    updated_kms_key_name = (
        "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/otherkey"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=updated_kms_key_name
    )

    table = client.update_table(table, ["encryption_configuration"])  # API request

    assert table.encryption_configuration.kms_key_name == updated_kms_key_name
    assert original_kms_key_name != updated_kms_key_name
    # [END bigquery_update_table_cmek]
Example #21
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_relax_column(client, to_delete):
    """Updates a schema field from required to nullable."""
    dataset_id = "relax_column_dataset_{}".format(_millis())
    table_id = "relax_column_table_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_relax_column]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'
    # table_id = 'my_table'

    original_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)
    table = bigquery.Table(table_ref, schema=original_schema)
    table = client.create_table(table)
    assert all(field.mode == "REQUIRED" for field in table.schema)

    # SchemaField properties cannot be edited after initialization.
    # To make changes, construct new SchemaField objects.
    relaxed_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    table.schema = relaxed_schema
    table = client.update_table(table, ["schema"])

    assert all(field.mode == "NULLABLE" for field in table.schema)
    # [END bigquery_relax_column]
Example #22
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_update_table_description(client, to_delete):
    """Update a table's description."""
    dataset_id = "update_table_description_dataset_{}".format(_millis())
    table_id = "update_table_description_table_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table = bigquery.Table(dataset.table(table_id), schema=SCHEMA)
    table.description = "Original description."
    table = client.create_table(table)

    # [START bigquery_update_table_description]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, dataset_id)
    # table_ref = dataset_ref.table('my_table')
    # table = client.get_table(table_ref)  # API request

    assert table.description == "Original description."
    table.description = "Updated description."

    table = client.update_table(table, ["description"])  # API request

    assert table.description == "Updated description."
    # [END bigquery_update_table_description]
Example #23
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_create_partitioned_table(client, to_delete):
    dataset_id = "create_table_partitioned_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = client.create_dataset(dataset_ref)
    to_delete.append(dataset)

    # [START bigquery_create_table_partitioned]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    table_ref = dataset_ref.table("my_partitioned_table")
    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    table = bigquery.Table(table_ref, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",  # name of column to use for partitioning
        expiration_ms=7776000000,
    )  # 90 days

    table = client.create_table(table)

    print(
        "Created table {}, partitioned on column {}".format(
            table.table_id, table.time_partitioning.field
        )
    )
    # [END bigquery_create_table_partitioned]

    assert table.time_partitioning.type_ == "DAY"
    assert table.time_partitioning.field == "date"
    assert table.time_partitioning.expiration_ms == 7776000000
Example #24
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_create_table_cmek(client, to_delete):
    dataset_id = "create_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_create_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    table_ref = dataset.table("my_table")
    table = bigquery.Table(table_ref)

    # Set the encryption key to use for the table.
    # TODO: Replace this key with a key you have created in Cloud KMS.
    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=kms_key_name
    )

    table = client.create_table(table)  # API request

    assert table.encryption_configuration.kms_key_name == kms_key_name
    # [END bigquery_create_table_cmek]
Example #25
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_create_table_nested_repeated_schema(client, to_delete):
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table_ref = dataset_ref.table("my_table")
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)  # API request

    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema]
Example #26
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def to_delete(client):
    doomed = []
    yield doomed

    for item in doomed:
        if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)):
            retry_429(client.delete_dataset)(item, delete_contents=True)
        elif isinstance(item, storage.Bucket):
            retry_storage_errors(item.delete)()
        else:
            retry_429(item.delete)()
Example #27
Source File: test__query.py From pipelines with Apache License 2.0 | 5 votes |
def test_query_no_output_path(self, mock_client, mock_kfp_context, mock_dump_json,
                              mock_display):
    mock_kfp_context().__enter__().context_id.return_value = 'ctx1'
    mock_client().get_job.side_effect = exceptions.NotFound('not found')
    mock_dataset = bigquery.DatasetReference('project-1', 'dataset-1')
    mock_client().dataset.return_value = mock_dataset
    mock_client().get_dataset.return_value = bigquery.Dataset(mock_dataset)
    mock_response = {
        'configuration': {
            'query': {
                'query': 'SELECT * FROM table_1'
            }
        }
    }
    mock_client().query.return_value.to_api_repr.return_value = mock_response

    result = query('SELECT * FROM table_1', 'project-1', 'dataset-1', 'table-1')

    self.assertEqual(mock_response, result)
    mock_client().create_dataset.assert_not_called()
    mock_client().extract_table.assert_not_called()

    expected_job_config = bigquery.QueryJobConfig()
    expected_job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
    expected_job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
    expected_job_config.destination = mock_dataset.table('table-1')
    mock_client().query.assert_called_with('SELECT * FROM table_1', mock.ANY,
                                           job_id='query_ctx1')
    actual_job_config = mock_client().query.call_args_list[0][0][1]
    self.assertDictEqual(
        expected_job_config.to_api_repr(),
        actual_job_config.to_api_repr()
    )
Example #28
Source File: test__query.py From pipelines with Apache License 2.0 | 5 votes |
def test_query_succeed(self, mock_client, mock_kfp_context, mock_dump_json,
                       mock_display):
    mock_kfp_context().__enter__().context_id.return_value = 'ctx1'
    mock_client().get_job.side_effect = exceptions.NotFound('not found')
    mock_dataset = bigquery.DatasetReference('project-1', 'dataset-1')
    mock_client().dataset.return_value = mock_dataset
    mock_client().get_dataset.side_effect = exceptions.NotFound('not found')
    mock_response = {
        'configuration': {
            'query': {
                'query': 'SELECT * FROM table_1'
            }
        }
    }
    mock_client().query.return_value.to_api_repr.return_value = mock_response

    result = query('SELECT * FROM table_1', 'project-1', 'dataset-1',
                   output_gcs_path='gs://output/path')

    self.assertEqual(mock_response, result)
    mock_client().create_dataset.assert_called()

    expected_job_config = bigquery.QueryJobConfig()
    expected_job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
    expected_job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
    expected_job_config.destination = mock_dataset.table('query_ctx1')
    mock_client().query.assert_called_with('SELECT * FROM table_1', mock.ANY,
                                           job_id='query_ctx1')
    actual_job_config = mock_client().query.call_args_list[0][0][1]
    self.assertDictEqual(
        expected_job_config.to_api_repr(),
        actual_job_config.to_api_repr()
    )
    mock_client().extract_table.assert_called_with(
        mock_dataset.table('query_ctx1'),
        'gs://output/path')
Example #29
Source File: samples_test.py From python-docs-samples with Apache License 2.0 | 5 votes |
def temp_dataset():
    from google.cloud import bigquery

    client = bigquery.Client()
    dataset_id = "temp_dataset_{}".format(int(time.time() * 1000))
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    yield dataset
    client.delete_dataset(dataset, delete_contents=True)
Example #30
Source File: bigquery.py From loaner with Apache License 2.0 | 5 votes |
def __init__(self):
    if constants.ON_LOCAL:
        return
    self._client = bigquery.Client()
    self._dataset_ref = bigquery.DatasetReference(
        self._client.project, constants.BIGQUERY_DATASET_NAME)
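As a closing note, recent releases of google-cloud-bigquery can also build the same reference from a fully-qualified string. A minimal sketch, assuming the DatasetReference.from_string classmethod and placeholder project/dataset/table IDs:

from google.cloud import bigquery

# Parse "project.dataset_id" into a DatasetReference (placeholder IDs).
dataset_ref = bigquery.DatasetReference.from_string("my-project.my_dataset")
table_ref = dataset_ref.table("my_table")
print(table_ref)  # TableReference for my-project.my_dataset.my_table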