Python google.cloud.bigquery.DatasetReference() Examples

The following are 30 code examples of google.cloud.bigquery.DatasetReference(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
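
Before the project examples, here is a minimal sketch of the common pattern they share: construct a DatasetReference, derive a table reference from it, and pass either to a client method. This snippet is not taken from any of the projects below; "my-project", "my_dataset", and "my_table" are placeholder identifiers, and it assumes default credentials and a default project are configured.

from google.cloud import bigquery

client = bigquery.Client()  # assumes default credentials/project are configured
dataset_ref = bigquery.DatasetReference("my-project", "my_dataset")  # placeholder IDs
table_ref = dataset_ref.table("my_table")  # derive a TableReference from the dataset reference
dataset = client.get_dataset(dataset_ref)  # API request: fetch the dataset's metadata
print(dataset.dataset_id, table_ref.table_id)

Most of the examples below follow this pattern, passing the reference to methods such as get_dataset(), get_table(), or using it to build a job destination.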
Example #1
Source File: main.py    From professional-services with Apache License 2.0
def execute_transformation_query(bq_client):
    """Executes transformation query to a new destination table.
    Args:
        bq_client: Object representing a reference to a BigQuery Client
    """
    dataset_ref = bq_client.get_dataset(bigquery.DatasetReference(
        project=config.config_vars['billing_project_id'],
        dataset_id=config.config_vars['output_dataset_id']))
    table_ref = dataset_ref.table(config.config_vars['output_table_name'])
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job_config.time_partitioning = bigquery.TimePartitioning(
        field='usage_start_time',
        expiration_ms=None)
    sql = file_to_string(config.config_vars['sql_file_path'])
    sql = sql.format(**config.config_vars)
    logging.info('Attempting query on all dates...')
    # Execute Query
    query_job = bq_client.query(
        sql,
        job_config=job_config)

    query_job.result()  # Waits for the query to finish
    logging.info('Transformation query complete. All partitions are updated.') 
Example #2
Source File: system.py    From python-bigquery with Apache License 2.0
def test_copy_table(self):
        # If we create a new table to copy from, the test won't work
        # because the new rows will be stored in the streaming buffer,
        # and copy jobs don't read the streaming buffer.
        # We could wait for the streaming buffer to empty, but that could
        # take minutes. Instead we copy a small public table.
        source_dataset = DatasetReference("bigquery-public-data", "samples")
        source_ref = source_dataset.table("shakespeare")
        dest_dataset = self.temp_dataset(_make_dataset_id("copy_table"))
        dest_ref = dest_dataset.table("destination_table")
        job_config = bigquery.CopyJobConfig()
        job = Config.CLIENT.copy_table(source_ref, dest_ref, job_config=job_config)
        job.result()

        dest_table = Config.CLIENT.get_table(dest_ref)
        self.to_delete.insert(0, dest_table)
        # Just check that we got some rows.
        got_rows = self._fetch_single_page(dest_table)
        self.assertTrue(len(got_rows) > 0) 
Example #3
Source File: system.py    From python-bigquery with Apache License 2.0
def test_extract_table(self):
        local_id = unique_resource_id()
        bucket_name = "bq_extract_test" + local_id
        source_blob_name = "person_ages.csv"
        dataset_id = _make_dataset_id("load_gcs_then_extract")
        table_id = "test_table"
        project = Config.CLIENT.project
        dataset_ref = bigquery.DatasetReference(project, dataset_id)
        table_ref = dataset_ref.table(table_id)
        table = Table(table_ref)
        self.to_delete.insert(0, table)
        bucket = self._create_bucket(bucket_name)
        self._load_table_for_extract_table(bucket, source_blob_name, table_ref, ROWS)
        destination_blob_name = "person_ages_out.csv"
        destination = bucket.blob(destination_blob_name)
        destination_uri = "gs://{}/person_ages_out.csv".format(bucket_name)

        job = Config.CLIENT.extract_table(table_ref, destination_uri)
        job.result(timeout=100)

        self.to_delete.insert(0, destination)
        got_bytes = retry_storage_errors(destination.download_as_string)()
        got = got_bytes.decode("utf-8")
        self.assertIn("Bharney Rhubble", got) 
Example #4
Source File: system.py    From python-bigquery with Apache License 2.0
def test_get_table_w_public_dataset(self):
        public = "bigquery-public-data"
        dataset_id = "samples"
        table_id = "shakespeare"
        table_ref = DatasetReference(public, dataset_id).table(table_id)

        # Get table with reference.
        table = Config.CLIENT.get_table(table_ref)
        self.assertEqual(table.table_id, table_id)
        self.assertEqual(table.dataset_id, dataset_id)
        self.assertEqual(table.project, public)
        schema_names = [field.name for field in table.schema]
        self.assertEqual(schema_names, ["word", "word_count", "corpus", "corpus_date"])

        # Get table with string.
        table = Config.CLIENT.get_table("{}.{}.{}".format(public, dataset_id, table_id))
        self.assertEqual(table.table_id, table_id)
        self.assertEqual(table.dataset_id, dataset_id)
        self.assertEqual(table.project, public) 
Example #5
Source File: system.py    From python-bigquery with Apache License 2.0
def tearDown(self):
        def _still_in_use(bad_request):
            return any(
                error["reason"] == "resourceInUse" for error in bad_request._errors
            )

        retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
        retry_storage_errors_conflict = RetryErrors(
            (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable)
        )
        for doomed in self.to_delete:
            if isinstance(doomed, storage.Bucket):
                retry_storage_errors_conflict(doomed.delete)(force=True)
            elif isinstance(doomed, (Dataset, bigquery.DatasetReference)):
                retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True)
            elif isinstance(doomed, (Table, bigquery.TableReference)):
                retry_in_use(Config.CLIENT.delete_table)(doomed)
            else:
                doomed.delete() 
Example #6
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_list_rows_as_dataframe(client):
    # [START bigquery_list_rows_dataframe]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    project = "bigquery-public-data"
    dataset_id = "samples"

    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    table = client.get_table(table_ref)

    df = client.list_rows(table).to_dataframe()
    # [END bigquery_list_rows_dataframe]
    assert isinstance(df, pandas.DataFrame)
    assert len(list(df)) == len(table.schema)  # verify the number of columns
    assert len(df) == table.num_rows  # verify the number of rows 
Example #7
Source File: bigquery_test.py    From loaner with Apache License 2.0
def setUp(self):
    super(BigQueryClientTest, self).setUp()
    bq_patcher = mock.patch.object(gcloud_bq, 'Client', autospec=True)
    self.addCleanup(bq_patcher.stop)
    self.bq_mock = bq_patcher.start()
    self.dataset_ref = mock.Mock(spec=gcloud_bq.DatasetReference)
    self.table = mock.Mock(spec=gcloud_bq.Table)
    self.table.schema = []
    self.dataset_ref.table.return_value = self.table
    with mock.patch.object(
        bigquery.BigQueryClient, '__init__', return_value=None):
      self.client = bigquery.BigQueryClient()
      self.client._client = self.bq_mock()
      self.client._dataset_ref = self.dataset_ref
      self.client._client.insert_rows.return_value = None
      self.client._client.get_table.return_value = self.table
    self.nested_schema = [
        gcloud_bq.SchemaField('nested_string_attribute', 'STRING', 'NULLABLE')]
    self.entity_schema = [
        gcloud_bq.SchemaField('string_attribute', 'STRING', 'NULLABLE'),
        gcloud_bq.SchemaField('integer_attribute', 'INTEGER', 'NULLABLE'),
        gcloud_bq.SchemaField('boolean_attribute', 'BOOLEAN', 'NULLABLE'),
        gcloud_bq.SchemaField(
            'nested_attribute', 'RECORD', 'NULLABLE', fields=self.nested_schema)
    ]

    test_device = device_model.Device(
        serial_number='abc123', chrome_device_id='123123')
    test_device.put()
    test_row = bigquery_row_model.BigQueryRow.add(
        test_device, datetime.datetime.utcnow(),
        loanertest.USER_EMAIL, 'Enroll', 'This is a test')
    self.test_row_dict = test_row.to_json_dict()
    self.test_table = [(self.test_row_dict['ndb_key'],
                        self.test_row_dict['timestamp'],
                        self.test_row_dict['actor'],
                        self.test_row_dict['method'],
                        self.test_row_dict['summary'],
                        self.test_row_dict['entity'])] 
Example #8
Source File: helper.py    From professional-services with Apache License 2.0
def execute_query(bq_client: bigquery.Client, env_vars: dict, query_path: object,
                  output_table_name: str, time_partition: bool) -> None:
    """Executes transformation query to a new destination table.
    Args:
        bq_client: bigquery.Client object
        env_vars: Dictionary of key: value, where value is environment variable
        query_path: Object representing location of SQL query to execute
        output_table_name: String representing name of table that holds output
        time_partition: Boolean indicating whether to time-partition output
    """
    dataset_ref = bq_client.get_dataset(
        bigquery.DatasetReference(project=bq_client.project,
                                  dataset_id=env_vars['corrected_dataset_id']))
    table_ref = dataset_ref.table(output_table_name)
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    # Time Partitioning table is only needed for final output query
    if time_partition:
        job_config.time_partitioning = bigquery.TimePartitioning(
            field='usage_start_time', expiration_ms=None)
    logging.info('Attempting query...')
    # Execute Query
    query_job = bq_client.query(query=render_template(query_path, env_vars), job_config=job_config)

    query_job.result()  # Waits for the query to finish 
Example #9
Source File: mock_bigquery_client.py    From ethereum-etl-airflow with MIT License
def dataset(self, dataset_id, project=None):
        if project is None:
            project = PROJECT
        return DatasetReference(project, dataset_id) 
Example #10
Source File: system.py    From python-bigquery with Apache License 2.0
def _table_exists(t):
    try:
        tr = DatasetReference(t.project, t.dataset_id).table(t.table_id)
        Config.CLIENT.get_table(tr)
        return True
    except NotFound:
        return False 
Example #11
Source File: system.py    From python-bigquery with Apache License 2.0
def _dataset_exists(ds):
    try:
        Config.CLIENT.get_dataset(DatasetReference(ds.project, ds.dataset_id))
        return True
    except NotFound:
        return False 
Example #12
Source File: system.py    From python-bigquery with Apache License 2.0
def temp_dataset(self, dataset_id, location=None):
        project = Config.CLIENT.project
        dataset_ref = bigquery.DatasetReference(project, dataset_id)
        dataset = Dataset(dataset_ref)
        if location:
            dataset.location = location
        dataset = retry_403(Config.CLIENT.create_dataset)(dataset)
        self.to_delete.append(dataset)
        return dataset 
Example #13
Source File: system.py    From python-bigquery with Apache License 2.0
def test_query_w_wrong_config(self):
        from google.cloud.bigquery.job import LoadJobConfig

        good_query = "SELECT 1;"
        rows = list(Config.CLIENT.query("SELECT 1;").result())
        assert rows[0][0] == 1

        project = Config.CLIENT.project
        dataset_ref = bigquery.DatasetReference(project, "dset")
        bad_config = LoadJobConfig()
        bad_config.destination = dataset_ref.table("tbl")
        with self.assertRaises(Exception):
            Config.CLIENT.query(good_query, job_config=bad_config).result() 
Example #14
Source File: system.py    From python-bigquery with Apache License 2.0
def test_list_partitions(self):
        table_ref = DatasetReference(
            "bigquery-public-data", "ethereum_blockchain"
        ).table("blocks")
        all_rows = Config.CLIENT.list_partitions(table_ref)
        self.assertIn("20180801", all_rows)
        self.assertGreater(len(all_rows), 1000) 
Example #15
Source File: system.py    From python-bigquery with Apache License 2.0
def test_delete_dataset_with_string(self):
        dataset_id = _make_dataset_id("delete_table_true")
        project = Config.CLIENT.project
        dataset_ref = bigquery.DatasetReference(project, dataset_id)
        retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref))
        self.assertTrue(_dataset_exists(dataset_ref))
        Config.CLIENT.delete_dataset(dataset_id)
        self.assertFalse(_dataset_exists(dataset_ref)) 
Example #16
Source File: system.py    From python-bigquery with Apache License 2.0
def test_get_dataset(self):
        dataset_id = _make_dataset_id("get_dataset")
        client = Config.CLIENT
        project = client.project
        dataset_ref = bigquery.DatasetReference(project, dataset_id)
        dataset_arg = Dataset(dataset_ref)
        dataset_arg.friendly_name = "Friendly"
        dataset_arg.description = "Description"
        dataset = retry_403(client.create_dataset)(dataset_arg)
        self.to_delete.append(dataset)
        dataset_ref = bigquery.DatasetReference(project, dataset_id)

        # Get with a reference.
        got = client.get_dataset(dataset_ref)
        self.assertEqual(got.friendly_name, "Friendly")
        self.assertEqual(got.description, "Description")

        # Get with a string.
        got = client.get_dataset(dataset_id)
        self.assertEqual(got.friendly_name, "Friendly")
        self.assertEqual(got.description, "Description")

        # Get with a fully-qualified string.
        got = client.get_dataset("{}.{}".format(client.project, dataset_id))
        self.assertEqual(got.friendly_name, "Friendly")
        self.assertEqual(got.description, "Description") 
Example #17
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_extract_table_compressed(client, to_delete):
    bucket_name = "extract_shakespeare_compress_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)
    project = "bigquery-public-data"
    dataset_id = "samples"

    # [START bigquery_extract_table_compressed]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv.gz")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    job_config = bigquery.job.ExtractJobConfig()
    job_config.compression = bigquery.Compression.GZIP

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        # Location must match that of the source table.
        location="US",
        job_config=job_config,
    )  # API request
    extract_job.result()  # Waits for job to complete.
    # [END bigquery_extract_table_compressed]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv.gz")
    assert blob.exists()
    assert blob.size > 0
    to_delete.insert(0, blob) 
Example #18
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_extract_table_json(client, to_delete):
    bucket_name = "extract_shakespeare_json_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)
    project = "bigquery-public-data"
    dataset_id = "samples"

    # [START bigquery_extract_table_json]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.json")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    job_config = bigquery.job.ExtractJobConfig()
    job_config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        job_config=job_config,
        # Location must match that of the source table.
        location="US",
    )  # API request
    extract_job.result()  # Waits for job to complete.
    # [END bigquery_extract_table_json]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.json")
    assert blob.exists()
    assert blob.size > 0
    to_delete.insert(0, blob) 
Example #19
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_extract_table(client, to_delete):
    bucket_name = "extract_shakespeare_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)

    # [START bigquery_extract_table]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'
    project = "bigquery-public-data"
    dataset_id = "samples"
    table_id = "shakespeare"

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        # Location must match that of the source table.
        location="US",
    )  # API request
    extract_job.result()  # Waits for job to complete.

    print(
        "Exported {}:{}.{} to {}".format(project, dataset_id, table_id, destination_uri)
    )
    # [END bigquery_extract_table]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv")
    assert blob.exists()
    assert blob.size > 0
    to_delete.insert(0, blob) 
Example #20
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_update_table_cmek(client, to_delete):
    """Patch a table's metadata."""
    dataset_id = "update_table_cmek_{}".format(_millis())
    table_id = "update_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table = bigquery.Table(dataset.table(table_id))
    original_kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=original_kms_key_name
    )
    table = client.create_table(table)

    # [START bigquery_update_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    assert table.encryption_configuration.kms_key_name == original_kms_key_name

    # Set a new encryption key to use for the destination.
    # TODO: Replace this key with a key you have created in KMS.
    updated_kms_key_name = (
        "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/otherkey"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=updated_kms_key_name
    )

    table = client.update_table(table, ["encryption_configuration"])  # API request

    assert table.encryption_configuration.kms_key_name == updated_kms_key_name
    assert original_kms_key_name != updated_kms_key_name
    # [END bigquery_update_table_cmek] 
Example #21
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_relax_column(client, to_delete):
    """Updates a schema field from required to nullable."""
    dataset_id = "relax_column_dataset_{}".format(_millis())
    table_id = "relax_column_table_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_relax_column]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'
    # table_id = 'my_table'

    original_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)
    table = bigquery.Table(table_ref, schema=original_schema)
    table = client.create_table(table)
    assert all(field.mode == "REQUIRED" for field in table.schema)

    # SchemaField properties cannot be edited after initialization.
    # To make changes, construct new SchemaField objects.
    relaxed_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    table.schema = relaxed_schema
    table = client.update_table(table, ["schema"])

    assert all(field.mode == "NULLABLE" for field in table.schema)
    # [END bigquery_relax_column] 
Example #22
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_update_table_description(client, to_delete):
    """Update a table's description."""
    dataset_id = "update_table_description_dataset_{}".format(_millis())
    table_id = "update_table_description_table_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table = bigquery.Table(dataset.table(table_id), schema=SCHEMA)
    table.description = "Original description."
    table = client.create_table(table)

    # [START bigquery_update_table_description]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, dataset_id)
    # table_ref = dataset_ref.table('my_table')
    # table = client.get_table(table_ref)  # API request

    assert table.description == "Original description."
    table.description = "Updated description."

    table = client.update_table(table, ["description"])  # API request

    assert table.description == "Updated description."
    # [END bigquery_update_table_description] 
Example #23
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_partitioned_table(client, to_delete):
    dataset_id = "create_table_partitioned_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = client.create_dataset(dataset_ref)
    to_delete.append(dataset)

    # [START bigquery_create_table_partitioned]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    table_ref = dataset_ref.table("my_partitioned_table")
    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    table = bigquery.Table(table_ref, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",  # name of column to use for partitioning
        expiration_ms=7776000000,  # 90 days
    )

    table = client.create_table(table)

    print(
        "Created table {}, partitioned on column {}".format(
            table.table_id, table.time_partitioning.field
        )
    )
    # [END bigquery_create_table_partitioned]

    assert table.time_partitioning.type_ == "DAY"
    assert table.time_partitioning.field == "date"
    assert table.time_partitioning.expiration_ms == 7776000000 
Example #24
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_table_cmek(client, to_delete):
    dataset_id = "create_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_create_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    table_ref = dataset.table("my_table")
    table = bigquery.Table(table_ref)

    # Set the encryption key to use for the table.
    # TODO: Replace this key with a key you have created in Cloud KMS.
    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=kms_key_name
    )

    table = client.create_table(table)  # API request

    assert table.encryption_configuration.kms_key_name == kms_key_name
    # [END bigquery_create_table_cmek] 
Example #25
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_table_nested_repeated_schema(client, to_delete):
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table_ref = dataset_ref.table("my_table")
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)  # API request

    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema] 
Example #26
Source File: snippets.py    From python-bigquery with Apache License 2.0
def to_delete(client):
    doomed = []
    yield doomed
    for item in doomed:
        if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)):
            retry_429(client.delete_dataset)(item, delete_contents=True)
        elif isinstance(item, storage.Bucket):
            retry_storage_errors(item.delete)()
        else:
            retry_429(item.delete)() 
Example #27
Source File: test__query.py    From pipelines with Apache License 2.0
def test_query_no_output_path(self, mock_client,
        mock_kfp_context, mock_dump_json, mock_display):
        mock_kfp_context().__enter__().context_id.return_value = 'ctx1'
        mock_client().get_job.side_effect = exceptions.NotFound('not found')
        mock_dataset = bigquery.DatasetReference('project-1', 'dataset-1')
        mock_client().dataset.return_value = mock_dataset
        mock_client().get_dataset.return_value = bigquery.Dataset(mock_dataset)
        mock_response = {
            'configuration': {
                'query': {
                    'query': 'SELECT * FROM table_1'
                }
            }
        }
        mock_client().query.return_value.to_api_repr.return_value = mock_response

        result = query('SELECT * FROM table_1', 'project-1', 'dataset-1', 'table-1')

        self.assertEqual(mock_response, result)
        mock_client().create_dataset.assert_not_called()
        mock_client().extract_table.assert_not_called()

        expected_job_config = bigquery.QueryJobConfig()
        expected_job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
        expected_job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
        expected_job_config.destination = mock_dataset.table('table-1')
        mock_client().query.assert_called_with('SELECT * FROM table_1', mock.ANY,
            job_id='query_ctx1')
        actual_job_config = mock_client().query.call_args_list[0][0][1]
        self.assertDictEqual(
            expected_job_config.to_api_repr(),
            actual_job_config.to_api_repr()
        ) 
Example #28
Source File: test__query.py    From pipelines with Apache License 2.0
def test_query_succeed(self, mock_client,
        mock_kfp_context, mock_dump_json, mock_display):
        mock_kfp_context().__enter__().context_id.return_value = 'ctx1'
        mock_client().get_job.side_effect = exceptions.NotFound('not found')
        mock_dataset = bigquery.DatasetReference('project-1', 'dataset-1')
        mock_client().dataset.return_value = mock_dataset
        mock_client().get_dataset.side_effect = exceptions.NotFound('not found')
        mock_response = {
            'configuration': {
                'query': {
                    'query': 'SELECT * FROM table_1'
                }
            }
        }
        mock_client().query.return_value.to_api_repr.return_value = mock_response

        result = query('SELECT * FROM table_1', 'project-1', 'dataset-1', 
            output_gcs_path='gs://output/path')

        self.assertEqual(mock_response, result)
        mock_client().create_dataset.assert_called()
        expected_job_config = bigquery.QueryJobConfig()
        expected_job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
        expected_job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
        expected_job_config.destination = mock_dataset.table('query_ctx1')
        mock_client().query.assert_called_with('SELECT * FROM table_1', mock.ANY,
            job_id='query_ctx1')
        actual_job_config = mock_client().query.call_args_list[0][0][1]
        self.assertDictEqual(
            expected_job_config.to_api_repr(),
            actual_job_config.to_api_repr()
        )
        mock_client().extract_table.assert_called_with(
            mock_dataset.table('query_ctx1'),
            'gs://output/path') 
Example #29
Source File: samples_test.py    From python-docs-samples with Apache License 2.0
def temp_dataset():
    from google.cloud import bigquery

    client = bigquery.Client()
    dataset_id = "temp_dataset_{}".format(int(time.time() * 1000))
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    yield dataset
    client.delete_dataset(dataset, delete_contents=True) 
Example #30
Source File: bigquery.py    From loaner with Apache License 2.0
def __init__(self):
    if constants.ON_LOCAL:
      return
    self._client = bigquery.Client()
    self._dataset_ref = bigquery.DatasetReference(
        self._client.project, constants.BIGQUERY_DATASET_NAME)