Python google.cloud.bigquery.DatasetReference() Examples

The following are 30 code examples of google.cloud.bigquery.DatasetReference(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
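
Before the project examples, here is a minimal sketch of the common pattern they share: construct a DatasetReference, derive a table reference from it, and pass either to a client method. This snippet is not taken from any of the projects below; "my-project", "my_dataset", and "my_table" are placeholder identifiers, and it assumes default credentials and a default project are configured.

from google.cloud import bigquery

client = bigquery.Client()  # assumes default credentials/project are configured
dataset_ref = bigquery.DatasetReference("my-project", "my_dataset")  # placeholder IDs
table_ref = dataset_ref.table("my_table")  # derive a TableReference from the dataset reference
dataset = client.get_dataset(dataset_ref)  # API request: fetch the dataset's metadata
print(dataset.dataset_id, table_ref.table_id)

Most of the examples below follow this pattern, passing the reference to methods such as get_dataset(), get_table(), or using it to build a job destination.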
Example #1
Source File: main.py    From professional-services with Apache License 2.0
def execute_transformation_query(bq_client):
    """Executes transformation query to a new destination table.
    Args:
        bq_client: Object representing a reference to a BigQuery Client
    """
    dataset_ref = bq_client.get_dataset(bigquery.DatasetReference(
        project=config.config_vars['billing_project_id'],
        dataset_id=config.config_vars['output_dataset_id']))
    table_ref = dataset_ref.table(config.config_vars['output_table_name'])
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job_config.time_partitioning = bigquery.TimePartitioning(
        field='usage_start_time',
        expiration_ms=None)
    sql = file_to_string(config.config_vars['sql_file_path'])
    sql = sql.format(**config.config_vars)
    logging.info('Attempting query on all dates...')
    # Execute Query
    query_job = bq_client.query(
        sql,
        job_config=job_config)

    query_job.result()  # Waits for the query to finish
    logging.info('Transformation query complete. All partitions are updated.') 
Example #2
Source File: system.py    From python-bigquery with Apache License 2.0
def test_copy_table(self):
        # If we create a new table to copy from, the test won't work
        # because the new rows will be stored in the streaming buffer,
        # and copy jobs don't read the streaming buffer.
        # We could wait for the streaming buffer to empty, but that could
        # take minutes. Instead we copy a small public table.
        source_dataset = DatasetReference("bigquery-public-data", "samples")
        source_ref = source_dataset.table("shakespeare")
        dest_dataset = self.temp_dataset(_make_dataset_id("copy_table"))
        dest_ref = dest_dataset.table("destination_table")
        job_config = bigquery.CopyJobConfig()
        job = Config.CLIENT.copy_table(source_ref, dest_ref, job_config=job_config)
        job.result()

        dest_table = Config.CLIENT.get_table(dest_ref)
        self.to_delete.insert(0, dest_table)
        # Just check that we got some rows.
        got_rows = self._fetch_single_page(dest_table)
        self.assertTrue(len(got_rows) > 0) 
Example #3
Source File: system.py    From python-bigquery with Apache License 2.0
def test_extract_table(self):
        local_id = unique_resource_id()
        bucket_name = "bq_extract_test" + local_id
        source_blob_name = "person_ages.csv"
        dataset_id = _make_dataset_id("load_gcs_then_extract")
        table_id = "test_table"
        project = Config.CLIENT.project
        dataset_ref = bigquery.DatasetReference(project, dataset_id)
        table_ref = dataset_ref.table(table_id)
        table = Table(table_ref)
        self.to_delete.insert(0, table)
        bucket = self._create_bucket(bucket_name)
        self._load_table_for_extract_table(bucket, source_blob_name, table_ref, ROWS)
        destination_blob_name = "person_ages_out.csv"
        destination = bucket.blob(destination_blob_name)
        destination_uri = "gs://{}/person_ages_out.csv".format(bucket_name)

        job = Config.CLIENT.extract_table(table_ref, destination_uri)
        job.result(timeout=100)

        self.to_delete.insert(0, destination)
        got_bytes = retry_storage_errors(destination.download_as_string)()
        got = got_bytes.decode("utf-8")
        self.assertIn("Bharney Rhubble", got) 
Example #4
Source File: system.py    From python-bigquery with Apache License 2.0
def test_get_table_w_public_dataset(self):
        public = "bigquery-public-data"
        dataset_id = "samples"
        table_id = "shakespeare"
        table_ref = DatasetReference(public, dataset_id).table(table_id)

        # Get table with reference.
        table = Config.CLIENT.get_table(table_ref)
        self.assertEqual(table.table_id, table_id)
        self.assertEqual(table.dataset_id, dataset_id)
        self.assertEqual(table.project, public)
        schema_names = [field.name for field in table.schema]
        self.assertEqual(schema_names, ["word", "word_count", "corpus", "corpus_date"])

        # Get table with string.
        table = Config.CLIENT.get_table("{}.{}.{}".format(public, dataset_id, table_id))
        self.assertEqual(table.table_id, table_id)
        self.assertEqual(table.dataset_id, dataset_id)
        self.assertEqual(table.project, public) 
Example #5
Source File: system.py    From python-bigquery with Apache License 2.0
def tearDown(self):
        def _still_in_use(bad_request):
            return any(
                error["reason"] == "resourceInUse" for error in bad_request._errors
            )

        retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
        retry_storage_errors_conflict = RetryErrors(
            (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable)
        )
        for doomed in self.to_delete:
            if isinstance(doomed, storage.Bucket):
                retry_storage_errors_conflict(doomed.delete)(force=True)
            elif isinstance(doomed, (Dataset, bigquery.DatasetReference)):
                retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True)
            elif isinstance(doomed, (Table, bigquery.TableReference)):
                retry_in_use(Config.CLIENT.delete_table)(doomed)
            else:
                doomed.delete() 
Example #6
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_list_rows_as_dataframe(client):
    # [START bigquery_list_rows_dataframe]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    project = "bigquery-public-data"
    dataset_id = "samples"

    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    table = client.get_table(table_ref)

    df = client.list_rows(table).to_dataframe()
    # [END bigquery_list_rows_dataframe]
    assert isinstance(df, pandas.DataFrame)
    assert len(list(df)) == len(table.schema)  # verify the number of columns
    assert len(df) == table.num_rows  # verify the number of rows 
Example #7
Source File: bigquery_test.py    From loaner with Apache License 2.0
def setUp(self):
    super(BigQueryClientTest, self).setUp()
    bq_patcher = mock.patch.object(gcloud_bq, 'Client', autospec=True)
    self.addCleanup(bq_patcher.stop)
    self.bq_mock = bq_patcher.start()
    self.dataset_ref = mock.Mock(spec=gcloud_bq.DatasetReference)
    self.table = mock.Mock(spec=gcloud_bq.Table)
    self.table.schema = []
    self.dataset_ref.table.return_value = self.table
    with mock.patch.object(
        bigquery.BigQueryClient, '__init__', return_value=None):
      self.client = bigquery.BigQueryClient()
      self.client._client = self.bq_mock()
      self.client._dataset_ref = self.dataset_ref
      self.client._client.insert_rows.return_value = None
      self.client._client.get_table.return_value = self.table
    self.nested_schema = [
        gcloud_bq.SchemaField('nested_string_attribute', 'STRING', 'NULLABLE')]
    self.entity_schema = [
        gcloud_bq.SchemaField('string_attribute', 'STRING', 'NULLABLE'),
        gcloud_bq.SchemaField('integer_attribute', 'INTEGER', 'NULLABLE'),
        gcloud_bq.SchemaField('boolean_attribute', 'BOOLEAN', 'NULLABLE'),
        gcloud_bq.SchemaField(
            'nested_attribute', 'RECORD', 'NULLABLE', fields=self.nested_schema)
    ]

    test_device = device_model.Device(
        serial_number='abc123', chrome_device_id='123123')
    test_device.put()
    test_row = bigquery_row_model.BigQueryRow.add(
        test_device, datetime.datetime.utcnow(),
        loanertest.USER_EMAIL, 'Enroll', 'This is a test')
    self.test_row_dict = test_row.to_json_dict()
    self.test_table = [(self.test_row_dict['ndb_key'],
                        self.test_row_dict['timestamp'],
                        self.test_row_dict['actor'],
                        self.test_row_dict['method'],
                        self.test_row_dict['summary'],
                        self.test_row_dict['entity'])] 
Example #8
Source File: helper.py    From professional-services with Apache License 2.0
def execute_query(bq_client: bigquery.Client, env_vars: dict, query_path: object,
                  output_table_name: str, time_partition: bool) -> None:
    """Executes transformation query to a new destination table.
    Args:
        bq_client: bigquery.Client object
        env_vars: Dictionary of key: value, where value is environment variable
        query_path: Object representing location of SQL query to execute
        output_table_name: String representing name of table that holds output
        time_partition: Boolean indicating whether to time-partition output
    """
    dataset_ref = bq_client.get_dataset(
        bigquery.DatasetReference(project=bq_client.project,
                                  dataset_id=env_vars['corrected_dataset_id']))
    table_ref = dataset_ref.table(output_table_name)
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    # Time Partitioning table is only needed for final output query
    if time_partition:
        job_config.time_partitioning = bigquery.TimePartitioning(
            field='usage_start_time', expiration_ms=None)
    logging.info('Attempting query...')
    # Execute Query
    query_job = bq_client.query(query=render_template(query_path, env_vars), job_config=job_config)

    query_job.result()  # Waits for the query to finish 
Example #9
Source File: mock_bigquery_client.py    From ethereum-etl-airflow with MIT License
def dataset(self, dataset_id, project=None):
        if project is None:
            project = PROJECT
        return DatasetReference(project, dataset_id) 
Example #10
Source File: system.py    From python-bigquery with Apache License 2.0
def _table_exists(t):
    try:
        tr = DatasetReference(t.project, t.dataset_id).table(t.table_id)
        Config.CLIENT.get_table(tr)
        return True
    except NotFound:
        return False 
Example #11
Source File: system.py    From python-bigquery with Apache License 2.0
def _dataset_exists(ds):
    try:
        Config.CLIENT.get_dataset(DatasetReference(ds.project, ds.dataset_id))
        return True
    except NotFound:
        return False 
Example #12
Source File: system.py    From python-bigquery with Apache License 2.0
def temp_dataset(self, dataset_id, location=None):
        project = Config.CLIENT.project
        dataset_ref = bigquery.DatasetReference(project, dataset_id)
        dataset = Dataset(dataset_ref)
        if location:
            dataset.location = location
        dataset = retry_403(Config.CLIENT.create_dataset)(dataset)
        self.to_delete.append(dataset)
        return dataset 
Example #13
Source File: system.py    From python-bigquery with Apache License 2.0
def test_query_w_wrong_config(self):
        from google.cloud.bigquery.job import LoadJobConfig

        good_query = "SELECT 1;"
        rows = list(Config.CLIENT.query("SELECT 1;").result())
        assert rows[0][0] == 1

        project = Config.CLIENT.project
        dataset_ref = bigquery.DatasetReference(project, "dset")
        bad_config = LoadJobConfig()
        bad_config.destination = dataset_ref.table("tbl")
        with self.assertRaises(Exception):
            Config.CLIENT.query(good_query, job_config=bad_config).result() 
Example #14
Source File: system.py    From python-bigquery with Apache License 2.0
def test_list_partitions(self):
        table_ref = DatasetReference(
            "bigquery-public-data", "ethereum_blockchain"
        ).table("blocks")
        all_rows = Config.CLIENT.list_partitions(table_ref)
        self.assertIn("20180801", all_rows)
        self.assertGreater(len(all_rows), 1000) 
Example #15
Source File: system.py    From python-bigquery with Apache License 2.0
def test_delete_dataset_with_string(self):
        dataset_id = _make_dataset_id("delete_table_true")
        project = Config.CLIENT.project
        dataset_ref = bigquery.DatasetReference(project, dataset_id)
        retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref))
        self.assertTrue(_dataset_exists(dataset_ref))
        Config.CLIENT.delete_dataset(dataset_id)
        self.assertFalse(_dataset_exists(dataset_ref)) 
Example #16
Source File: system.py    From python-bigquery with Apache License 2.0
def test_get_dataset(self):
        dataset_id = _make_dataset_id("get_dataset")
        client = Config.CLIENT
        project = client.project
        dataset_ref = bigquery.DatasetReference(project, dataset_id)
        dataset_arg = Dataset(dataset_ref)
        dataset_arg.friendly_name = "Friendly"
        dataset_arg.description = "Description"
        dataset = retry_403(client.create_dataset)(dataset_arg)
        self.to_delete.append(dataset)
        dataset_ref = bigquery.DatasetReference(project, dataset_id)

        # Get with a reference.
        got = client.get_dataset(dataset_ref)
        self.assertEqual(got.friendly_name, "Friendly")
        self.assertEqual(got.description, "Description")

        # Get with a string.
        got = client.get_dataset(dataset_id)
        self.assertEqual(got.friendly_name, "Friendly")
        self.assertEqual(got.description, "Description")

        # Get with a fully-qualified string.
        got = client.get_dataset("{}.{}".format(client.project, dataset_id))
        self.assertEqual(got.friendly_name, "Friendly")
        self.assertEqual(got.description, "Description") 
Example #17
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_extract_table_compressed(client, to_delete):
    bucket_name = "extract_shakespeare_compress_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)
    project = "bigquery-public-data"
    dataset_id = "samples"

    # [START bigquery_extract_table_compressed]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv.gz")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    job_config = bigquery.job.ExtractJobConfig()
    job_config.compression = bigquery.Compression.GZIP

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        # Location must match that of the source table.
        location="US",
        job_config=job_config,
    )  # API request
    extract_job.result()  # Waits for job to complete.
    # [END bigquery_extract_table_compressed]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv.gz")
    assert blob.exists()
    assert blob.size > 0
    to_delete.insert(0, blob) 
Example #18
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_extract_table_json(client, to_delete):
    bucket_name = "extract_shakespeare_json_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)
    project = "bigquery-public-data"
    dataset_id = "samples"

    # [START bigquery_extract_table_json]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.json")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    job_config = bigquery.job.ExtractJobConfig()
    job_config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        job_config=job_config,
        # Location must match that of the source table.
        location="US",
    )  # API request
    extract_job.result()  # Waits for job to complete.
    # [END bigquery_extract_table_json]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.json")
    assert blob.exists()
    assert blob.size > 0
    to_delete.insert(0, blob) 
Example #19
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_extract_table(client, to_delete):
    bucket_name = "extract_shakespeare_{}".format(_millis())
    storage_client = storage.Client()
    bucket = retry_storage_errors(storage_client.create_bucket)(bucket_name)
    to_delete.append(bucket)

    # [START bigquery_extract_table]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # bucket_name = 'my-bucket'
    project = "bigquery-public-data"
    dataset_id = "samples"
    table_id = "shakespeare"

    destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv")
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        # Location must match that of the source table.
        location="US",
    )  # API request
    extract_job.result()  # Waits for job to complete.

    print(
        "Exported {}:{}.{} to {}".format(project, dataset_id, table_id, destination_uri)
    )
    # [END bigquery_extract_table]

    blob = retry_storage_errors(bucket.get_blob)("shakespeare.csv")
    assert blob.exists()
    assert blob.size > 0
    to_delete.insert(0, blob) 
Example #20
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_update_table_cmek(client, to_delete):
    """Patch a table's metadata."""
    dataset_id = "update_table_cmek_{}".format(_millis())
    table_id = "update_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table = bigquery.Table(dataset.table(table_id))
    original_kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=original_kms_key_name
    )
    table = client.create_table(table)

    # [START bigquery_update_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    assert table.encryption_configuration.kms_key_name == original_kms_key_name

    # Set a new encryption key to use for the destination.
    # TODO: Replace this key with a key you have created in KMS.
    updated_kms_key_name = (
        "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/otherkey"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=updated_kms_key_name
    )

    table = client.update_table(table, ["encryption_configuration"])  # API request

    assert table.encryption_configuration.kms_key_name == updated_kms_key_name
    assert original_kms_key_name != updated_kms_key_name
    # [END bigquery_update_table_cmek] 
Example #21
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_relax_column(client, to_delete):
    """Updates a schema field from required to nullable."""
    dataset_id = "relax_column_dataset_{}".format(_millis())
    table_id = "relax_column_table_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_relax_column]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'
    # table_id = 'my_table'

    original_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)
    table = bigquery.Table(table_ref, schema=original_schema)
    table = client.create_table(table)
    assert all(field.mode == "REQUIRED" for field in table.schema)

    # SchemaField properties cannot be edited after initialization.
    # To make changes, construct new SchemaField objects.
    relaxed_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    table.schema = relaxed_schema
    table = client.update_table(table, ["schema"])

    assert all(field.mode == "NULLABLE" for field in table.schema)
    # [END bigquery_relax_column] 
Example #22
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_update_table_description(client, to_delete):
    """Update a table's description."""
    dataset_id = "update_table_description_dataset_{}".format(_millis())
    table_id = "update_table_description_table_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table = bigquery.Table(dataset.table(table_id), schema=SCHEMA)
    table.description = "Original description."
    table = client.create_table(table)

    # [START bigquery_update_table_description]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, dataset_id)
    # table_ref = dataset_ref.table('my_table')
    # table = client.get_table(table_ref)  # API request

    assert table.description == "Original description."
    table.description = "Updated description."

    table = client.update_table(table, ["description"])  # API request

    assert table.description == "Updated description."
    # [END bigquery_update_table_description] 
Example #23
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_partitioned_table(client, to_delete):
    dataset_id = "create_table_partitioned_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = client.create_dataset(dataset_ref)
    to_delete.append(dataset)

    # [START bigquery_create_table_partitioned]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    table_ref = dataset_ref.table("my_partitioned_table")
    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    table = bigquery.Table(table_ref, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",  # name of column to use for partitioning
        expiration_ms=7776000000,  # 90 days
    )

    table = client.create_table(table)

    print(
        "Created table {}, partitioned on column {}".format(
            table.table_id, table.time_partitioning.field
        )
    )
    # [END bigquery_create_table_partitioned]

    assert table.time_partitioning.type_ == "DAY"
    assert table.time_partitioning.field == "date"
    assert table.time_partitioning.expiration_ms == 7776000000 
Example #24
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_table_cmek(client, to_delete):
    dataset_id = "create_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_create_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    table_ref = dataset.table("my_table")
    table = bigquery.Table(table_ref)

    # Set the encryption key to use for the table.
    # TODO: Replace this key with a key you have created in Cloud KMS.
    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=kms_key_name
    )

    table = client.create_table(table)  # API request

    assert table.encryption_configuration.kms_key_name == kms_key_name
    # [END bigquery_create_table_cmek] 
Example #25
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_table_nested_repeated_schema(client, to_delete):
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table_ref = dataset_ref.table("my_table")
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)  # API request

    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema] 
Example #26
Source File: snippets.py    From python-bigquery with Apache License 2.0
def to_delete(client):
    doomed = []
    yield doomed
    for item in doomed:
        if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)):
            retry_429(client.delete_dataset)(item, delete_contents=True)
        elif isinstance(item, storage.Bucket):
            retry_storage_errors(item.delete)()
        else:
            retry_429(item.delete)() 
Example #27
Source File: test__query.py    From pipelines with Apache License 2.0
def test_query_no_output_path(self, mock_client,
        mock_kfp_context, mock_dump_json, mock_display):
        mock_kfp_context().__enter__().context_id.return_value = 'ctx1'
        mock_client().get_job.side_effect = exceptions.NotFound('not found')
        mock_dataset = bigquery.DatasetReference('project-1', 'dataset-1')
        mock_client().dataset.return_value = mock_dataset
        mock_client().get_dataset.return_value = bigquery.Dataset(mock_dataset)
        mock_response = {
            'configuration': {
                'query': {
                    'query': 'SELECT * FROM table_1'
                }
            }
        }
        mock_client().query.return_value.to_api_repr.return_value = mock_response

        result = query('SELECT * FROM table_1', 'project-1', 'dataset-1', 'table-1')

        self.assertEqual(mock_response, result)
        mock_client().create_dataset.assert_not_called()
        mock_client().extract_table.assert_not_called()

        expected_job_config = bigquery.QueryJobConfig()
        expected_job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
        expected_job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
        expected_job_config.destination = mock_dataset.table('table-1')
        mock_client().query.assert_called_with('SELECT * FROM table_1', mock.ANY,
            job_id='query_ctx1')
        actual_job_config = mock_client().query.call_args_list[0][0][1]
        self.assertDictEqual(
            expected_job_config.to_api_repr(),
            actual_job_config.to_api_repr()
        ) 
Example #28
Source File: test__query.py    From pipelines with Apache License 2.0
def test_query_succeed(self, mock_client,
        mock_kfp_context, mock_dump_json, mock_display):
        mock_kfp_context().__enter__().context_id.return_value = 'ctx1'
        mock_client().get_job.side_effect = exceptions.NotFound('not found')
        mock_dataset = bigquery.DatasetReference('project-1', 'dataset-1')
        mock_client().dataset.return_value = mock_dataset
        mock_client().get_dataset.side_effect = exceptions.NotFound('not found')
        mock_response = {
            'configuration': {
                'query': {
                    'query': 'SELECT * FROM table_1'
                }
            }
        }
        mock_client().query.return_value.to_api_repr.return_value = mock_response

        result = query('SELECT * FROM table_1', 'project-1', 'dataset-1', 
            output_gcs_path='gs://output/path')

        self.assertEqual(mock_response, result)
        mock_client().create_dataset.assert_called()
        expected_job_config = bigquery.QueryJobConfig()
        expected_job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
        expected_job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
        expected_job_config.destination = mock_dataset.table('query_ctx1')
        mock_client().query.assert_called_with('SELECT * FROM table_1', mock.ANY,
            job_id='query_ctx1')
        actual_job_config = mock_client().query.call_args_list[0][0][1]
        self.assertDictEqual(
            expected_job_config.to_api_repr(),
            actual_job_config.to_api_repr()
        )
        mock_client().extract_table.assert_called_with(
            mock_dataset.table('query_ctx1'),
            'gs://output/path') 
Example #29
Source File: samples_test.py    From python-docs-samples with Apache License 2.0
def temp_dataset():
    from google.cloud import bigquery

    client = bigquery.Client()
    dataset_id = "temp_dataset_{}".format(int(time.time() * 1000))
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    yield dataset
    client.delete_dataset(dataset, delete_contents=True) 
Example #30
Source File: bigquery.py    From loaner with Apache License 2.0
def __init__(self):
    if constants.ON_LOCAL:
      return
    self._client = bigquery.Client()
    self._dataset_ref = bigquery.DatasetReference(
        self._client.project, constants.BIGQUERY_DATASET_NAME)