Python google.cloud.bigquery.Dataset() Examples

The following are 28 code examples of google.cloud.bigquery.Dataset(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
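
As a quick orientation, here is a minimal sketch of the pattern most of these examples share; the project and dataset IDs are placeholders, and application-default credentials are assumed:

from google.cloud import bigquery

client = bigquery.Client()

# Dataset() accepts a fully-qualified "project.dataset" string or a
# DatasetReference; both forms appear in the examples below.
dataset = bigquery.Dataset("your-project.your_dataset")  # placeholder IDs
dataset.location = "US"

# exists_ok=True makes the call idempotent instead of raising Conflict.
dataset = client.create_dataset(dataset, exists_ok=True)
print("Dataset {}.{} is ready".format(dataset.project, dataset.dataset_id))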
Example #1
Source File: test_nested_user_info_updater.py    From professional-services with Apache License 2.0
def setup(self):
        """Sets up resources for tests.
        """
        self.bq_client = bigquery.Client()
        self.dataset_id = 'user_updater_test'
        self.dataset_ref = self.bq_client.dataset(self.dataset_id)
        try:
            self.dataset = self.bq_client.get_dataset(self.dataset_ref)
        except exceptions.NotFound:
            dataset = bigquery.Dataset(self.dataset_ref)
            self.dataset = self.bq_client.create_dataset(dataset)
        schema_path = 'test_schemas/test_nested_schema.json'
        abs_path = os.path.abspath(os.path.dirname(__file__))
        self.schema_path = os.path.join(abs_path, schema_path)
        schema = user_schema.UserSchema(self.schema_path)
        self.bq_schema = schema.translate_json_schema()
        self.user_info_updates_id = 'test_nested_user_info_updates'
        self.user_info_updates_table = self.create_table(
            self.user_info_updates_id)
        self.temp_user_info_updates_id = 'test_nested_temp_user_info_updates'
        self.temp_user_info_updates_table = self.create_table(
            self.temp_user_info_updates_id)
        self.user_info_final_id = 'test_nested_user_info_final'
        self.user_info_final_table = self.create_table(self.user_info_final_id) 
Example #2
Source File: run_vcf_to_bq_tests.py    From gcp-variant-transforms with Apache License 2.0
def __enter__(self):
    if not self.revalidation_dataset_id:
      client = bigquery.Client(project=self.project)
      dataset_ref = client.dataset(self.dataset_id)
      dataset = bigquery.Dataset(dataset_ref)
      dataset.location = 'US'
      _ = client.create_dataset(dataset)  # See #171, pylint: disable=no-member
    return self 
Example #3
Source File: create_dataset.py    From python-bigquery with Apache License 2.0
def create_dataset(dataset_id):

    # [START bigquery_create_dataset]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to create.
    # dataset_id = "{}.your_dataset".format(client.project)

    # Construct a full Dataset object to send to the API.
    dataset = bigquery.Dataset(dataset_id)

    # TODO(developer): Specify the geographic location where the dataset should reside.
    dataset.location = "US"

    # Send the dataset to the API for creation, with an explicit timeout.
    # Raises google.api_core.exceptions.Conflict if the Dataset already
    # exists within the project.
    dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
    print("Created dataset {}.{}".format(client.project, dataset.dataset_id))
    # [END bigquery_create_dataset] 
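
When the dataset may already exist, a common variation (a sketch, not part of the original sample) is to catch the Conflict explicitly or simply pass exists_ok=True:

from google.api_core.exceptions import Conflict
from google.cloud import bigquery

client = bigquery.Client()
dataset = bigquery.Dataset("{}.your_dataset".format(client.project))  # placeholder ID

try:
    dataset = client.create_dataset(dataset, timeout=30)
except Conflict:
    # Fall back to fetching the existing dataset, as several examples below do;
    # client.create_dataset(dataset, exists_ok=True) collapses this try/except
    # into a single call.
    dataset = client.get_dataset(dataset.reference)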
Example #4
Source File: test_query_generator.py    From professional-services with Apache License 2.0
def setUp(self):
        """Sets up resources for tests.
        """
        self.bq_client = bigquery.Client()
        self.dataset_id = 'bq_benchmark_test_dataset'
        self.dataset_ref = self.bq_client.dataset(self.dataset_id)
        dataset = bigquery.Dataset(self.dataset_ref)
        self.dataset = self.bq_client.create_dataset(dataset)
        self.table_id = 'test_table'
        abs_path = os.path.abspath(os.path.dirname(__file__))
        json_schema_filename = os.path.join(abs_path,
                                            'test_schemas/test_schema.json')
        self.table_util = table_util.TableUtil(
            table_id=self.table_id,
            dataset_id=self.dataset_id,
            json_schema_filename=json_schema_filename,
        )
        self.table_util.create_table()
        self.test_query_generator = query_generator.QueryGenerator(
            table_id=self.table_id, dataset_id=self.dataset_id) 
Example #5
Source File: test_table_util.py    From professional-services with Apache License 2.0
def setUp(self):
        """Sets up resources for tests.
        """
        self.bq_client = bigquery.Client()
        self.dataset_id = 'bq_benchmark_test_dataset'
        self.dataset_ref = self.bq_client.dataset(self.dataset_id)
        dataset = bigquery.Dataset(self.dataset_ref)
        self.dataset = self.bq_client.create_dataset(dataset)
        self.table_id = 'test_table'
        abs_path = os.path.abspath(os.path.dirname(__file__))
        json_schema_filename = os.path.join(abs_path,
                                            'test_schemas/test_schema.json')
        self.table_util = table_util.TableUtil(
            table_id=self.table_id,
            dataset_id=self.dataset_id,
            json_schema_filename=json_schema_filename,
        ) 
Example #6
Source File: test_user_info_updater.py    From professional-services with Apache License 2.0
def setup(self):
        """Sets up resources for tests.
        """
        self.bq_client = bigquery.Client()
        self.dataset_id = 'user_updater_test'
        self.dataset_ref = self.bq_client.dataset(self.dataset_id)
        try:
            self.dataset = self.bq_client.get_dataset(self.dataset_ref)
        except exceptions.NotFound:
            dataset = bigquery.Dataset(self.dataset_ref)
            self.dataset = self.bq_client.create_dataset(dataset)
        schema_path = 'test_schemas/test_schema.json'
        abs_path = os.path.abspath(os.path.dirname(__file__))
        self.schema_path = os.path.join(abs_path, schema_path)
        schema = user_schema.UserSchema(self.schema_path)
        self.bq_schema = schema.translate_json_schema()
        self.user_info_updates_id = 'test_user_info_updates'
        self.user_info_updates_table = self.create_table(
            self.user_info_updates_id)
        self.temp_user_info_updates_id = 'test_temp_user_info_updates'
        self.temp_user_info_updates_table = self.create_table(
            self.temp_user_info_updates_id)
        self.user_info_final_id = 'test_user_info_final'
        self.user_info_final_table = self.create_table(self.user_info_final_id) 
Example #7
Source File: bigquery_helpers.py    From professional-services with Apache License 2.0
def bq_create_dataset(bq_client):
    """Creates the BigQuery dataset.

  If the dataset already exists, the existing dataset will be returned.
  Dataset will be created in the location specified by DATASET_LOCATION.

  Args:
    bq_client: BigQuery client

  Returns:
    BigQuery dataset that will be used to store data.
  """
    dataset_id = "{}.{}".format(bq_client.project, DATASET_NAME)
    dataset = bigquery.Dataset(dataset_id)
    dataset.location = DATASET_LOCATION
    dataset = bq_client.create_dataset(dataset, exists_ok=True)
    return dataset 
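
A possible call site for this helper, assuming it runs in the module where DATASET_NAME and DATASET_LOCATION are defined (the values below are illustrative stand-ins, not the originals):

from google.cloud import bigquery

DATASET_NAME = "metrics"  # illustrative stand-in for the project's constant
DATASET_LOCATION = "US"   # illustrative stand-in for the project's constant

bq_client = bigquery.Client()
dataset = bq_create_dataset(bq_client)
print("Using dataset {}".format(dataset.dataset_id))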
Example #8
Source File: bq.py    From geomancer with MIT License
def _fetch_dataset(self, dataset_id):
        """Fetch a BigQuery Dataset if it exists, else, create a new one

        Parameters
        ----------
        dataset_id : str
            ID to name the created Dataset

        Returns
        -------
        :class:`google.cloud.bigquery.dataset.Dataset`
            The Dataset class to build tables from
        """
        dataset_ref = self.client.dataset(dataset_id)
        dataset = bigquery.Dataset(dataset_ref)
        try:
            dataset = self.client.create_dataset(dataset)
        except Conflict:
            dataset = self.client.get_dataset(dataset_ref)

        return dataset 
Example #9
Source File: bigQueryUtil.py    From CDSS with GNU General Public License v3.0
def create_new_dataset(self, dataset_id):
        '''
        https://cloud.google.com/bigquery/docs/datasets#create-dataset

        :param dataset_id: dataset name
        :return: None
        '''

        dataset_ref = self.client.dataset(dataset_id)

        try:
            # Check if the dataset with specified ID already exists
            self.client.get_dataset(dataset_ref)
            log.info('Dataset {} already exists! Skipping create operation.'.format(dataset_id))
            #print(f'Dataset {dataset_id} already exists! Skipping create operation.')
        except NotFound:
            # Construct a full Dataset object to send to the API.
            dataset = bigquery.Dataset(dataset_ref)
            dataset.location = 'US'
            dataset = self.client.create_dataset(dataset)  # API request
            log.info('Dataset {} created successfully in project {}.'.format(dataset.dataset_id, self.client.project))
            #print(f'Dataset {dataset.dataset_id} created successfully in project {self.client.project}.')
Example #10
Source File: BigQueryConnect_py2.py    From CDSS with GNU General Public License v3.0
def create_new_dataset(self, dataset_id):
        '''
        https://cloud.google.com/bigquery/docs/datasets#create-dataset

        :param dataset_id: dataset name
        :return: None
        '''

        dataset_ref = self.client.dataset(dataset_id)

        try:
            # Check if the dataset with specified ID already exists
            self.client.get_dataset(dataset_ref)
            log.info('Dataset {} already exists! Skipping create operation.'.format(dataset_id))
            #print(f'Dataset {dataset_id} already exists! Skipping create operation.')
        except NotFound:
            # Construct a full Dataset object to send to the API.
            dataset = bigquery.Dataset(dataset_ref)
            dataset.location = 'US'
            dataset = self.client.create_dataset(dataset)  # API request
            log.info('Dataset {} created successfully in project {}.'.format(dataset.dataset_id, self.client.project))
            #print(f'Dataset {dataset.dataset_id} created successfully in project {self.client.project}.')
Example #11
Source File: gbq.py    From pandas-gbq with BSD 3-Clause "New" or "Revised" License
def create(self, dataset_id):
        """ Create a dataset in Google BigQuery

        Parameters
        ----------
        dataset_id : str
            Name of dataset to be written
        """
        from google.cloud.bigquery import Dataset

        if self.exists(dataset_id):
            raise DatasetCreationError(
                "Dataset {0} already exists".format(dataset_id)
            )

        dataset = Dataset(self.client.dataset(dataset_id))

        if self.location is not None:
            dataset.location = self.location

        try:
            self.client.create_dataset(dataset)
        except self.http_error as ex:
            self.process_http_error(ex) 
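
For context, a sketch of how this private helper is typically reached from user code: pandas-gbq's to_gbq() creates the destination dataset on demand when it does not yet exist (the DataFrame and destination names here are illustrative):

import pandas
import pandas_gbq

df = pandas.DataFrame({"name": ["Washington"], "post_abbr": ["WA"]})
# When your_dataset does not exist yet, to_gbq() drives the
# dataset-creation path shown above (illustrative destination).
pandas_gbq.to_gbq(df, "your_dataset.your_table", project_id="your-project")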
Example #12
Source File: analyzer.py    From bigquery-view-analyzer with MIT License
def format_tree(self, show_key=False, show_status=False):
        log.info(f"Formatting tree...")
        tree_string = ""
        key = {
            "project": (Fore.CYAN + "◉" + Fore.RESET + " = Project".ljust(12)),
            "dataset": (Fore.YELLOW + "◉" + Fore.RESET + " = Dataset".ljust(12)),
            "table": (Fore.RED + "◉" + Fore.RESET + " = Table".ljust(12)),
            "view": (Fore.GREEN + "◉" + Fore.RESET + " = View".ljust(12)),
        }
        if show_key:
            tree_string += "Key:\n{}{}\n{}{}\n\n".format(
                key["project"], key["table"], key["dataset"], key["view"]
            )
        for pre, _, node in RenderTree(self.tree):
            tree_string += "%s%s\n" % (
                pre,
                node.pretty_name(show_authorization_status=show_status),
            )
        return tree_string 
Example #13
Source File: BigQueryConnect.py    From CDSS with GNU General Public License v3.0
def create_new_dataset(self, dataset_id: str) -> None:
        '''
        https://cloud.google.com/bigquery/docs/datasets#create-dataset

        :param dataset_id: dataset name
        :return: None
        '''

        dataset_ref = self.client.dataset(dataset_id)

        try:
            # Check if the dataset with specified ID already exists
            self.client.get_dataset(dataset_ref)
            print(f'Dataset {dataset_id} already exists! Skipping create operation.')
        except NotFound:
            # Construct a full Dataset object to send to the API.
            dataset = bigquery.Dataset(dataset_ref)
            dataset.location = 'US'
            dataset = self.client.create_dataset(dataset)  # API request
            print(f'Dataset {dataset.dataset_id} created successfully in project {self.client.project}.')
Example #14
Source File: bigquery.py    From openprescribing with MIT License
def __init__(self, dataset_key=None):
        self.project = settings.BQ_PROJECT

        # If this raises a DefaultCredentialsError:
        #  * on a developer's machine, run `gcloud auth application-default login`
        #   to use OAuth
        #  * elsewhere, ensure that GOOGLE_APPLICATION_CREDENTIALS is set and
        #    points to a valid set of credentials for a service account
        #
        # A warning is raised when authenticating with OAuth, recommending that
        # server applications use a service account.  We can ignore this.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.gcbq_client = gcbq.Client(project=self.project)

        self.dataset_key = dataset_key

        if dataset_key is None:
            self.dataset_id = None
            self.dataset = None
        else:
            self.dataset_id = DATASETS[dataset_key]
            dataset_ref = self.gcbq_client.dataset(self.dataset_id)
            self.dataset = gcbq.Dataset(dataset_ref) 
Example #15
Source File: bigquery.py    From loaner with Apache License 2.0
def initialize_tables(self):
    """Performs first-time setup by creating dataset/tables."""
    if constants.ON_LOCAL:
      logging.debug('On local, not connecting to BQ.')
      return

    logging.info('Beginning BigQuery initialization.')
    dataset = bigquery.Dataset(self._dataset_ref)
    try:
      dataset = self._client.create_dataset(dataset)
    except cloud.exceptions.Conflict:
      logging.warning('Dataset %s already exists, not creating.',
                      dataset.dataset_id)
    else:
      logging.info('Dataset %s successfully created.', dataset.dataset_id)

    self._create_table(constants.BIGQUERY_DEVICE_TABLE, device_model.Device())
    self._create_table(constants.BIGQUERY_SHELF_TABLE, shelf_model.Shelf())
    self._create_table(constants.BIGQUERY_SURVEY_TABLE,
                       survey_models.Question())

    logging.info('BigQuery successfully initialized.') 
Example #16
Source File: group_sync.py    From professional-services with Apache License 2.0
def create_group_members_table(self, dataset_id, groups_users_table_name):
    """Creates a BigQuery table to store group membership data.

    If a table with the given name in the given dataset already exists, will
    assume it is already created properly and keep it as is. If the given
    dataset does not already exist, it will also be created.

    Args:
      dataset_id: id of dataset in which to create the table. If it doesn't
        exist, it will be created.
      groups_users_table_name: name of table to be created if it doesn't exist.

    Returns:
      A reference to the new or existing table.
    """
    dataset_ref = self.bq_client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    try:
      self.bq_client.create_dataset(dataset)
    except google.api_core.exceptions.Conflict:
      # Assume dataset already exists.
      pass

    schema = [
        bigquery.SchemaField('group', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('user', 'STRING', mode='REQUIRED'),
    ]
    table_ref = dataset_ref.table(groups_users_table_name)
    table = bigquery.Table(table_ref, schema=schema)
    try:
      self.bq_client.create_table(table)
    except google.api_core.exceptions.Conflict:
      # Assume table already exists.
      pass
    return table_ref 
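
Once the table reference exists, a sketch of loading membership rows through the standard streaming API (the dataset, table, and row values are placeholders):

from google.cloud import bigquery

client = bigquery.Client()
table_ref = client.dataset("your_dataset").table("groups_users")  # placeholder IDs
rows = [{"group": "admins@example.com", "user": "alice@example.com"}]

# insert_rows_json returns a list of per-row error dicts; empty means success.
errors = client.insert_rows_json(table_ref, rows)
if errors:
    raise RuntimeError("Streaming insert failed: {}".format(errors))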
Example #17
Source File: bigquery.py    From openprescribing with MIT License
def dataset_is_missing(exception):
    return isinstance(exception, NotFound) and "Not found: Dataset" in str(exception) 
Example #18
Source File: test_dataset_exists.py    From python-bigquery with Apache License 2.0
def test_dataset_exists(capsys, random_dataset_id, client):

    dataset_exists.dataset_exists(random_dataset_id)
    out, err = capsys.readouterr()
    assert "Dataset {} is not found".format(random_dataset_id) in out
    dataset = bigquery.Dataset(random_dataset_id)
    dataset = client.create_dataset(dataset)
    dataset_exists.dataset_exists(random_dataset_id)
    out, err = capsys.readouterr()
    assert "Dataset {} already exists".format(random_dataset_id) in out 
Example #19
Source File: test_copy_table_multiple_source.py    From python-bigquery with Apache License 2.0
def test_copy_table_multiple_source(capsys, random_table_id, random_dataset_id, client):

    dataset = bigquery.Dataset(random_dataset_id)
    dataset.location = "US"
    dataset = client.create_dataset(dataset)
    table_data = {"table1": b"Washington,WA", "table2": b"California,CA"}
    for table_id, data in table_data.items():
        table_ref = dataset.table(table_id)
        job_config = bigquery.LoadJobConfig(
            schema=[
                bigquery.SchemaField("name", "STRING"),
                bigquery.SchemaField("post_abbr", "STRING"),
            ]
        )
        body = six.BytesIO(data)
        client.load_table_from_file(
            body, table_ref, location="US", job_config=job_config
        ).result()

    table_ids = [
        "{}.table1".format(random_dataset_id),
        "{}.table2".format(random_dataset_id),
    ]

    copy_table_multiple_source.copy_table_multiple_source(random_table_id, table_ids)
    dest_table = client.get_table(random_table_id)
    out, err = capsys.readouterr()
    assert (
        "The tables {} have been appended to {}".format(table_ids, random_table_id)
        in out
    )
    assert dest_table.num_rows > 0 
Example #20
Source File: mock_bigquery_client.py    From ethereum-etl-airflow with MIT License
def create_dataset(self, dataset_ref):
        return Dataset(dataset_ref) 
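
A sketch of exercising this stub in a unit test; MockBigqueryClient is an assumed name for the enclosing class:

from google.cloud.bigquery import DatasetReference

mock_client = MockBigqueryClient()  # assumed class name for the stub above
dataset = mock_client.create_dataset(DatasetReference("test-project", "test_dataset"))
assert dataset.dataset_id == "test_dataset"  # Dataset wraps the reference locally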
Example #21
Source File: load_file_generator.py    From professional-services with Apache License 2.0
def _get_staging_tables(self, dataset_ref):
        """Internal method for getting list of staging tables.

        Args:
            dataset_ref(google.cloud.bigquery.dataset.DatasetReference):
            Pointer to the dataset that contains the staging tables.

        Returns:
            List of google.cloud.bigquery.table.TableListItem, representing
            tables that are in the staging dataset.
        """

        primitive_staging_dataset = bigquery.Dataset(dataset_ref)
        return list(self.bq_client.list_tables(primitive_staging_dataset)) 
Example #22
Source File: bigquery_resource_util.py    From professional-services with Apache License 2.0
def create_resources(self):
        """Creates dataset and all three user tables.
        """
        dataset = bigquery.Dataset(self.dataset_ref)
        self.dataset = self.bq_client.create_dataset(dataset)
        logging.info('{0:s} Created Dataset {1:s}'.format(
            str(datetime.datetime.now()), self.dataset_id))
        schema = user_schema.UserSchema(self.schema_path)
        user_tables_schema = schema.translate_json_schema()
        self.create_table(self.updates_table_id, user_tables_schema)
        self.create_table(self.temp_updates_table_id, user_tables_schema)
        self.create_table(self.final_table_id, user_tables_schema) 
Example #23
Source File: client.py    From python-bigquery with Apache License 2.0
def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None):
        """Fetch the dataset referenced by ``dataset_ref``

        Args:
            dataset_ref (Union[ \
                google.cloud.bigquery.dataset.DatasetReference, \
                str, \
            ]):
                A reference to the dataset to fetch from the BigQuery API.
                If a string is passed in, this method attempts to create a
                dataset reference from a string using
                :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`.
            retry (Optional[google.api_core.retry.Retry]):
                How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``.

        Returns:
            google.cloud.bigquery.dataset.Dataset:
                A ``Dataset`` instance.
        """
        if isinstance(dataset_ref, str):
            dataset_ref = DatasetReference.from_string(
                dataset_ref, default_project=self.project
            )

        api_response = self._call_api(
            retry, method="GET", path=dataset_ref.path, timeout=timeout
        )
        return Dataset.from_api_repr(api_response) 
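
Because of the from_string conversion above, callers can pass either a DatasetReference or a plain "project.dataset" string. A minimal sketch against a public dataset:

from google.cloud import bigquery

client = bigquery.Client()
dataset = client.get_dataset("bigquery-public-data.usa_names")  # string form
print(dataset.full_dataset_id)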
Example #24
Source File: analyzer.py    From bigquery-view-analyzer with MIT License
def dataset(self) -> Dataset:
        dataset_ref = client.dataset(self.table.dataset_id, project=self.table.project)
        return client.get_dataset(dataset_ref) 
Example #25
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_table_cmek(client, to_delete):
    dataset_id = "create_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_create_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    table_ref = dataset.table("my_table")
    table = bigquery.Table(table_ref)

    # Set the encryption key to use for the table.
    # TODO: Replace this key with a key you have created in Cloud KMS.
    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=kms_key_name
    )

    table = client.create_table(table)  # API request

    assert table.encryption_configuration.kms_key_name == kms_key_name
    # [END bigquery_create_table_cmek] 
Example #26
Source File: quickstart.py    From python-docs-samples with Apache License 2.0
def run_quickstart(override_values={}):
    # [START bigquery_quickstart]
    # Imports the Google Cloud client library
    from google.cloud import bigquery

    # Instantiates a client
    bigquery_client = bigquery.Client()

    # The name for the new dataset
    dataset_id = 'my_new_dataset'

    # [END bigquery_quickstart]
    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    dataset_id = override_values.get("dataset_id", dataset_id)
    # [START bigquery_quickstart]

    # Prepares a reference to the new dataset
    dataset_ref = bigquery_client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)

    # Creates the new dataset
    dataset = bigquery_client.create_dataset(dataset)

    print('Dataset {} created.'.format(dataset.dataset_id))
    # [END bigquery_quickstart] 
Example #27
Source File: samples_test.py    From python-docs-samples with Apache License 2.0
def temp_dataset():
    from google.cloud import bigquery

    client = bigquery.Client()
    dataset_id = "temp_dataset_{}".format(int(time.time() * 1000))
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    yield dataset
    client.delete_dataset(dataset, delete_contents=True) 
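
A sketch of a test consuming this fixture; pytest injects it by parameter name, and the table ID is illustrative:

def test_create_table_in_temp_dataset(temp_dataset):
    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = temp_dataset.table("scratch")  # illustrative table ID
    table = client.create_table(bigquery.Table(table_ref))
    assert table.table_id == "scratch"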
Example #28
Source File: dataflowtemplateoperator_create_dataset_and_table_helper.py    From python-docs-samples with Apache License 2.0
def create_dataset_and_table(project, location, dataset_name):
    # Construct a BigQuery client object.
    client = bigquery.Client(project)

    dataset_id = f"{project}.{dataset_name}"

    # Construct a full Dataset object to send to the API.
    dataset = bigquery.Dataset(dataset_id)

    # Set the location to your desired location for the dataset.
    # For more information, see this link:
    # https://cloud.google.com/bigquery/docs/locations
    dataset.location = location

    # Send the dataset to the API for creation.
    # Raises google.api_core.exceptions.Conflict if the Dataset already
    # exists within the project.
    dataset = client.create_dataset(dataset)  # Make an API request.

    print(f"Created dataset {client.project}.{dataset.dataset_id}")

    # Create a table from this dataset.

    table_id = f"{client.project}.{dataset_name}.average_weather"

    schema = [
        bigquery.SchemaField("location", "GEOGRAPHY", mode="REQUIRED"),
        bigquery.SchemaField("average_temperature", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("month", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("inches_of_rain", "NUMERIC", mode="NULLABLE"),
        bigquery.SchemaField("is_current", "BOOLEAN", mode="NULLABLE"),
        bigquery.SchemaField("latest_measurement", "DATE", mode="NULLABLE"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)  # Make an API request.
    print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}")

    # [END composer_dataflow_dataset_table_creation]
    return dataset, table
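
For completeness, a hedged sketch of tearing these resources back down; delete_contents=True drops the table along with the dataset, and not_found_ok=True makes the cleanup safe to re-run:

from google.cloud import bigquery

client = bigquery.Client()
client.delete_dataset(
    "your-project.your_dataset",  # placeholder ID
    delete_contents=True,
    not_found_ok=True,
)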