Python google.cloud.bigquery.Dataset() Examples
The following are 30 code examples of google.cloud.bigquery.Dataset().
You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
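Across these examples, bigquery.Dataset() is built in one of two ways: from a DatasetReference (often via the Client.dataset() helper, which newer library releases deprecate in favor of bigquery.DatasetReference) or directly from a "project.dataset" string ID. A minimal sketch of both styles, assuming a hypothetical project my-project and a recent google-cloud-bigquery release:

from google.cloud import bigquery

client = bigquery.Client()

# Style 1: construct the Dataset from a "project.dataset" string ID.
dataset = bigquery.Dataset("my-project.my_dataset")
dataset.location = "US"
dataset = client.create_dataset(dataset, exists_ok=True)  # Returns the existing dataset instead of raising Conflict.

# Style 2: construct the Dataset from an explicit DatasetReference.
dataset_ref = bigquery.DatasetReference("my-project", "my_dataset")
dataset = client.create_dataset(bigquery.Dataset(dataset_ref), exists_ok=True)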
Example #1
Source File: test_nested_user_info_updater.py From professional-services with Apache License 2.0
def setup(self):
    """Sets up resources for tests."""
    self.bq_client = bigquery.Client()
    self.dataset_id = 'user_updater_test'
    self.dataset_ref = self.bq_client.dataset(self.dataset_id)
    try:
        self.dataset = self.bq_client.get_dataset(self.dataset_ref)
    except exceptions.NotFound:
        dataset = bigquery.Dataset(self.dataset_ref)
        self.dataset = self.bq_client.create_dataset(dataset)
    schema_path = 'test_schemas/test_nested_schema.json'
    abs_path = os.path.abspath(os.path.dirname(__file__))
    self.schema_path = os.path.join(abs_path, schema_path)
    schema = user_schema.UserSchema(self.schema_path)
    self.bq_schema = schema.translate_json_schema()
    self.user_info_updates_id = 'test_nested_user_info_updates'
    self.user_info_updates_table = self.create_table(
        self.user_info_updates_id)
    self.temp_user_info_updates_id = 'test_nested_temp_user_info_updates'
    self.temp_user_info_updates_table = self.create_table(
        self.temp_user_info_updates_id)
    self.user_info_final_id = 'test_nested_user_info_final'
    self.user_info_final_table = self.create_table(self.user_info_final_id)
Example #2
Source File: run_vcf_to_bq_tests.py From gcp-variant-transforms with Apache License 2.0
def __enter__(self):
    if not self.revalidation_dataset_id:
        client = bigquery.Client(project=self.project)
        dataset_ref = client.dataset(self.dataset_id)
        dataset = bigquery.Dataset(dataset_ref)
        dataset.location = 'US'
        _ = client.create_dataset(dataset)  # See #171, pylint: disable=no-member
    return self
Example #3
Source File: create_dataset.py From python-bigquery with Apache License 2.0
def create_dataset(dataset_id):

    # [START bigquery_create_dataset]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to create.
    # dataset_id = "{}.your_dataset".format(client.project)

    # Construct a full Dataset object to send to the API.
    dataset = bigquery.Dataset(dataset_id)

    # TODO(developer): Specify the geographic location where the dataset should reside.
    dataset.location = "US"

    # Send the dataset to the API for creation, with an explicit timeout.
    # Raises google.api_core.exceptions.Conflict if the Dataset already
    # exists within the project.
    dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
    print("Created dataset {}.{}".format(client.project, dataset.dataset_id))
    # [END bigquery_create_dataset]
Example #4
Source File: test_query_generator.py From professional-services with Apache License 2.0
def setUp(self):
    """Sets up resources for tests."""
    self.bq_client = bigquery.Client()
    self.dataset_id = 'bq_benchmark_test_dataset'
    self.dataset_ref = self.bq_client.dataset(self.dataset_id)
    dataset = bigquery.Dataset(self.dataset_ref)
    self.dataset = self.bq_client.create_dataset(dataset)
    self.table_id = 'test_table'
    abs_path = os.path.abspath(os.path.dirname(__file__))
    json_schema_filename = os.path.join(abs_path,
                                        'test_schemas/test_schema.json')
    self.table_util = table_util.TableUtil(
        table_id=self.table_id,
        dataset_id=self.dataset_id,
        json_schema_filename=json_schema_filename,
    )
    self.table_util.create_table()
    self.test_query_generator = query_generator.QueryGenerator(
        table_id=self.table_id, dataset_id=self.dataset_id)
Example #5
Source File: test_table_util.py From professional-services with Apache License 2.0
def setUp(self):
    """Sets up resources for tests."""
    self.bq_client = bigquery.Client()
    self.dataset_id = 'bq_benchmark_test_dataset'
    self.dataset_ref = self.bq_client.dataset(self.dataset_id)
    dataset = bigquery.Dataset(self.dataset_ref)
    self.dataset = self.bq_client.create_dataset(dataset)
    self.table_id = 'test_table'
    abs_path = os.path.abspath(os.path.dirname(__file__))
    json_schema_filename = os.path.join(abs_path,
                                        'test_schemas/test_schema.json')
    self.table_util = table_util.TableUtil(
        table_id=self.table_id,
        dataset_id=self.dataset_id,
        json_schema_filename=json_schema_filename,
    )
Example #6
Source File: test_nested_user_info_updater.py From professional-services with Apache License 2.0
def setup(self):
    """Sets up resources for tests."""
    self.bq_client = bigquery.Client()
    self.dataset_id = 'user_updater_test'
    self.dataset_ref = self.bq_client.dataset(self.dataset_id)
    try:
        self.dataset = self.bq_client.get_dataset(self.dataset_ref)
    except exceptions.NotFound:
        dataset = bigquery.Dataset(self.dataset_ref)
        self.dataset = self.bq_client.create_dataset(dataset)
    schema_path = 'test_schemas/test_nested_schema.json'
    abs_path = os.path.abspath(os.path.dirname(__file__))
    self.schema_path = os.path.join(abs_path, schema_path)
    schema = user_schema.UserSchema(self.schema_path)
    self.bq_schema = schema.translate_json_schema()
    self.user_info_updates_id = 'test_nested_user_info_updates'
    self.user_info_updates_table = self.create_table(
        self.user_info_updates_id)
    self.temp_user_info_updates_id = 'test_nested_temp_user_info_updates'
    self.temp_user_info_updates_table = self.create_table(
        self.temp_user_info_updates_id)
    self.user_info_final_id = 'test_nested_user_info_final'
    self.user_info_final_table = self.create_table(self.user_info_final_id)
Example #7
Source File: test_user_info_updater.py From professional-services with Apache License 2.0
def setup(self):
    """Sets up resources for tests."""
    self.bq_client = bigquery.Client()
    self.dataset_id = 'user_updater_test'
    self.dataset_ref = self.bq_client.dataset(self.dataset_id)
    try:
        self.dataset = self.bq_client.get_dataset(self.dataset_ref)
    except exceptions.NotFound:
        dataset = bigquery.Dataset(self.dataset_ref)
        self.dataset = self.bq_client.create_dataset(dataset)
    schema_path = 'test_schemas/test_schema.json'
    abs_path = os.path.abspath(os.path.dirname(__file__))
    self.schema_path = os.path.join(abs_path, schema_path)
    schema = user_schema.UserSchema(self.schema_path)
    self.bq_schema = schema.translate_json_schema()
    self.user_info_updates_id = 'test_user_info_updates'
    self.user_info_updates_table = self.create_table(
        self.user_info_updates_id)
    self.temp_user_info_updates_id = 'test_temp_user_info_updates'
    self.temp_user_info_updates_table = self.create_table(
        self.temp_user_info_updates_id)
    self.user_info_final_id = 'test_user_info_final'
    self.user_info_final_table = self.create_table(self.user_info_final_id)
Example #8
Source File: test_user_info_updater.py From professional-services with Apache License 2.0
def setup(self):
    """Sets up resources for tests."""
    self.bq_client = bigquery.Client()
    self.dataset_id = 'user_updater_test'
    self.dataset_ref = self.bq_client.dataset(self.dataset_id)
    try:
        self.dataset = self.bq_client.get_dataset(self.dataset_ref)
    except exceptions.NotFound:
        dataset = bigquery.Dataset(self.dataset_ref)
        self.dataset = self.bq_client.create_dataset(dataset)
    schema_path = 'test_schemas/test_schema.json'
    abs_path = os.path.abspath(os.path.dirname(__file__))
    self.schema_path = os.path.join(abs_path, schema_path)
    schema = user_schema.UserSchema(self.schema_path)
    self.bq_schema = schema.translate_json_schema()
    self.user_info_updates_id = 'test_user_info_updates'
    self.user_info_updates_table = self.create_table(
        self.user_info_updates_id)
    self.temp_user_info_updates_id = 'test_temp_user_info_updates'
    self.temp_user_info_updates_table = self.create_table(
        self.temp_user_info_updates_id)
    self.user_info_final_id = 'test_user_info_final'
    self.user_info_final_table = self.create_table(self.user_info_final_id)
Example #9
Source File: bigquery_helpers.py From professional-services with Apache License 2.0
def bq_create_dataset(bq_client):
    """Creates the BigQuery dataset.

    If the dataset already exists, the existing dataset will be returned.
    The dataset will be created in the location specified by DATASET_LOCATION.

    Args:
        bq_client: BigQuery client

    Returns:
        BigQuery dataset that will be used to store data.
    """
    dataset_id = "{}.{}".format(bq_client.project, DATASET_NAME)
    dataset = bigquery.Dataset(dataset_id)
    dataset.location = DATASET_LOCATION
    dataset = bq_client.create_dataset(dataset, exists_ok=True)
    return dataset
Example #10
Source File: bq.py From geomancer with MIT License
def _fetch_dataset(self, dataset_id):
    """Fetch a BigQuery Dataset if it exists, else, create a new one

    Parameters
    ----------
    dataset_id : str
        ID to name the created Dataset

    Returns
    -------
    :class:`google.cloud.bigquery.dataset.Dataset`
        The Dataset class to build tables from
    """
    dataset_ref = self.client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    try:
        dataset = self.client.create_dataset(dataset)
    except Conflict:
        dataset = self.client.get_dataset(dataset_ref)
    return dataset
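Note the two idempotent-creation idioms on this page: Examples #1 and #7 call get_dataset() first and catch NotFound, while this example attempts create_dataset() and catches Conflict, which avoids a race when two workers initialize at once. Recent library releases fold both into a single call via exists_ok, as Example #9 above already does. A minimal sketch, assuming a hypothetical my-project.my_dataset:

from google.cloud import bigquery

client = bigquery.Client()
# exists_ok=True returns the existing dataset instead of raising Conflict.
dataset = client.create_dataset(bigquery.Dataset("my-project.my_dataset"), exists_ok=True)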
Example #11
Source File: bigQueryUtil.py From CDSS with GNU General Public License v3.0
def create_new_dataset(self, dataset_id):
    '''
    https://cloud.google.com/bigquery/docs/datasets#create-dataset
    :param dataset_id: dataset name
    :return: None
    '''
    dataset_ref = self.client.dataset(dataset_id)
    try:
        # Check if the dataset with specified ID already exists
        self.client.get_dataset(dataset_ref)
        log.info('Dataset {} already exists! Skipping create operation.'.format(dataset_id))
        # print(f'Dataset {dataset_id} already exists! Skipping create operation.')
    except NotFound:
        # Construct a full Dataset object to send to the API.
        dataset = bigquery.Dataset(dataset_ref)
        dataset.location = 'US'
        dataset = self.client.create_dataset(dataset)  # API request
        log.info('Dataset {} created successfully in project: {}.'.format(
            dataset.dataset_id, self.client.project))
        # print(f'Dataset {dataset.dataset_id} created successfully in project: {self.client.project}.')
Example #12
Source File: BigQueryConnect_py2.py From CDSS with GNU General Public License v3.0
def create_new_dataset(self, dataset_id):
    '''
    https://cloud.google.com/bigquery/docs/datasets#create-dataset
    :param dataset_id: dataset name
    :return: None
    '''
    dataset_ref = self.client.dataset(dataset_id)
    try:
        # Check if the dataset with specified ID already exists
        self.client.get_dataset(dataset_ref)
        log.info('Dataset {} already exists! Skipping create operation.'.format(dataset_id))
        # print(f'Dataset {dataset_id} already exists! Skipping create operation.')
    except NotFound:
        # Construct a full Dataset object to send to the API.
        dataset = bigquery.Dataset(dataset_ref)
        dataset.location = 'US'
        dataset = self.client.create_dataset(dataset)  # API request
        log.info('Dataset {} created successfully in project: {}.'.format(
            dataset.dataset_id, self.client.project))
        # print(f'Dataset {dataset.dataset_id} created successfully in project: {self.client.project}.')
Example #13
Source File: gbq.py From pandas-gbq with BSD 3-Clause "New" or "Revised" License
def create(self, dataset_id):
    """Create a dataset in Google BigQuery

    Parameters
    ----------
    dataset : str
        Name of dataset to be written
    """
    from google.cloud.bigquery import Dataset

    if self.exists(dataset_id):
        raise DatasetCreationError(
            "Dataset {0} already exists".format(dataset_id)
        )
    dataset = Dataset(self.client.dataset(dataset_id))
    if self.location is not None:
        dataset.location = self.location
    try:
        self.client.create_dataset(dataset)
    except self.http_error as ex:
        self.process_http_error(ex)
Example #14
Source File: analyzer.py From bigquery-view-analyzer with MIT License
def format_tree(self, show_key=False, show_status=False):
    log.info(f"Formatting tree...")
    tree_string = ""
    key = {
        "project": (Fore.CYAN + "◉" + Fore.RESET + " = Project".ljust(12)),
        "dataset": (Fore.YELLOW + "◉" + Fore.RESET + " = Dataset".ljust(12)),
        "table": (Fore.RED + "◉" + Fore.RESET + " = Table".ljust(12)),
        "view": (Fore.GREEN + "◉" + Fore.RESET + " = View".ljust(12)),
    }
    if show_key:
        tree_string += "Key:\n{}{}\n{}{}\n\n".format(
            key["project"], key["table"], key["dataset"], key["view"]
        )
    for pre, _, node in RenderTree(self.tree):
        tree_string += "%s%s\n" % (
            pre,
            node.pretty_name(show_authorization_status=show_status),
        )
    return tree_string
Example #15
Source File: BigQueryConnect.py From CDSS with GNU General Public License v3.0
def create_new_dataset(self, dataset_id: str) -> None:
    '''
    https://cloud.google.com/bigquery/docs/datasets#create-dataset
    :param dataset_id: dataset name
    :return: None
    '''
    dataset_ref = self.client.dataset(dataset_id)
    try:
        # Check if the dataset with specified ID already exists
        self.client.get_dataset(dataset_ref)
        print(f'Dataset {dataset_id} already exists! Skipping create operation.')
    except NotFound:
        # Construct a full Dataset object to send to the API.
        dataset = bigquery.Dataset(dataset_ref)
        dataset.location = 'US'
        dataset = self.client.create_dataset(dataset)  # API request
        print(f'Dataset {dataset.dataset_id} created successfully in project: {self.client.project}.')
Example #16
Source File: bigquery.py From openprescribing with MIT License
def __init__(self, dataset_key=None):
    self.project = settings.BQ_PROJECT

    # If this raises a DefaultCredentialsError:
    #  * on a developer's machine, run `gcloud auth application-default login`
    #    to use OAuth
    #  * elsewhere, ensure that GOOGLE_APPLICATION_CREDENTIALS is set and
    #    points to a valid set of credentials for a service account
    #
    # A warning is raised when authenticating with OAuth, recommending that
    # server applications use a service account. We can ignore this.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        self.gcbq_client = gcbq.Client(project=self.project)

    self.dataset_key = dataset_key

    if dataset_key is None:
        self.dataset_id = None
        self.dataset = None
    else:
        self.dataset_id = DATASETS[dataset_key]
        dataset_ref = self.gcbq_client.dataset(self.dataset_id)
        self.dataset = gcbq.Dataset(dataset_ref)
Example #17
Source File: bigquery.py From loaner with Apache License 2.0
def initialize_tables(self):
    """Performs first-time setup by creating dataset/tables."""
    if constants.ON_LOCAL:
        logging.debug('On local, not connecting to BQ.')
        return
    logging.info('Beginning BigQuery initialization.')

    dataset = bigquery.Dataset(self._dataset_ref)
    try:
        dataset = self._client.create_dataset(dataset)
    except cloud.exceptions.Conflict:
        logging.warning('Dataset %s already exists, not creating.',
                        dataset.dataset_id)
    else:
        logging.info('Dataset %s successfully created.', dataset.dataset_id)

    self._create_table(constants.BIGQUERY_DEVICE_TABLE, device_model.Device())
    self._create_table(constants.BIGQUERY_SHELF_TABLE, shelf_model.Shelf())
    self._create_table(constants.BIGQUERY_SURVEY_TABLE,
                       survey_models.Question())
    logging.info('BigQuery successfully initialized.')
Example #18
Source File: group_sync.py From professional-services with Apache License 2.0
def create_group_members_table(self, dataset_id, groups_users_table_name):
    """Creates a BigQuery table to store group membership data.

    If a table with the given name in the given dataset already exists, will
    assume it is already created properly and keep it as is. If the given
    dataset does not already exist, it will also be created.

    Args:
        dataset_id: id of dataset in which to create the table. If it
            doesn't exist, it will be created.
        groups_users_table_name: name of table to be created if it doesn't
            exist.

    Returns:
        A reference to the new or existing table.
    """
    dataset_ref = self.bq_client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    try:
        self.bq_client.create_dataset(dataset)
    except google.api_core.exceptions.Conflict:
        # Assume dataset already exists.
        pass
    schema = [
        bigquery.SchemaField('group', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('user', 'STRING', mode='REQUIRED'),
    ]
    table_ref = dataset_ref.table(groups_users_table_name)
    table = bigquery.Table(table_ref, schema=schema)
    try:
        self.bq_client.create_table(table)
    except google.api_core.exceptions.Conflict:
        # Assume table already exists.
        pass
    return table_ref
Example #19
Source File: bigquery.py From openprescribing with MIT License
def dataset_is_missing(exception):
    return isinstance(exception, NotFound) and "Not found: Dataset" in str(exception)
Example #20
Source File: test_dataset_exists.py From python-bigquery with Apache License 2.0
def test_dataset_exists(capsys, random_dataset_id, client):

    dataset_exists.dataset_exists(random_dataset_id)
    out, err = capsys.readouterr()
    assert "Dataset {} is not found".format(random_dataset_id) in out

    dataset = bigquery.Dataset(random_dataset_id)
    dataset = client.create_dataset(dataset)

    dataset_exists.dataset_exists(random_dataset_id)
    out, err = capsys.readouterr()
    assert "Dataset {} already exists".format(random_dataset_id) in out
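The dataset_exists sample module exercised by this test is not reproduced on this page. A minimal sketch of what such a check typically looks like, assuming the get-or-NotFound pattern used elsewhere in these examples and matching the strings the test asserts on:

from google.cloud import bigquery
from google.api_core.exceptions import NotFound

def dataset_exists(dataset_id):
    client = bigquery.Client()
    try:
        client.get_dataset(dataset_id)  # Accepts a "project.dataset" string.
        print("Dataset {} already exists".format(dataset_id))
    except NotFound:
        print("Dataset {} is not found".format(dataset_id))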
Example #21
Source File: test_copy_table_multiple_source.py From python-bigquery with Apache License 2.0
def test_copy_table_multiple_source(capsys, random_table_id, random_dataset_id, client):

    dataset = bigquery.Dataset(random_dataset_id)
    dataset.location = "US"
    dataset = client.create_dataset(dataset)

    table_data = {"table1": b"Washington,WA", "table2": b"California,CA"}
    for table_id, data in table_data.items():
        table_ref = dataset.table(table_id)
        job_config = bigquery.LoadJobConfig(
            schema=[
                bigquery.SchemaField("name", "STRING"),
                bigquery.SchemaField("post_abbr", "STRING"),
            ]
        )
        body = six.BytesIO(data)
        client.load_table_from_file(
            body, table_ref, location="US", job_config=job_config
        ).result()

    table_ids = [
        "{}.table1".format(random_dataset_id),
        "{}.table2".format(random_dataset_id),
    ]

    copy_table_multiple_source.copy_table_multiple_source(random_table_id, table_ids)

    dest_table = client.get_table(random_table_id)

    out, err = capsys.readouterr()
    assert (
        "The tables {} have been appended to {}".format(table_ids, random_table_id)
        in out
    )
    assert dest_table.num_rows > 0
Example #22
Source File: mock_bigquery_client.py From ethereum-etl-airflow with MIT License
def create_dataset(self, dataset_ref):
    return Dataset(dataset_ref)
Example #23
Source File: load_file_generator.py From professional-services with Apache License 2.0
def _get_staging_tables(self, dataset_ref):
    """Internal method for getting list of staging tables.

    Args:
        dataset_ref(google.cloud.bigquery.dataset.DatasetReference):
            Pointer to the dataset that contains the staging tables.

    Returns:
        List of google.cloud.bigquery.table.TableListItem, representing
        tables that are in the staging dataset.
    """
    primitive_staging_dataset = bigquery.Dataset(dataset_ref)
    return list(self.bq_client.list_tables(primitive_staging_dataset))
Example #24
Source File: bigquery_resource_util.py From professional-services with Apache License 2.0
def create_resources(self):
    """Creates dataset and all three user tables."""
    dataset = bigquery.Dataset(self.dataset_ref)
    self.dataset = self.bq_client.create_dataset(dataset)
    logging.info('{0:s} Created Dataset {1:s}'.format(
        str(datetime.datetime.now()), self.dataset_id))
    schema = user_schema.UserSchema(self.schema_path)
    user_tables_schema = schema.translate_json_schema()
    self.create_table(self.updates_table_id, user_tables_schema)
    self.create_table(self.temp_updates_table_id, user_tables_schema)
    self.create_table(self.final_table_id, user_tables_schema)
Example #25
Source File: client.py From python-bigquery with Apache License 2.0
def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None):
    """Fetch the dataset referenced by ``dataset_ref``

    Args:
        dataset_ref (Union[ \
            google.cloud.bigquery.dataset.DatasetReference, \
            str, \
        ]):
            A reference to the dataset to fetch from the BigQuery API.
            If a string is passed in, this method attempts to create a
            dataset reference from a string using
            :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`.
        retry (Optional[google.api_core.retry.Retry]):
            How to retry the RPC.
        timeout (Optional[float]):
            The number of seconds to wait for the underlying HTTP transport
            before using ``retry``.

    Returns:
        google.cloud.bigquery.dataset.Dataset:
            A ``Dataset`` instance.
    """
    if isinstance(dataset_ref, str):
        dataset_ref = DatasetReference.from_string(
            dataset_ref, default_project=self.project
        )

    api_response = self._call_api(
        retry, method="GET", path=dataset_ref.path, timeout=timeout
    )
    return Dataset.from_api_repr(api_response)
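Because of the from_string() branch above, get_dataset() accepts either a DatasetReference or a plain string. A short usage sketch, assuming a hypothetical my_dataset in the client's default project:

client = bigquery.Client()
dataset = client.get_dataset("my_dataset")  # Expands to "<default project>.my_dataset".
print(dataset.full_dataset_id)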
Example #26
Source File: analyzer.py From bigquery-view-analyzer with MIT License
def dataset(self) -> Dataset:
    dataset_ref = client.dataset(self.table.dataset_id, project=self.table.project)
    return client.get_dataset(dataset_ref)
Example #27
Source File: snippets.py From python-bigquery with Apache License 2.0
def test_create_table_cmek(client, to_delete):
    dataset_id = "create_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_create_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    table_ref = dataset.table("my_table")
    table = bigquery.Table(table_ref)

    # Set the encryption key to use for the table.
    # TODO: Replace this key with a key you have created in Cloud KMS.
    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=kms_key_name
    )

    table = client.create_table(table)  # API request

    assert table.encryption_configuration.kms_key_name == kms_key_name
    # [END bigquery_create_table_cmek]
Example #28
Source File: quickstart.py From python-docs-samples with Apache License 2.0
def run_quickstart(override_values={}):

    # [START bigquery_quickstart]
    # Imports the Google Cloud client library
    from google.cloud import bigquery

    # Instantiates a client
    bigquery_client = bigquery.Client()

    # The name for the new dataset
    dataset_id = 'my_new_dataset'
    # [END bigquery_quickstart]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    dataset_id = override_values.get("dataset_id", dataset_id)

    # [START bigquery_quickstart]
    # Prepares a reference to the new dataset
    dataset_ref = bigquery_client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)

    # Creates the new dataset
    dataset = bigquery_client.create_dataset(dataset)

    print('Dataset {} created.'.format(dataset.dataset_id))
    # [END bigquery_quickstart]
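This quickstart relies on bigquery_client.dataset(), which newer google-cloud-bigquery releases deprecate and the 3.x line removes. A sketch of the same flow on a modern client, assuming the same my_new_dataset ID:

from google.cloud import bigquery

bigquery_client = bigquery.Client()
# Modern replacement for bigquery_client.dataset("my_new_dataset"):
dataset = bigquery.Dataset("{}.my_new_dataset".format(bigquery_client.project))
dataset = bigquery_client.create_dataset(dataset)
print('Dataset {} created.'.format(dataset.dataset_id))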
Example #29
Source File: samples_test.py From python-docs-samples with Apache License 2.0
def temp_dataset():
    from google.cloud import bigquery

    client = bigquery.Client()
    dataset_id = "temp_dataset_{}".format(int(time.time() * 1000))
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    yield dataset
    client.delete_dataset(dataset, delete_contents=True)
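The teardown line above raises NotFound if something else already deleted the dataset; a one-line variant tolerant of that, assuming the not_found_ok flag available in recent releases:

client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)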
Example #30
Source File: dataflowtemplateoperator_create_dataset_and_table_helper.py From python-docs-samples with Apache License 2.0
def create_dataset_and_table(project, location, dataset_name):
    # Construct a BigQuery client object.
    client = bigquery.Client(project)

    dataset_id = f"{project}.{dataset_name}"

    # Construct a full Dataset object to send to the API.
    dataset = bigquery.Dataset(dataset_id)

    # Set the location to your desired location for the dataset.
    # For more information, see this link:
    # https://cloud.google.com/bigquery/docs/locations
    dataset.location = location

    # Send the dataset to the API for creation.
    # Raises google.api_core.exceptions.Conflict if the Dataset already
    # exists within the project.
    dataset = client.create_dataset(dataset)  # Make an API request.

    print(f"Created dataset {client.project}.{dataset.dataset_id}")

    # Create a table from this dataset.
    table_id = f"{client.project}.{dataset_name}.average_weather"

    schema = [
        bigquery.SchemaField("location", "GEOGRAPHY", mode="REQUIRED"),
        bigquery.SchemaField("average_temperature", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("month", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("inches_of_rain", "NUMERIC", mode="NULLABLE"),
        bigquery.SchemaField("is_current", "BOOLEAN", mode="NULLABLE"),
        bigquery.SchemaField("latest_measurement", "DATE", mode="NULLABLE"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)  # Make an API request.

    print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}")
    # [END composer_dataflow_dataset_table_creation]

    return dataset, table