Python google.cloud.bigquery.Client() Examples
The following are 30 code examples of google.cloud.bigquery.Client(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
Example #1
Source File: connection.py From insightconnect-plugins with MIT License | 8 votes |
def connect(self, params):
    self.logger.info(f"Connect: Connecting...")
    self.client = bigquery.Client(
        project=params.get(Input.PROJECT_ID),
        credentials=service_account.Credentials.from_service_account_info({
            "type": "service_account",
            "project_id": params.get(Input.PROJECT_ID),
            "private_key_id": params.get(Input.PRIVATE_KEY_ID),
            "private_key": params.get(Input.PRIVATE_KEY).get("privateKey").replace('\\n', "\n", -1),
            "client_email": params.get(Input.CLIENT_EMAIL),
            "client_id": params.get(Input.CLIENT_ID),
            "auth_uri": params.get(Input.AUTH_URI),
            "client_x509_cert_url": params.get(Input.CLIENT_X509_CERT_URL),
            "token_uri": params.get(Input.TOKEN_URI, "https://oauth2.googleapis.com/token"),
            "auth_provider_x509_cert_url": params.get(
                Input.AUTH_PROVIDER_X509_CERT_URL,
                "https://www.googleapis.com/oauth2/v1/certs")
        })
    )
Example #2
Source File: bq_writer.py From lookml-tools with Apache License 2.0 | 8 votes |
def _upload_to_gcs(self, gcs_project_id, target_bucket_name, bucket_folder, filename):
    '''upload CSV to file in GCS

    Args:
        gcs_project_id (str): project name
        target_bucket_name (str): name of GCS bucket
        bucket_folder (str): name of GCS folder
        filename (str): filepath to upload

    Returns:
        nothing. Side effect is that data is uploaded to GCS
    '''
    storage_client = storage.Client(gcs_project_id)
    bucket = storage_client.get_bucket(target_bucket_name)
    path = bucket_folder + os.sep + filename
    logging.info("Loading to GCS: %s", path)
    blob = bucket.blob(path)  # name in GCS
    blob.upload_from_filename(filename)
Example #3
Source File: samples_test.py From python-docs-samples with Apache License 2.0 | 7 votes |
def test_client_library_query_bqstorage():
    # [START bigquery_migration_client_library_query_bqstorage]
    import google.auth
    from google.cloud import bigquery
    from google.cloud import bigquery_storage_v1beta1

    # Create a BigQuery client and a BigQuery Storage API client with the same
    # credentials to avoid authenticating twice.
    credentials, project_id = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    client = bigquery.Client(credentials=credentials, project=project_id)
    bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient(
        credentials=credentials
    )
    sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"

    # Use a BigQuery Storage API client to download results more quickly.
    df = client.query(sql).to_dataframe(bqstorage_client=bqstorage_client)
    # [END bigquery_migration_client_library_query_bqstorage]

    assert len(df) > 0
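This example targets the beta bigquery_storage_v1beta1 client. A rough sketch of the same pattern against the newer client libraries (assuming google-cloud-bigquery >= 2.0 and google-cloud-bigquery-storage >= 2.0, with an illustrative public-data query) might look like this:

import google.auth
from google.cloud import bigquery
from google.cloud import bigquery_storage

credentials, project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
client = bigquery.Client(credentials=credentials, project=project_id)
read_client = bigquery_storage.BigQueryReadClient(credentials=credentials)

sql = "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` LIMIT 10"
# The read client still routes the result download through the Storage API.
df = client.query(sql).to_dataframe(bqstorage_client=read_client)
print(len(df))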
Example #4
Source File: deploy_app.py From runtimes-common with Apache License 2.0 | 6 votes |
def _record_latency_to_bigquery(deploy_latency, language, is_xrt):
    current_date = datetime.datetime.now()
    row = [(language, current_date, deploy_latency, is_xrt)]
    project = os.environ.get(DEPLOY_LATENCY_PROJECT_ENV)
    if not project:
        logging.warn('No project specified to record deployment latency!')
        logging.warn('If you wish to record deployment latency, \
please set %s env var and try again.', DEPLOY_LATENCY_PROJECT_ENV)
        return 0
    logging.debug('Fetching bigquery client for project %s', project)
    client = bigquery.Client(project=project)
    dataset = client.dataset(DATASET_NAME)
    logging.debug('Writing bigquery data to table %s in dataset %s',
                  TABLE_NAME, dataset)
    table_ref = bigquery.TableReference(dataset_ref=dataset, table_id=TABLE_NAME)
    table = client.get_table(table_ref)
    return client.create_rows(table, row)
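This example uses client.create_rows() and client.dataset(), which come from an older google-cloud-bigquery API surface; current releases expose the streaming insert as Client.insert_rows(). A minimal, hedged sketch of the updated call, with placeholder project, dataset, and table names, might look like:

import datetime

from google.cloud import bigquery

client = bigquery.Client(project="my-project")  # placeholder project id
table = client.get_table("my-project.benchmark.deploy_latency")  # placeholder table id

rows = [("python", datetime.datetime.utcnow(), 12.3, False)]
errors = client.insert_rows(table, rows)  # returns a list of per-row errors
if errors:
    print("Insert failed:", errors)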
Example #5
Source File: benchmark_uploader.py From models with Apache License 2.0 | 6 votes |
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from
        local file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials)
Example #6
Source File: bq_to_xml.py From healthcare-deid with Apache License 2.0 | 6 votes |
def run(input_query, output_dir, task_name, id_columns, target_column):
    """Get the BigQuery data and write it to local files."""
    if output_dir.startswith('gs://'):
        raise Exception('Writing the output to a GCS bucket is not supported; '
                        'please write to a local directory. You can then upload '
                        'your files using "gsutil cp".')

    bq_client = bigquery.Client()
    job_config = bigquery.job.QueryJobConfig()
    job_config.use_legacy_sql = True
    query_job = bq_client.query(input_query, job_config=job_config)
    results_table = query_job.result()

    for row in results_table:
        id_str = '-'.join([str(row.get(col)) for col in id_columns])
        filename = os.path.join(output_dir, id_str + '.xml')
        with codecs.open(filename, 'w', encoding='utf-8') as f:
            f.write(TEMPLATE.format(task_name, row.get(target_column)))

    logging.info('Output written to "%s"', output_dir)
Example #7
Source File: server.py From healthcare-deid with Apache License 2.0 | 6 votes |
def verify_gcs_path(path):
    """Verifies that a GCS path exists.

    Args:
      path: A string that represents the target path.

    Returns:
      A boolean of the verification status.
    """
    storage_client = storage.Client()
    path_info = gcsutil.GcsFileName.from_path(path)
    try:
        bucket = storage_client.get_bucket(path_info.bucket)
    except exceptions.NotFound:
        return False
    return storage.Blob(bucket=bucket, name=path_info.blob).exists(storage_client)
Example #8
Source File: main_test.py From python-docs-samples with Apache License 2.0 | 6 votes |
def clients():
    # [START bigquerystorage_pandas_tutorial_all]
    # [START bigquerystorage_pandas_tutorial_create_client]
    import google.auth
    from google.cloud import bigquery
    from google.cloud import bigquery_storage_v1beta1

    # Explicitly create a credentials object. This allows you to use the same
    # credentials for both the BigQuery and BigQuery Storage clients, avoiding
    # unnecessary API calls to fetch duplicate authentication tokens.
    credentials, your_project_id = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    # Make clients.
    bqclient = bigquery.Client(
        credentials=credentials,
        project=your_project_id,
    )
    bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
        credentials=credentials
    )
    # [END bigquerystorage_pandas_tutorial_create_client]
    # [END bigquerystorage_pandas_tutorial_all]
    return bqclient, bqstorageclient
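A typical follow-on use of this client pair (closely resembling the rest of the pandas tutorial this fixture belongs to) is downloading table columns into a DataFrame through the Storage API. The sketch below repeats the client setup so it stands alone and uses a public dataset purely for illustration:

import google.auth
from google.cloud import bigquery
from google.cloud import bigquery_storage_v1beta1

credentials, your_project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
bqclient = bigquery.Client(credentials=credentials, project=your_project_id)
bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(credentials=credentials)

# Download selected columns of a public table into a pandas DataFrame.
table = bigquery.TableReference.from_string(
    "bigquery-public-data.utility_us.country_code_iso"
)
rows = bqclient.list_rows(
    table,
    selected_fields=[
        bigquery.SchemaField("country_name", "STRING"),
        bigquery.SchemaField("fips_code", "STRING"),
    ],
)
dataframe = rows.to_dataframe(bqstorage_client=bqstorageclient)
print(dataframe.head())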
Example #9
Source File: benchmark_uploader.py From models with Apache License 2.0 | 6 votes |
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from
        local file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials)
Example #10
Source File: sqlalchemy_bigquery.py From pybigquery with MIT License | 6 votes |
def _create_client_from_credentials(self, credentials, default_query_job_config, project_id):
    if project_id is None:
        project_id = credentials.project_id

    scopes = (
        'https://www.googleapis.com/auth/bigquery',
        'https://www.googleapis.com/auth/cloud-platform',
        'https://www.googleapis.com/auth/drive'
    )
    credentials = credentials.with_scopes(scopes)

    self._add_default_dataset_to_job_config(
        default_query_job_config, project_id, self.dataset_id)

    return bigquery.Client(
        project=project_id,
        credentials=credentials,
        location=self.location,
        default_query_job_config=default_query_job_config,
    )
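This method is internal to the pybigquery SQLAlchemy dialect; end users normally reach it indirectly through create_engine(). A hedged usage sketch, with a placeholder project ID and a public table, might look like the following (assuming the SQLAlchemy 1.3-era API that pybigquery targets):

from sqlalchemy import create_engine

# "my-project" is a placeholder GCP project id.
engine = create_engine("bigquery://my-project")
with engine.connect() as conn:
    result = conn.execute(
        "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` LIMIT 5"
    )
    for row in result:
        print(row)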
Example #11
Source File: samples_test.py From python-docs-samples with Apache License 2.0 | 6 votes |
def test_client_library_legacy_query():
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]

    assert len(df) > 0
Example #12
Source File: kaggle_gcp.py From docker-python with Apache License 2.0 | 6 votes |
def init_gcs():
    is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
    from google.cloud import storage
    if not is_user_secrets_token_set:
        return storage

    from kaggle_gcp import get_integrations
    if not get_integrations().has_gcs():
        return storage

    from kaggle_secrets import GcpTarget
    from kaggle_gcp import KaggleKernelCredentials
    monkeypatch_client(
        storage.Client,
        KaggleKernelCredentials(target=GcpTarget.GCS))
    return storage
Example #13
Source File: samples_test.py From python-docs-samples with Apache License 2.0 | 6 votes |
def test_client_library_query():
    # [START bigquery_migration_client_library_query]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = 'TX'
        LIMIT 100
    """

    # Run a Standard SQL query using the environment's default project
    df = client.query(sql).to_dataframe()

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0
    project_id = os.environ['GOOGLE_CLOUD_PROJECT']
    # [START bigquery_migration_client_library_query]
    df = client.query(sql, project=project_id).to_dataframe()
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0
Example #14
Source File: simple_app.py From python-docs-samples with Apache License 2.0 | 6 votes |
def query_stackoverflow():
    # [START bigquery_simple_app_client]
    client = bigquery.Client()
    # [END bigquery_simple_app_client]

    # [START bigquery_simple_app_query]
    query_job = client.query("""
        SELECT
          CONCAT(
            'https://stackoverflow.com/questions/',
            CAST(id as STRING)) as url,
          view_count
        FROM `bigquery-public-data.stackoverflow.posts_questions`
        WHERE tags like '%google-bigquery%'
        ORDER BY view_count DESC
        LIMIT 10""")

    results = query_job.result()  # Waits for job to complete.
    # [END bigquery_simple_app_query]

    # [START bigquery_simple_app_print]
    for row in results:
        print("{} : {} views".format(row.url, row.view_count))
    # [END bigquery_simple_app_print]
Example #15
Source File: client.py From ibis with Apache License 2.0 | 6 votes |
def __init__(self, project_id, dataset_id=None, credentials=None):
    """Construct a BigQueryClient.

    Parameters
    ----------
    project_id : str
        A project name
    dataset_id : Optional[str]
        A ``<project_id>.<dataset_id>`` string or just a dataset name
    credentials : google.auth.credentials.Credentials
    """
    (
        self.data_project,
        self.billing_project,
        self.dataset,
    ) = parse_project_and_dataset(project_id, dataset_id)
    self.client = bq.Client(
        project=self.data_project, credentials=credentials
    )
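This constructor is usually not called directly; ibis wraps it behind its connect() helper. A hedged usage sketch, assuming the ibis BigQuery backend of that era and using placeholder project, dataset, and table names:

import ibis

con = ibis.bigquery.connect(
    project_id="my-project",   # placeholder billing/data project
    dataset_id="my_dataset",   # placeholder default dataset
)
table = con.table("my_table")  # placeholder table name
print(table.count().execute())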
Example #16
Source File: benchmark.py From runtimes-common with Apache License 2.0 | 6 votes |
def _record_build_times_to_bigquery(self, build_times):
    current_date = datetime.datetime.now()
    logging.info('Retrieving bigquery client')
    client = bigquery.Client(project=self._project)

    dataset_ref = client.dataset(self._dataset)
    table_ref = dataset_ref.table(self._table)
    table = client.get_table(table_ref)

    full_name = "{0}:{1}.{2}".format(self._project, self._dataset, self._table)
    logging.info("Adding build time data to {0}".format(full_name))
    rows = [(current_date, self._description, bt[0], bt[1]) for bt in build_times]
    client.create_rows(table, rows)
    logging.info("Finished adding build times to {0}".format(full_name))
Example #17
Source File: test_auth.py From pandas-gbq with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _try_credentials(project_id, credentials):
    from google.cloud import bigquery
    import google.api_core.exceptions
    import google.auth.exceptions

    if not credentials:
        return None
    if not project_id:
        return credentials

    try:
        client = bigquery.Client(project=project_id, credentials=credentials)
        # Check if the application has rights to the BigQuery project
        client.query("SELECT 1").result()
        return credentials
    except google.api_core.exceptions.GoogleAPIError:
        return None
    except google.auth.exceptions.RefreshError:
        # Sometimes (such as on Travis) google-auth returns GCE credentials,
        # but fetching the token for those credentials doesn't actually work.
        # See:
        # https://github.com/googleapis/google-auth-library-python/issues/287
        return None
Example #18
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def give_file_gbq(path_to_file, bq_configuration):
    """
    Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.write_disposition = bq_configuration["write_disposition"]
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " +
          bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." +
          bq_configuration["table_id"] + ".")
Example #19
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def give_file_gbq(path_to_file, bq_configuration):
    """
    Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " +
          bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." +
          bq_configuration["table_id"] + ".")
    os.remove(path_to_file)
Example #20
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def give_file_gbq(path_to_file, bq_configuration):
    """
    Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " +
          bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." +
          bq_configuration["table_id"] + ".")
    os.remove(path_to_file)
Example #21
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def load_to_gbq(filename, bq_configuration):
    """
    Loading data to BigQuery using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = bq_configuration["source_format"]
    job_config.autodetect = True
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.skip_leading_rows = 1

    # upload the file to BigQuery table
    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " +
          bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." +
          bq_configuration["table"] + ".")
    os.remove(filename)
Example #22
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def load_to_gbq(client, data, bq_configuration):
    """
    Loading data to BigQuery using *bq_configuration* settings.
    """
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.autodetect = True

    load_job = client.load_table_from_file(
        data,
        table_ref,
        job_config=job_config)  # API request
    print('Starting job {}'.format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print('Job finished.')
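Examples #18 through #22 all stream a local file or file object into BigQuery with load_table_from_file(). When the source data already sits in Cloud Storage, the closely related load_table_from_uri() avoids the local round trip. The sketch below is a hedged companion rather than part of any of the projects above; project, bucket, and table names are placeholders, and it assumes a reasonably recent google-cloud-bigquery release:

from google.cloud import bigquery

client = bigquery.Client(project="my-project")  # placeholder project id

job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.CSV
job_config.skip_leading_rows = 1
job_config.autodetect = True
job_config.write_disposition = "WRITE_TRUNCATE"

uri = "gs://my-bucket/my-data.csv"  # placeholder Cloud Storage path
load_job = client.load_table_from_uri(
    uri, "my-project.my_dataset.my_table", job_config=job_config
)
load_job.result()  # waits for the load job to finish
print("Loaded {} rows.".format(load_job.output_rows))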
Example #23
Source File: benchmark_uploader.py From Gun-Detector with Apache License 2.0 | 6 votes |
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from
        local file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials)
Example #24
Source File: authenticate_service_account.py From python-docs-samples with Apache License 2.0 | 6 votes |
def main():
    key_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")

    # [START bigquery_client_json_credentials]
    from google.cloud import bigquery
    from google.oauth2 import service_account

    # TODO(developer): Set key_path to the path to the service account key
    #                  file.
    # key_path = "path/to/service_account.json"

    credentials = service_account.Credentials.from_service_account_file(
        key_path,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )

    client = bigquery.Client(
        credentials=credentials,
        project=credentials.project_id,
    )
    # [END bigquery_client_json_credentials]

    return client
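For the common case where only a key file path is available, the client library also offers a one-line factory that builds the credentials internally. A short hedged sketch; the key path is a placeholder:

from google.cloud import bigquery

# The path below is a placeholder for a downloaded service account key file.
client = bigquery.Client.from_service_account_json("path/to/service_account.json")
print(client.project)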
Example #25
Source File: quickstart_searchallresources_test.py From python-docs-samples with Apache License 2.0 | 5 votes |
def bigquery_client():
    yield bigquery.Client()
Example #26
Source File: resources.py From dagster with Apache License 2.0 | 5 votes |
def bigquery_resource(context):
    return bigquery.Client(**context.resource_config)
Example #27
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_create_table_nested_repeated_schema(client, to_delete):
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table_ref = dataset_ref.table("my_table")
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)  # API request

    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema]
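Once a table with a nested, repeated RECORD field like this exists, rows can be streamed in as plain Python dictionaries, with the repeated field supplied as a list of dictionaries. The following is a hedged follow-on sketch, not part of the original snippet; the table ID is a placeholder and the field names mirror the schema above:

from google.cloud import bigquery

client = bigquery.Client()  # relies on Application Default Credentials
table = client.get_table("my-project.my_dataset.my_table")  # placeholder table created with the schema above

rows_to_insert = [
    {
        "id": "1",
        "first_name": "Ada",
        "last_name": "Lovelace",
        "dob": "1815-12-10",
        "addresses": [
            {
                "status": "former",
                "address": "1 Main St",
                "city": "London",
                "state": "N/A",
                "zip": "00000",
                "numberOfYears": "10",
            }
        ],
    }
]
errors = client.insert_rows(table, rows_to_insert)  # returns per-row errors, if any
assert errors == []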
Example #28
Source File: streaming_beam_test.py From python-docs-samples with Apache License 2.0 | 5 votes |
def dataset():
    bigquery_client = bigquery.Client(project=PROJECT)
    dataset_id = '{}.{}'.format(PROJECT, DATASET)
    dataset = bigquery.Dataset(dataset_id)
    dataset = bigquery_client.create_dataset(dataset, exists_ok=True)

    yield '{}:{}'.format(PROJECT, DATASET)

    bigquery_client.delete_table('{}.{}'.format(DATASET, TABLE), not_found_ok=True)
    bigquery_client.delete_dataset(DATASET, not_found_ok=True)
Example #29
Source File: streaming_beam_test.py From python-docs-samples with Apache License 2.0 | 5 votes |
def test_dataflow_flex_templates_pubsub_to_bigquery(dataset, topic_path,
                                                    subscription_path):
    # Use one process to publish messages to a topic.
    publish_process = mp.Process(
        target=lambda: _infinite_publish_job(topic_path))

    # Use another process to run the streaming pipeline that should write one
    # row to BigQuery every minute (according to the default window size).
    pipeline_process = mp.Process(target=lambda: sp.call([
        'python', 'streaming_beam.py',
        '--project', PROJECT,
        '--runner', 'DirectRunner',
        '--temp_location', tempfile.mkdtemp(),
        '--input_subscription', subscription_path,
        '--output_table', '{}.{}'.format(dataset, TABLE),
        '--window_interval', '5',
    ]))

    publish_process.start()
    pipeline_process.start()

    pipeline_process.join(timeout=30)
    publish_process.join(timeout=0)

    pipeline_process.terminate()
    publish_process.terminate()

    # Check for output data in BigQuery.
    bigquery_client = bigquery.Client(project=PROJECT)
    query = 'SELECT * FROM {}.{}'.format(DATASET, TABLE)
    query_job = bigquery_client.query(query)
    rows = query_job.result()
    assert rows.total_rows > 0
    for row in rows:
        assert row['score'] == 1

    # TODO: Testcase using Teststream currently does not work as intended.
    # The first write to BigQuery fails. Have filed a bug. The test case
    # to be changed once the bug gets fixed.
Example #30
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_create_client_default_credentials():
    """Create a BigQuery client with Application Default Credentials"""

    # [START bigquery_client_default_credentials]
    from google.cloud import bigquery

    # If you don't specify credentials when constructing the client, the
    # client library will look for credentials in the environment.
    client = bigquery.Client()
    # [END bigquery_client_default_credentials]

    assert client is not None