Python google.cloud.bigquery.Client() Examples
The following are 30 code examples of google.cloud.bigquery.Client(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
Example #1
Source File: connection.py From insightconnect-plugins with MIT License | 8 votes |
def connect(self, params):
    self.logger.info(f"Connect: Connecting...")
    self.client = bigquery.Client(
        project=params.get(Input.PROJECT_ID),
        credentials=service_account.Credentials.from_service_account_info({
            "type": "service_account",
            "project_id": params.get(Input.PROJECT_ID),
            "private_key_id": params.get(Input.PRIVATE_KEY_ID),
            "private_key": params.get(Input.PRIVATE_KEY).get("privateKey").replace('\\n', "\n", -1),
            "client_email": params.get(Input.CLIENT_EMAIL),
            "client_id": params.get(Input.CLIENT_ID),
            "auth_uri": params.get(Input.AUTH_URI),
            "client_x509_cert_url": params.get(Input.CLIENT_X509_CERT_URL),
            "token_uri": params.get(Input.TOKEN_URI, "https://oauth2.googleapis.com/token"),
            "auth_provider_x509_cert_url": params.get(
                Input.AUTH_PROVIDER_X509_CERT_URL,
                "https://www.googleapis.com/oauth2/v1/certs")
        })
    )
Example #2
Source File: bq_writer.py From lookml-tools with Apache License 2.0 | 8 votes |
def _upload_to_gcs(self, gcs_project_id, target_bucket_name, bucket_folder, filename):
    '''upload CSV to file in GCS

    Args:
        gcs_project_id (str): project name
        target_bucket_name (str): name of GCS bucket
        bucket_folder (str): name of GCS folder
        filename (str): filepath to upload

    Returns:
        nothing. Side effect is that data is uploaded to GCS
    '''
    storage_client = storage.Client(gcs_project_id)
    bucket = storage_client.get_bucket(target_bucket_name)
    path = bucket_folder + os.sep + filename
    logging.info("Loading to GCS: %s", path)
    blob = bucket.blob(path)  # name in GCS
    blob.upload_from_filename(filename)
Example #3
Source File: samples_test.py From python-docs-samples with Apache License 2.0 | 7 votes |
def test_client_library_query_bqstorage():
    # [START bigquery_migration_client_library_query_bqstorage]
    import google.auth
    from google.cloud import bigquery
    from google.cloud import bigquery_storage_v1beta1

    # Create a BigQuery client and a BigQuery Storage API client with the same
    # credentials to avoid authenticating twice.
    credentials, project_id = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    client = bigquery.Client(credentials=credentials, project=project_id)
    bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient(
        credentials=credentials
    )
    sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"

    # Use a BigQuery Storage API client to download results more quickly.
    df = client.query(sql).to_dataframe(bqstorage_client=bqstorage_client)
    # [END bigquery_migration_client_library_query_bqstorage]

    assert len(df) > 0
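This example targets the beta bigquery_storage_v1beta1 client. A rough sketch of the same pattern against the newer client libraries (assuming google-cloud-bigquery >= 2.0 and google-cloud-bigquery-storage >= 2.0, with an illustrative public-data query) might look like this:

import google.auth
from google.cloud import bigquery
from google.cloud import bigquery_storage

credentials, project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
client = bigquery.Client(credentials=credentials, project=project_id)
read_client = bigquery_storage.BigQueryReadClient(credentials=credentials)

sql = "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` LIMIT 10"
# The read client still routes the result download through the Storage API.
df = client.query(sql).to_dataframe(bqstorage_client=read_client)
print(len(df))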
Example #4
Source File: deploy_app.py From runtimes-common with Apache License 2.0 | 6 votes |
def _record_latency_to_bigquery(deploy_latency, language, is_xrt):
    current_date = datetime.datetime.now()
    row = [(language, current_date, deploy_latency, is_xrt)]
    project = os.environ.get(DEPLOY_LATENCY_PROJECT_ENV)
    if not project:
        logging.warn('No project specified to record deployment latency!')
        logging.warn('If you wish to record deployment latency, \
please set %s env var and try again.', DEPLOY_LATENCY_PROJECT_ENV)
        return 0
    logging.debug('Fetching bigquery client for project %s', project)
    client = bigquery.Client(project=project)
    dataset = client.dataset(DATASET_NAME)
    logging.debug('Writing bigquery data to table %s in dataset %s',
                  TABLE_NAME, dataset)
    table_ref = bigquery.TableReference(dataset_ref=dataset, table_id=TABLE_NAME)
    table = client.get_table(table_ref)
    return client.create_rows(table, row)
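This example uses client.create_rows() and client.dataset(), which come from an older google-cloud-bigquery API surface; current releases expose the streaming insert as Client.insert_rows(). A minimal, hedged sketch of the updated call, with placeholder project, dataset, and table names, might look like:

import datetime

from google.cloud import bigquery

client = bigquery.Client(project="my-project")  # placeholder project id
table = client.get_table("my-project.benchmark.deploy_latency")  # placeholder table id

rows = [("python", datetime.datetime.utcnow(), 12.3, False)]
errors = client.insert_rows(table, rows)  # returns a list of per-row errors
if errors:
    print("Insert failed:", errors)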
Example #5
Source File: benchmark_uploader.py From models with Apache License 2.0 | 6 votes |
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from
        local file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials)
Example #6
Source File: bq_to_xml.py From healthcare-deid with Apache License 2.0 | 6 votes |
def run(input_query, output_dir, task_name, id_columns, target_column):
    """Get the BigQuery data and write it to local files."""
    if output_dir.startswith('gs://'):
        raise Exception('Writing the output to a GCS bucket is not supported; '
                        'please write to a local directory. You can then upload '
                        'your files using "gsutil cp".')

    bq_client = bigquery.Client()
    job_config = bigquery.job.QueryJobConfig()
    job_config.use_legacy_sql = True
    query_job = bq_client.query(input_query, job_config=job_config)
    results_table = query_job.result()

    for row in results_table:
        id_str = '-'.join([str(row.get(col)) for col in id_columns])
        filename = os.path.join(output_dir, id_str + '.xml')
        with codecs.open(filename, 'w', encoding='utf-8') as f:
            f.write(TEMPLATE.format(task_name, row.get(target_column)))

    logging.info('Output written to "%s"', output_dir)
Example #7
Source File: server.py From healthcare-deid with Apache License 2.0 | 6 votes |
def verify_gcs_path(path):
    """Verifies that a GCS path exists.

    Args:
      path: A string that represents the target path.

    Returns:
      A boolean of the verification status.
    """
    storage_client = storage.Client()
    path_info = gcsutil.GcsFileName.from_path(path)
    try:
        bucket = storage_client.get_bucket(path_info.bucket)
    except exceptions.NotFound:
        return False
    return storage.Blob(bucket=bucket, name=path_info.blob).exists(storage_client)
Example #8
Source File: main_test.py From python-docs-samples with Apache License 2.0 | 6 votes |
def clients():
    # [START bigquerystorage_pandas_tutorial_all]
    # [START bigquerystorage_pandas_tutorial_create_client]
    import google.auth
    from google.cloud import bigquery
    from google.cloud import bigquery_storage_v1beta1

    # Explicitly create a credentials object. This allows you to use the same
    # credentials for both the BigQuery and BigQuery Storage clients, avoiding
    # unnecessary API calls to fetch duplicate authentication tokens.
    credentials, your_project_id = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    # Make clients.
    bqclient = bigquery.Client(
        credentials=credentials,
        project=your_project_id,
    )
    bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
        credentials=credentials
    )
    # [END bigquerystorage_pandas_tutorial_create_client]
    # [END bigquerystorage_pandas_tutorial_all]
    return bqclient, bqstorageclient
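A typical follow-on use of this client pair (closely resembling the rest of the pandas tutorial this fixture belongs to) is downloading table columns into a DataFrame through the Storage API. The sketch below repeats the client setup so it stands alone and uses a public dataset purely for illustration:

import google.auth
from google.cloud import bigquery
from google.cloud import bigquery_storage_v1beta1

credentials, your_project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
bqclient = bigquery.Client(credentials=credentials, project=your_project_id)
bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(credentials=credentials)

# Download selected columns of a public table into a pandas DataFrame.
table = bigquery.TableReference.from_string(
    "bigquery-public-data.utility_us.country_code_iso"
)
rows = bqclient.list_rows(
    table,
    selected_fields=[
        bigquery.SchemaField("country_name", "STRING"),
        bigquery.SchemaField("fips_code", "STRING"),
    ],
)
dataframe = rows.to_dataframe(bqstorage_client=bqstorageclient)
print(dataframe.head())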
Example #9
Source File: benchmark_uploader.py From models with Apache License 2.0 | 6 votes |
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from
        local file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials)
Example #10
Source File: sqlalchemy_bigquery.py From pybigquery with MIT License | 6 votes |
def _create_client_from_credentials(self, credentials, default_query_job_config, project_id):
    if project_id is None:
        project_id = credentials.project_id

    scopes = (
        'https://www.googleapis.com/auth/bigquery',
        'https://www.googleapis.com/auth/cloud-platform',
        'https://www.googleapis.com/auth/drive'
    )
    credentials = credentials.with_scopes(scopes)

    self._add_default_dataset_to_job_config(
        default_query_job_config, project_id, self.dataset_id)

    return bigquery.Client(
        project=project_id,
        credentials=credentials,
        location=self.location,
        default_query_job_config=default_query_job_config,
    )
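This method is internal to the pybigquery SQLAlchemy dialect; end users normally reach it indirectly through create_engine(). A hedged usage sketch, with a placeholder project ID and a public table, might look like the following (assuming the SQLAlchemy 1.3-era API that pybigquery targets):

from sqlalchemy import create_engine

# "my-project" is a placeholder GCP project id.
engine = create_engine("bigquery://my-project")
with engine.connect() as conn:
    result = conn.execute(
        "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` LIMIT 5"
    )
    for row in result:
        print(row)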
Example #11
Source File: samples_test.py From python-docs-samples with Apache License 2.0 | 6 votes |
def test_client_library_legacy_query():
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]

    assert len(df) > 0
Example #12
Source File: kaggle_gcp.py From docker-python with Apache License 2.0 | 6 votes |
def init_gcs():
    is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
    from google.cloud import storage
    if not is_user_secrets_token_set:
        return storage

    from kaggle_gcp import get_integrations
    if not get_integrations().has_gcs():
        return storage

    from kaggle_secrets import GcpTarget
    from kaggle_gcp import KaggleKernelCredentials
    monkeypatch_client(
        storage.Client,
        KaggleKernelCredentials(target=GcpTarget.GCS))
    return storage
Example #13
Source File: samples_test.py From python-docs-samples with Apache License 2.0 | 6 votes |
def test_client_library_query():
    # [START bigquery_migration_client_library_query]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = 'TX'
        LIMIT 100
    """

    # Run a Standard SQL query using the environment's default project
    df = client.query(sql).to_dataframe()

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0
    project_id = os.environ['GOOGLE_CLOUD_PROJECT']
    # [START bigquery_migration_client_library_query]
    df = client.query(sql, project=project_id).to_dataframe()
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0
Example #14
Source File: simple_app.py From python-docs-samples with Apache License 2.0 | 6 votes |
def query_stackoverflow():
    # [START bigquery_simple_app_client]
    client = bigquery.Client()
    # [END bigquery_simple_app_client]

    # [START bigquery_simple_app_query]
    query_job = client.query("""
        SELECT
          CONCAT(
            'https://stackoverflow.com/questions/',
            CAST(id as STRING)) as url,
          view_count
        FROM `bigquery-public-data.stackoverflow.posts_questions`
        WHERE tags like '%google-bigquery%'
        ORDER BY view_count DESC
        LIMIT 10""")

    results = query_job.result()  # Waits for job to complete.
    # [END bigquery_simple_app_query]

    # [START bigquery_simple_app_print]
    for row in results:
        print("{} : {} views".format(row.url, row.view_count))
    # [END bigquery_simple_app_print]
Example #15
Source File: client.py From ibis with Apache License 2.0 | 6 votes |
def __init__(self, project_id, dataset_id=None, credentials=None):
    """Construct a BigQueryClient.

    Parameters
    ----------
    project_id : str
        A project name
    dataset_id : Optional[str]
        A ``<project_id>.<dataset_id>`` string or just a dataset name
    credentials : google.auth.credentials.Credentials
    """
    (
        self.data_project,
        self.billing_project,
        self.dataset,
    ) = parse_project_and_dataset(project_id, dataset_id)
    self.client = bq.Client(
        project=self.data_project, credentials=credentials
    )
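This constructor is usually not called directly; ibis wraps it behind its connect() helper. A hedged usage sketch, assuming the ibis BigQuery backend of that era and using placeholder project, dataset, and table names:

import ibis

con = ibis.bigquery.connect(
    project_id="my-project",   # placeholder billing/data project
    dataset_id="my_dataset",   # placeholder default dataset
)
table = con.table("my_table")  # placeholder table name
print(table.count().execute())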
Example #16
Source File: benchmark.py From runtimes-common with Apache License 2.0 | 6 votes |
def _record_build_times_to_bigquery(self, build_times):
    current_date = datetime.datetime.now()
    logging.info('Retrieving bigquery client')
    client = bigquery.Client(project=self._project)

    dataset_ref = client.dataset(self._dataset)
    table_ref = dataset_ref.table(self._table)
    table = client.get_table(table_ref)

    full_name = "{0}:{1}.{2}".format(self._project, self._dataset, self._table)
    logging.info("Adding build time data to {0}".format(full_name))
    rows = [(current_date, self._description, bt[0], bt[1]) for bt in build_times]
    client.create_rows(table, rows)
    logging.info("Finished adding build times to {0}".format(full_name))
Example #17
Source File: test_auth.py From pandas-gbq with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _try_credentials(project_id, credentials):
    from google.cloud import bigquery
    import google.api_core.exceptions
    import google.auth.exceptions

    if not credentials:
        return None
    if not project_id:
        return credentials

    try:
        client = bigquery.Client(project=project_id, credentials=credentials)
        # Check if the application has rights to the BigQuery project
        client.query("SELECT 1").result()
        return credentials
    except google.api_core.exceptions.GoogleAPIError:
        return None
    except google.auth.exceptions.RefreshError:
        # Sometimes (such as on Travis) google-auth returns GCE credentials,
        # but fetching the token for those credentials doesn't actually work.
        # See:
        # https://github.com/googleapis/google-auth-library-python/issues/287
        return None
Example #18
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def give_file_gbq(path_to_file, bq_configuration):
    """
    Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.write_disposition = bq_configuration["write_disposition"]
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " +
          bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." +
          bq_configuration["table_id"] + ".")
Example #19
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def give_file_gbq(path_to_file, bq_configuration):
    """
    Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " +
          bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." +
          bq_configuration["table_id"] + ".")
    os.remove(path_to_file)
Example #20
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def give_file_gbq(path_to_file, bq_configuration):
    """
    Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " +
          bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." +
          bq_configuration["table_id"] + ".")
    os.remove(path_to_file)
Example #21
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def load_to_gbq(filename, bq_configuration):
    """
    Loading data to BigQuery using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = bq_configuration["source_format"]
    job_config.autodetect = True
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.skip_leading_rows = 1

    # upload the file to BigQuery table
    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " +
          bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." +
          bq_configuration["table"] + ".")
    os.remove(filename)
Example #22
Source File: main.py From BigQuery-integrations with MIT License | 6 votes |
def load_to_gbq(client, data, bq_configuration):
    """
    Loading data to BigQuery using *bq_configuration* settings.
    """
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.autodetect = True

    load_job = client.load_table_from_file(
        data,
        table_ref,
        job_config=job_config)  # API request
    print('Starting job {}'.format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print('Job finished.')
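Examples #18 through #22 all stream a local file or file object into BigQuery with load_table_from_file(). When the source data already sits in Cloud Storage, the closely related load_table_from_uri() avoids the local round trip. The sketch below is a hedged companion rather than part of any of the projects above; project, bucket, and table names are placeholders, and it assumes a reasonably recent google-cloud-bigquery release:

from google.cloud import bigquery

client = bigquery.Client(project="my-project")  # placeholder project id

job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.CSV
job_config.skip_leading_rows = 1
job_config.autodetect = True
job_config.write_disposition = "WRITE_TRUNCATE"

uri = "gs://my-bucket/my-data.csv"  # placeholder Cloud Storage path
load_job = client.load_table_from_uri(
    uri, "my-project.my_dataset.my_table", job_config=job_config
)
load_job.result()  # waits for the load job to finish
print("Loaded {} rows.".format(load_job.output_rows))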
Example #23
Source File: benchmark_uploader.py From Gun-Detector with Apache License 2.0 | 6 votes |
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from
        local file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials)
Example #24
Source File: authenticate_service_account.py From python-docs-samples with Apache License 2.0 | 6 votes |
def main():
    key_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")

    # [START bigquery_client_json_credentials]
    from google.cloud import bigquery
    from google.oauth2 import service_account

    # TODO(developer): Set key_path to the path to the service account key
    #                  file.
    # key_path = "path/to/service_account.json"

    credentials = service_account.Credentials.from_service_account_file(
        key_path,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )

    client = bigquery.Client(
        credentials=credentials,
        project=credentials.project_id,
    )
    # [END bigquery_client_json_credentials]

    return client
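For the common case where only a key file path is available, the client library also offers a one-line factory that builds the credentials internally. A short hedged sketch; the key path is a placeholder:

from google.cloud import bigquery

# The path below is a placeholder for a downloaded service account key file.
client = bigquery.Client.from_service_account_json("path/to/service_account.json")
print(client.project)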
Example #25
Source File: quickstart_searchallresources_test.py From python-docs-samples with Apache License 2.0 | 5 votes |
def bigquery_client():
    yield bigquery.Client()
Example #26
Source File: resources.py From dagster with Apache License 2.0 | 5 votes |
def bigquery_resource(context):
    return bigquery.Client(**context.resource_config)
Example #27
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_create_table_nested_repeated_schema(client, to_delete):
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table_ref = dataset_ref.table("my_table")
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)  # API request

    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema]
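Once a table with a nested, repeated RECORD field like this exists, rows can be streamed in as plain Python dictionaries, with the repeated field supplied as a list of dictionaries. The following is a hedged follow-on sketch, not part of the original snippet; the table ID is a placeholder and the field names mirror the schema above:

from google.cloud import bigquery

client = bigquery.Client()  # relies on Application Default Credentials
table = client.get_table("my-project.my_dataset.my_table")  # placeholder table created with the schema above

rows_to_insert = [
    {
        "id": "1",
        "first_name": "Ada",
        "last_name": "Lovelace",
        "dob": "1815-12-10",
        "addresses": [
            {
                "status": "former",
                "address": "1 Main St",
                "city": "London",
                "state": "N/A",
                "zip": "00000",
                "numberOfYears": "10",
            }
        ],
    }
]
errors = client.insert_rows(table, rows_to_insert)  # returns per-row errors, if any
assert errors == []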
Example #28
Source File: streaming_beam_test.py From python-docs-samples with Apache License 2.0 | 5 votes |
def dataset():
    bigquery_client = bigquery.Client(project=PROJECT)
    dataset_id = '{}.{}'.format(PROJECT, DATASET)
    dataset = bigquery.Dataset(dataset_id)
    dataset = bigquery_client.create_dataset(dataset, exists_ok=True)

    yield '{}:{}'.format(PROJECT, DATASET)

    bigquery_client.delete_table('{}.{}'.format(DATASET, TABLE), not_found_ok=True)
    bigquery_client.delete_dataset(DATASET, not_found_ok=True)
Example #29
Source File: streaming_beam_test.py From python-docs-samples with Apache License 2.0 | 5 votes |
def test_dataflow_flex_templates_pubsub_to_bigquery(dataset, topic_path,
                                                    subscription_path):
    # Use one process to publish messages to a topic.
    publish_process = mp.Process(
        target=lambda: _infinite_publish_job(topic_path))

    # Use another process to run the streaming pipeline that should write one
    # row to BigQuery every minute (according to the default window size).
    pipeline_process = mp.Process(target=lambda: sp.call([
        'python', 'streaming_beam.py',
        '--project', PROJECT,
        '--runner', 'DirectRunner',
        '--temp_location', tempfile.mkdtemp(),
        '--input_subscription', subscription_path,
        '--output_table', '{}.{}'.format(dataset, TABLE),
        '--window_interval', '5',
    ]))

    publish_process.start()
    pipeline_process.start()

    pipeline_process.join(timeout=30)
    publish_process.join(timeout=0)

    pipeline_process.terminate()
    publish_process.terminate()

    # Check for output data in BigQuery.
    bigquery_client = bigquery.Client(project=PROJECT)
    query = 'SELECT * FROM {}.{}'.format(DATASET, TABLE)
    query_job = bigquery_client.query(query)
    rows = query_job.result()
    assert rows.total_rows > 0
    for row in rows:
        assert row['score'] == 1

    # TODO: Testcase using Teststream currently does not work as intended.
    # The first write to BigQuery fails. Have filed a bug. The test case
    # to be changed once the bug gets fixed.
Example #30
Source File: snippets.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_create_client_default_credentials():
    """Create a BigQuery client with Application Default Credentials"""

    # [START bigquery_client_default_credentials]
    from google.cloud import bigquery

    # If you don't specify credentials when constructing the client, the
    # client library will look for credentials in the environment.
    client = bigquery.Client()
    # [END bigquery_client_default_credentials]

    assert client is not None