Python google.cloud.bigquery.Client() Examples

The following are 30 code examples of google.cloud.bigquery.Client(), collected from open-source projects. Each example lists the source file and project it was taken from, so you can trace it back to the original code. You may also want to check out all available functions and classes of the google.cloud.bigquery module.
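
As a quick orientation before the examples, here is a minimal sketch of the two most common ways to construct a client: relying on Application Default Credentials, or passing an explicit project and credentials object. The project ID and key path below are placeholders.

from google.cloud import bigquery
from google.oauth2 import service_account

# Rely on Application Default Credentials (GOOGLE_APPLICATION_CREDENTIALS,
# gcloud auth, or the metadata server when running on GCP).
client = bigquery.Client()

# Or build the client from an explicit project and service account key file.
credentials = service_account.Credentials.from_service_account_file(
    "path/to/service_account.json"  # placeholder path
)
explicit_client = bigquery.Client(project="your-project-id", credentials=credentials)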
Example #1
Source File: connection.py    From insightconnect-plugins with MIT License
def connect(self, params):
        self.logger.info(f"Connect: Connecting...")
        self.client = bigquery.Client(
            project=params.get(Input.PROJECT_ID),
            credentials=service_account.Credentials.from_service_account_info({
              "type": "service_account",
              "project_id": params.get(Input.PROJECT_ID),
              "private_key_id": params.get(Input.PRIVATE_KEY_ID),
              "private_key": params.get(Input.PRIVATE_KEY).get("privateKey").replace('\\n', "\n", -1),
              "client_email": params.get(Input.CLIENT_EMAIL),
              "client_id": params.get(Input.CLIENT_ID),
              "auth_uri": params.get(Input.AUTH_URI),
              "client_x509_cert_url": params.get(Input.CLIENT_X509_CERT_URL),
              "token_uri": params.get(Input.TOKEN_URI, "https://oauth2.googleapis.com/token"),
              "auth_provider_x509_cert_url": params.get(Input.AUTH_PROVIDER_X509_CERT_URL,
                                                        "https://www.googleapis.com/oauth2/v1/certs")
            })
        ) 
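
Example #1 assembles the service-account JSON from individual plugin inputs. Distilled down to the core pattern, constructing a client from an in-memory service-account dict looks roughly like the sketch below; every value shown is a placeholder, and in practice the dict mirrors a downloaded key file.

from google.cloud import bigquery
from google.oauth2 import service_account

service_account_info = {
    "type": "service_account",
    "project_id": "your-project-id",
    "private_key_id": "placeholder-key-id",
    "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
    "client_email": "sa-name@your-project-id.iam.gserviceaccount.com",
    "token_uri": "https://oauth2.googleapis.com/token",
}

credentials = service_account.Credentials.from_service_account_info(service_account_info)
client = bigquery.Client(project=service_account_info["project_id"], credentials=credentials)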
Example #2
Source File: bq_writer.py    From lookml-tools with Apache License 2.0
def _upload_to_gcs(self, gcs_project_id, target_bucket_name, bucket_folder, filename):
        '''upload CSV to file in GCS

        Args:
            gcs_project_id (str): project name
            target_bucket_name (str): name of GCS bucket
            bucket_folder (str): name of GCS folder
            filename (str): filepath to upload

        Returns:
            nothing. Side effect is that data is uploaded to GCS

        '''
        storage_client = storage.Client(gcs_project_id)
        bucket = storage_client.get_bucket(target_bucket_name)
        # GCS object names always use "/" as the separator, regardless of OS
        path = bucket_folder + "/" + filename
        logging.info("Loading to GCS: %s", path)
        blob = bucket.blob(path)  # name in GCS
        blob.upload_from_filename(filename) 
Example #3
Source File: samples_test.py    From python-docs-samples with Apache License 2.0
def test_client_library_query_bqstorage():
    # [START bigquery_migration_client_library_query_bqstorage]
    import google.auth
    from google.cloud import bigquery
    from google.cloud import bigquery_storage_v1beta1

    # Create a BigQuery client and a BigQuery Storage API client with the same
    # credentials to avoid authenticating twice.
    credentials, project_id = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    client = bigquery.Client(credentials=credentials, project=project_id)
    bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient(
        credentials=credentials
    )
    sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"

    # Use a BigQuery Storage API client to download results more quickly.
    df = client.query(sql).to_dataframe(bqstorage_client=bqstorage_client)
    # [END bigquery_migration_client_library_query_bqstorage]
    assert len(df) > 0 
Example #4
Source File: deploy_app.py    From runtimes-common with Apache License 2.0
def _record_latency_to_bigquery(deploy_latency, language, is_xrt):
    current_date = datetime.datetime.now()
    row = [(language, current_date, deploy_latency, is_xrt)]

    project = os.environ.get(DEPLOY_LATENCY_PROJECT_ENV)
    if not project:
        logging.warning('No project specified to record deployment latency!')
        logging.warning('If you wish to record deployment latency, '
                        'please set %s env var and try again.',
                        DEPLOY_LATENCY_PROJECT_ENV)
        return 0
    logging.debug('Fetching bigquery client for project %s', project)
    client = bigquery.Client(project=project)
    dataset = client.dataset(DATASET_NAME)
    logging.debug('Writing bigquery data to table %s in dataset %s',
                  TABLE_NAME, dataset)
    table_ref = bigquery.TableReference(dataset_ref=dataset,
                                        table_id=TABLE_NAME)
    table = client.get_table(table_ref)
    return client.create_rows(table, row) 
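
Example #4 was written against an older release of google-cloud-bigquery, where the streaming-insert method was named create_rows; more recent releases expose the same operation as insert_rows. A minimal sketch of that final step with the newer name, using placeholder project, dataset, and table identifiers in place of the module-level constants above:

import datetime

from google.cloud import bigquery

PROJECT_ID = "your-project-id"      # placeholder
DATASET_NAME = "your_dataset"       # placeholder for the constant used above
TABLE_NAME = "your_table"           # placeholder for the constant used above

client = bigquery.Client(project=PROJECT_ID)
table_ref = bigquery.TableReference(dataset_ref=client.dataset(DATASET_NAME),
                                    table_id=TABLE_NAME)
table = client.get_table(table_ref)
row = [("python", datetime.datetime.now(), 12.3, True)]

# insert_rows returns a list of per-row errors; an empty list means success.
errors = client.insert_rows(table, row)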
Example #5
Source File: benchmark_uploader.py    From models with Apache License 2.0
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from local
        file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials) 
Example #6
Source File: bq_to_xml.py    From healthcare-deid with Apache License 2.0
def run(input_query, output_dir, task_name, id_columns, target_column):
  """Get the BigQuery data and write it to local files."""
  if output_dir.startswith('gs://'):
    raise Exception('Writing the output to a GCS bucket is not supported; '
                    'please write to a local directory. You can then upload '
                    'your files using "gsutil cp".')
  bq_client = bigquery.Client()
  job_config = bigquery.job.QueryJobConfig()
  job_config.use_legacy_sql = True
  query_job = bq_client.query(input_query, job_config=job_config)
  results_table = query_job.result()

  for row in results_table:
    id_str = '-'.join([str(row.get(col)) for col in id_columns])
    filename = os.path.join(output_dir, id_str + '.xml')
    with codecs.open(filename, 'w', encoding='utf-8') as f:
      f.write(TEMPLATE.format(task_name, row.get(target_column)))

  logging.info('Output written to "%s"', output_dir) 
Example #7
Source File: server.py    From healthcare-deid with Apache License 2.0
def verify_gcs_path(path):
  """Verifies that a GCS path exists.

  Args:
    path: A string that represents the target path.
  Returns:
    A boolean of the verification status.
  """
  storage_client = storage.Client()
  path_info = gcsutil.GcsFileName.from_path(path)
  try:
    bucket = storage_client.get_bucket(path_info.bucket)
  except exceptions.NotFound:
    return False
  return storage.Blob(bucket=bucket,
                      name=path_info.blob).exists(storage_client) 
Example #8
Source File: main_test.py    From python-docs-samples with Apache License 2.0
def clients():
    # [START bigquerystorage_pandas_tutorial_all]
    # [START bigquerystorage_pandas_tutorial_create_client]
    import google.auth
    from google.cloud import bigquery
    from google.cloud import bigquery_storage_v1beta1

    # Explicitly create a credentials object. This allows you to use the same
    # credentials for both the BigQuery and BigQuery Storage clients, avoiding
    # unnecessary API calls to fetch duplicate authentication tokens.
    credentials, your_project_id = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    # Make clients.
    bqclient = bigquery.Client(
        credentials=credentials,
        project=your_project_id,
    )
    bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
        credentials=credentials
    )
    # [END bigquerystorage_pandas_tutorial_create_client]
    # [END bigquerystorage_pandas_tutorial_all]
    return bqclient, bqstorageclient 
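
A hedged sketch of how the two clients returned by this fixture could be used together, downloading an entire table into a pandas DataFrame through the BigQuery Storage API; the table reference below is just an illustrative public dataset.

from google.cloud import bigquery

def table_to_dataframe(bqclient, bqstorageclient):
    # Any readable table reference works here; this one is a small public table.
    table = bigquery.TableReference.from_string(
        "bigquery-public-data.utility_us.country_code_iso"
    )
    rows = bqclient.list_rows(table)
    # Passing the storage client lets the download go through the faster read API.
    return rows.to_dataframe(bqstorage_client=bqstorageclient)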
Example #9
Source File: benchmark_uploader.py    From models with Apache License 2.0
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from local
        file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials) 
Example #10
Source File: sqlalchemy_bigquery.py    From pybigquery with MIT License
def _create_client_from_credentials(self, credentials, default_query_job_config, project_id):
        if project_id is None:
            project_id = credentials.project_id

        scopes = (
                'https://www.googleapis.com/auth/bigquery',
                'https://www.googleapis.com/auth/cloud-platform',
                'https://www.googleapis.com/auth/drive'
            )
        credentials = credentials.with_scopes(scopes)

        self._add_default_dataset_to_job_config(default_query_job_config, project_id, self.dataset_id)

        return bigquery.Client(
                project=project_id,
                credentials=credentials,
                location=self.location,
                default_query_job_config=default_query_job_config,
            ) 
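
One detail worth noting in Example #10 is default_query_job_config, which attaches default job settings, most usefully a default dataset, to every query the client runs. A minimal standalone sketch with placeholder project and dataset names:

from google.cloud import bigquery

project_id = "your-project-id"  # placeholder
job_config = bigquery.QueryJobConfig(
    default_dataset=bigquery.DatasetReference(project_id, "your_dataset"),
    use_legacy_sql=False,
)

client = bigquery.Client(project=project_id, default_query_job_config=job_config)

# Unqualified table names now resolve against the default dataset.
rows = client.query("SELECT COUNT(*) AS n FROM your_table").result()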
Example #11
Source File: samples_test.py    From python-docs-samples with Apache License 2.0
def test_client_library_legacy_query():
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]
    assert len(df) > 0 
Example #12
Source File: kaggle_gcp.py    From docker-python with Apache License 2.0
def init_gcs():
    is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
    from google.cloud import storage
    if not is_user_secrets_token_set:
        return storage

    from kaggle_gcp import get_integrations
    if not get_integrations().has_gcs():
        return storage

    from kaggle_secrets import GcpTarget
    from kaggle_gcp import KaggleKernelCredentials
    monkeypatch_client(
        storage.Client,
        KaggleKernelCredentials(target=GcpTarget.GCS))
    return storage 
Example #13
Source File: samples_test.py    From python-docs-samples with Apache License 2.0
def test_client_library_query():
    # [START bigquery_migration_client_library_query]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = 'TX'
        LIMIT 100
    """

    # Run a Standard SQL query using the environment's default project
    df = client.query(sql).to_dataframe()

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0
    project_id = os.environ['GOOGLE_CLOUD_PROJECT']
    # [START bigquery_migration_client_library_query]
    df = client.query(sql, project=project_id).to_dataframe()
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0 
Example #14
Source File: simple_app.py    From python-docs-samples with Apache License 2.0
def query_stackoverflow():
    # [START bigquery_simple_app_client]
    client = bigquery.Client()
    # [END bigquery_simple_app_client]
    # [START bigquery_simple_app_query]
    query_job = client.query("""
        SELECT
          CONCAT(
            'https://stackoverflow.com/questions/',
            CAST(id as STRING)) as url,
          view_count
        FROM `bigquery-public-data.stackoverflow.posts_questions`
        WHERE tags like '%google-bigquery%'
        ORDER BY view_count DESC
        LIMIT 10""")

    results = query_job.result()  # Waits for job to complete.
    # [END bigquery_simple_app_query]

    # [START bigquery_simple_app_print]
    for row in results:
        print("{} : {} views".format(row.url, row.view_count))
    # [END bigquery_simple_app_print] 
Example #15
Source File: client.py    From ibis with Apache License 2.0
def __init__(self, project_id, dataset_id=None, credentials=None):
        """Construct a BigQueryClient.

        Parameters
        ----------
        project_id : str
            A project name
        dataset_id : Optional[str]
            A ``<project_id>.<dataset_id>`` string or just a dataset name
        credentials : google.auth.credentials.Credentials

        """
        (
            self.data_project,
            self.billing_project,
            self.dataset,
        ) = parse_project_and_dataset(project_id, dataset_id)
        self.client = bq.Client(
            project=self.data_project, credentials=credentials
        ) 
Example #16
Source File: benchmark.py    From runtimes-common with Apache License 2.0
def _record_build_times_to_bigquery(self, build_times):
        current_date = datetime.datetime.now()
        logging.info('Retrieving bigquery client')
        client = bigquery.Client(project=self._project)

        dataset_ref = client.dataset(self._dataset)
        table_ref = dataset_ref.table(self._table)
        table = client.get_table(table_ref)

        full_name = "{0}:{1}.{2}".format(self._project, self._dataset,
                                         self._table)

        logging.info("Adding build time data to {0}".format(full_name))
        rows = [(current_date, self._description, bt[0], bt[1])
                for bt in build_times]
        client.create_rows(table, rows)
        logging.info("Finished adding build times to {0}".format(full_name)) 
Example #17
Source File: test_auth.py    From pandas-gbq with BSD 3-Clause "New" or "Revised" License
def _try_credentials(project_id, credentials):
    from google.cloud import bigquery
    import google.api_core.exceptions
    import google.auth.exceptions

    if not credentials:
        return None
    if not project_id:
        return credentials

    try:
        client = bigquery.Client(project=project_id, credentials=credentials)
        # Check if the application has rights to the BigQuery project
        client.query("SELECT 1").result()
        return credentials
    except google.api_core.exceptions.GoogleAPIError:
        return None
    except google.auth.exceptions.RefreshError:
        # Sometimes (such as on Travis) google-auth returns GCE credentials,
        # but fetching the token for those credentials doesn't actually work.
        # See:
        # https://github.com/googleapis/google-auth-library-python/issues/287
        return None 
Example #18
Source File: main.py    From BigQuery-integrations with MIT License
def give_file_gbq(path_to_file, bq_configuration):
    """
        Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project = bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.write_disposition = bq_configuration["write_disposition"]
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location = bq_configuration["location"], job_config = job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".") 
Example #19
Source File: main.py    From BigQuery-integrations with MIT License
def give_file_gbq(path_to_file, bq_configuration):
    """
        Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project = bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location = bq_configuration["location"], job_config = job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".")
    os.remove(path_to_file) 
Example #20
Source File: main.py    From BigQuery-integrations with MIT License
def give_file_gbq(path_to_file, bq_configuration):
    """
        Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project = bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location = bq_configuration["location"], job_config = job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".")
    os.remove(path_to_file) 
Example #21
Source File: main.py    From BigQuery-integrations with MIT License
def load_to_gbq(filename, bq_configuration):
    """
        Loading data to BigQuery using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project = bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = bq_configuration["source_format"]
    job_config.autodetect = True
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.skip_leading_rows = 1

    # upload the file to BigQuery table
    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location = bq_configuration["location"], job_config = job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." +
          bq_configuration["dataset_id"] + "." + bq_configuration["table"] + ".")
    os.remove(filename) 
Example #22
Source File: main.py    From BigQuery-integrations with MIT License
def load_to_gbq(client, data, bq_configuration):
    """
        Loading data to BigQuery using *bq_configuration* settings.
    """
    client = bigquery.Client(project = bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.autodetect = True

    load_job = client.load_table_from_file(
        data,
        table_ref,
        job_config = job_config)  # API request
    print('Starting job {}'.format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print('Job finished.') 
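
When the rows are already in memory as Python dictionaries, the file-object step can be skipped entirely. A hedged sketch using load_table_from_json (available in newer releases of the library); the helper name is hypothetical and the configuration keys mirror the ones used above:

from google.cloud import bigquery

def load_json_rows_to_gbq(json_rows, bq_configuration):
    # json_rows is a list of dicts, e.g. [{"field": "value"}, ...]
    client = bigquery.Client(project = bq_configuration["project_id"])
    table_ref = client.dataset(bq_configuration["dataset_id"]).table(bq_configuration["table"])

    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.autodetect = True

    load_job = client.load_table_from_json(json_rows, table_ref, job_config = job_config)
    load_job.result()  # waits for the load to complete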
Example #23
Source File: benchmark_uploader.py    From Gun-Detector with Apache License 2.0
def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from local
        file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials) 
Example #24
Source File: authenticate_service_account.py    From python-docs-samples with Apache License 2.0
def main():
    key_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")

    # [START bigquery_client_json_credentials]
    from google.cloud import bigquery
    from google.oauth2 import service_account

    # TODO(developer): Set key_path to the path to the service account key
    #                  file.
    # key_path = "path/to/service_account.json"

    credentials = service_account.Credentials.from_service_account_file(
        key_path,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )

    client = bigquery.Client(
        credentials=credentials,
        project=credentials.project_id,
    )
    # [END bigquery_client_json_credentials]
    return client 
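
When the key file path is all you have, the client class also offers a shortcut classmethod that wraps the same credential-loading steps; a minimal sketch with a placeholder path:

from google.cloud import bigquery

# from_service_account_json builds the credentials and the client in one call.
client = bigquery.Client.from_service_account_json("path/to/service_account.json")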
Example #25
Source File: quickstart_searchallresources_test.py    From python-docs-samples with Apache License 2.0
def bigquery_client():
    yield bigquery.Client() 
Example #26
Source File: resources.py    From dagster with Apache License 2.0
def bigquery_resource(context):
    return bigquery.Client(**context.resource_config) 
Example #27
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_table_nested_repeated_schema(client, to_delete):
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')

    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table_ref = dataset_ref.table("my_table")
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)  # API request

    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema] 
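
Once the nested, repeated schema exists, rows can be streamed into it as plain Python dictionaries. A hedged follow-up sketch (not part of the original snippet) that reuses the client and table created above; the helper name and sample values are illustrative:

def stream_sample_row(client, table):
    rows_to_insert = [
        {
            "id": "1",
            "first_name": "Ada",
            "last_name": "Lovelace",
            "dob": "1815-12-10",
            "addresses": [
                {"status": "current", "address": "12 Example Street",
                 "city": "London", "state": "", "zip": "", "numberOfYears": "5"},
            ],
        }
    ]
    # insert_rows_json returns a list of per-row errors; an empty list means success.
    errors = client.insert_rows_json(table, rows_to_insert)
    assert errors == []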
Example #28
Source File: streaming_beam_test.py    From python-docs-samples with Apache License 2.0
def dataset():
    bigquery_client = bigquery.Client(project=PROJECT)
    dataset_id = '{}.{}'.format(PROJECT, DATASET)
    dataset = bigquery.Dataset(dataset_id)
    dataset = bigquery_client.create_dataset(dataset, exists_ok=True)
    yield '{}:{}'.format(PROJECT, DATASET)
    bigquery_client.delete_table('{}.{}'.format(DATASET, TABLE), not_found_ok=True)
    bigquery_client.delete_dataset(DATASET, not_found_ok=True) 
Example #29
Source File: streaming_beam_test.py    From python-docs-samples with Apache License 2.0
def test_dataflow_flex_templates_pubsub_to_bigquery(dataset, topic_path,
                                                    subscription_path):
    # Use one process to publish messages to a topic.
    publish_process = mp.Process(target=lambda: _infinite_publish_job(topic_path))

    # Use another process to run the streaming pipeline that should write one
    # row to BigQuery every minute (according to the default window size).
    pipeline_process = mp.Process(target=lambda: sp.call([
        'python', 'streaming_beam.py',
        '--project', PROJECT,
        '--runner', 'DirectRunner',
        '--temp_location', tempfile.mkdtemp(),
        '--input_subscription', subscription_path,
        '--output_table', '{}.{}'.format(dataset, TABLE),
        '--window_interval', '5',
    ]))

    publish_process.start()
    pipeline_process.start()

    pipeline_process.join(timeout=30)
    publish_process.join(timeout=0)

    pipeline_process.terminate()
    publish_process.terminate()

    # Check for output data in BigQuery.
    bigquery_client = bigquery.Client(project=PROJECT)
    query = 'SELECT * FROM {}.{}'.format(DATASET, TABLE)
    query_job = bigquery_client.query(query)
    rows = query_job.result()
    assert rows.total_rows > 0
    for row in rows:
        assert row['score'] == 1


# TODO: The test case using TestStream currently does not work as intended.
# The first write to BigQuery fails. A bug has been filed, and the test case
# will be updated once the bug is fixed.
Example #30
Source File: snippets.py    From python-bigquery with Apache License 2.0
def test_create_client_default_credentials():
    """Create a BigQuery client with Application Default Credentials"""

    # [START bigquery_client_default_credentials]
    from google.cloud import bigquery

    # If you don't specify credentials when constructing the client, the
    # client library will look for credentials in the environment.
    client = bigquery.Client()
    # [END bigquery_client_default_credentials]

    assert client is not None