Python google.cloud.bigquery.QueryJobConfig() Examples

The following are 23 code examples of google.cloud.bigquery.QueryJobConfig(), drawn from open-source projects. The originating project and source file are noted above each example. You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
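
QueryJobConfig properties can be supplied as constructor keyword arguments or assigned as attributes after construction; both styles appear in the examples below. A minimal sketch of the two styles (assuming google-cloud-bigquery is installed and default credentials are configured):

from google.cloud import bigquery

client = bigquery.Client()

# Style 1: pass properties as constructor keyword arguments.
job_config = bigquery.QueryJobConfig(use_query_cache=False, maximum_bytes_billed=1000000)

# Style 2: construct first, then set properties as attributes.
job_config = bigquery.QueryJobConfig()
job_config.use_query_cache = False
job_config.maximum_bytes_billed = 1000000

# Either way, pass the config to client.query().
query_job = client.query("SELECT 1", job_config=job_config)
query_job.result()  # Wait for the query to finish.
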
Example #1
Source File: main.py    From professional-services with Apache License 2.0
def execute_transformation_query(bq_client):
    """Executes transformation query to a new destination table.
    Args:
        bq_client: Object representing a reference to a BigQuery Client
    """
    dataset_ref = bq_client.get_dataset(bigquery.DatasetReference(
        project=config.config_vars['billing_project_id'],
        dataset_id=config.config_vars['output_dataset_id']))
    table_ref = dataset_ref.table(config.config_vars['output_table_name'])
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job_config.time_partitioning = bigquery.TimePartitioning(
        field='usage_start_time',
        expiration_ms=None)
    sql = file_to_string(config.config_vars['sql_file_path'])
    sql = sql.format(**config.config_vars)
    logging.info('Attempting query on all dates...')
    # Execute Query
    query_job = bq_client.query(
        sql,
        job_config=job_config)

    query_job.result()  # Waits for the query to finish
    logging.info('Transformation query complete. All partitions are updated.') 
Example #2
Source File: query_no_cache.py    From python-bigquery with Apache License 2.0
def query_no_cache():

    # [START bigquery_query_no_cache]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(use_query_cache=False)
    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """
    query_job = client.query(sql, job_config=job_config)  # Make an API request.

    for row in query_job:
        print(row)
    # [END bigquery_query_no_cache] 
Example #3
Source File: user_info_updater.py    From professional-services with Apache License 2.0
def get_max_ingest_timestamp(self):
        """Gets the max timestamp that was set during the latest merge.

        Returns:
            latest_merge as a timestamp in string format.
        """
        get_last_max_ts_config = bigquery.QueryJobConfig()
        get_last_max_ts_config.use_legacy_sql = False
        get_last_max_timestamp_query = self.bq_client.query(
            query='SELECT max(ingestTimestamp) as max_ingest_timestamp '
            'FROM `{0:s}.{1:s}.{2:s}`'.format(self.project_id, self.dataset_id,
                                              self.temp_updates_table_id),
            job_config=get_last_max_ts_config,
            location='US')
        get_last_max_timestamp_query.result()
        results = list(get_last_max_timestamp_query)
        max_ingest_timestamp = results[0]['max_ingest_timestamp']
        if not max_ingest_timestamp:
            max_ingest_timestamp = INITIAL_TIMESTAMP

        return max_ingest_timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %Z') 
Example #4
Source File: user_info_updater.py    From professional-services with Apache License 2.0
def merge_updates(self, merge_updates_query):
        """Merges rows from the temp table into the final table.

        Args:
            merge_updates_query(str): Query for merging updates from the temp
                updates table to the final table.

        """

        logging.info('{0:s} Merging updates from {1:s} into {2:s}.'.format(
            str(datetime.datetime.now()), self.temp_updates_table_id,
            self.final_table_id))

        merge_updates_job_config = bigquery.QueryJobConfig()
        merge_updates_job_config.use_legacy_sql = False

        merge_updates_query_job = self.bq_client.query(
            query=merge_updates_query,
            location='US',
            job_config=merge_updates_job_config)
        merge_updates_query_job.result()
        logging.info('{0:s} Successfully merged updates into {1:s}.'.format(
            str(datetime.datetime.now()), self.final_table_id)) 
Example #5
Source File: client_query_legacy_sql.py    From python-bigquery with Apache License 2.0
def client_query_legacy_sql():

    # [START bigquery_query_legacy]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = (
        "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
        'WHERE state = "TX" '
        "LIMIT 100"
    )

    # Set use_legacy_sql to True to use legacy SQL syntax.
    job_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    # Start the query, passing in the extra configuration.
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    print("The query data:")
    for row in query_job:
        print(row)
    # [END bigquery_query_legacy] 
Example #6
Source File: taar_similarity.py    From telemetry-airflow with Mozilla Public License 2.0
def get_table(view):
    """Helper for determining what table underlies a user-facing view, since the Storage API can't read views."""
    bq = bigquery.Client()
    view = view.replace(":", ".")
    # partition filter is required, so try a couple options
    for partition_column in ["DATE(submission_timestamp)", "submission_date"]:
        try:
            job = bq.query(
                f"SELECT * FROM `{view}` WHERE {partition_column} = CURRENT_DATE",
                bigquery.QueryJobConfig(dry_run=True),
            )
            break
        except Exception:
            continue
    else:
        raise ValueError("could not determine partition column")
    assert len(job.referenced_tables) == 1, "View combines multiple tables"
    table = job.referenced_tables[0]
    return f"{table.project}:{table.dataset_id}.{table.table_id}" 
Example #7
Source File: samples_test.py    From python-docs-samples with Apache License 2.0
def test_client_library_query_with_parameters():
    # [START bigquery_migration_client_library_query_parameters]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter('state', 'STRING', 'TX'),
            bigquery.ScalarQueryParameter('limit', 'INTEGER', 100)
        ]
    )

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_parameters]
    assert len(df) > 0 
Example #8
Source File: samples_test.py    From python-docs-samples with Apache License 2.0
def test_client_library_legacy_query():
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]
    assert len(df) > 0 
Example #9
Source File: gcp_utils.py    From recommender-tensorflow with MIT License
def bigquery_to_table(query, table_id, dataset_id, client):
    table = get_bigquery_table(table_id, dataset_id, client)

    # set config: insert overwrite to table
    job_config = bigquery.QueryJobConfig(
        destination=table,
        write_disposition=bigquery.job.WriteDisposition.WRITE_TRUNCATE
    )

    # query and insert to table
    job = client.query(query, job_config=job_config)
    job.result()
    logger.info("query results loaded to table: %s.", table.path)
    return table 
Example #10
Source File: BigQueryConnect.py    From CDSS with GNU General Public License v3.0
def queryBQ(self, query_str: str, location: str = 'US', batch_mode: bool = False, dry_run: bool = False,
                verbose: bool = False) -> bigquery.job.QueryJob:

        job_config = bigquery.QueryJobConfig()
        job_config.dry_run = dry_run

        if batch_mode:
            # Run at batch priority, which won't count toward concurrent rate limit.
            job_config.priority = bigquery.QueryPriority.BATCH

        query_job = self.client.query(query_str, location=location, job_config=job_config)

        if batch_mode: # wait until job is done
            while query_job.state != 'DONE':
                query_job = self.client.get_job(query_job.job_id, location=location)
                print("Job {} is currently in state {}".format(query_job.job_id, query_job.state))
                time.sleep(5)

        if verbose:
            print("This query will process {} bytes.".format(query_job.total_bytes_processed))

        return query_job 
Example #11
Source File: bigquery.py    From openprescribing with MIT License
def run_job(self, method_name, args, config_opts, config_default_opts):
        job_config = {
            "copy_table": gcbq.CopyJobConfig,
            "extract_table": gcbq.ExtractJobConfig,
            "load_table_from_file": gcbq.LoadJobConfig,
            "load_table_from_uri": gcbq.LoadJobConfig,
            "query": gcbq.QueryJobConfig,
        }[method_name]()

        for k, v in config_default_opts.items():
            setattr(job_config, k, v)
        for k, v in config_opts.items():
            setattr(job_config, k, v)

        method = getattr(self.gcbq_client, method_name)

        job = method(*args, job_config=job_config)
        if getattr(job_config, "dry_run", False):
            return []
        else:
            return job.result() 
Example #12
Source File: client_query_dry_run.py    From python-bigquery with Apache License 2.0
def client_query_dry_run():

    # [START bigquery_query_dry_run]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)

    # Start the query, passing in the extra configuration.
    query_job = client.query(
        (
            "SELECT name, COUNT(*) as name_count "
            "FROM `bigquery-public-data.usa_names.usa_1910_2013` "
            "WHERE state = 'WA' "
            "GROUP BY name"
        ),
        job_config=job_config,
    )  # Make an API request.

    # A dry run query completes immediately.
    print("This query will process {} bytes.".format(query_job.total_bytes_processed))
    # [END bigquery_query_dry_run]
    return query_job 
Example #13
Source File: client_query_destination_table.py    From python-bigquery with Apache License 2.0
def client_query_destination_table(table_id):

    # [START bigquery_query_destination_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the destination table.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.QueryJobConfig(destination=table_id)

    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """

    # Start the query, passing in the extra configuration.
    query_job = client.query(sql, job_config=job_config)  # Make an API request.
    query_job.result()  # Wait for the job to complete.

    print("Query results loaded to the table {}".format(table_id))
    # [END bigquery_query_destination_table] 
Example #14
Source File: magics.py    From python-bigquery with Apache License 2.0
def default_query_job_config(self):
        """google.cloud.bigquery.job.QueryJobConfig: Default job
        configuration for queries.

        The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is
        used for queries. Some properties can be overridden with arguments to
        the magics.

        Example:
            Manually setting the default value for ``maximum_bytes_billed``
            to 100 MB:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
        """
        return self._default_query_job_config 
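
As a usage sketch (assuming an IPython or Jupyter session with google-cloud-bigquery installed), the context default described above applies to every query run through the %%bigquery cell magic:

# In an IPython or Jupyter session:
%load_ext google.cloud.bigquery

from google.cloud.bigquery import magics

# Cap bytes billed for all subsequent queries run via the magic.
magics.context.default_query_job_config.maximum_bytes_billed = 100000000
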
Example #15
Source File: system.py    From python-bigquery with Apache License 2.0
def test_querying_data_w_timeout(self):
        job_config = bigquery.QueryJobConfig()
        job_config.use_query_cache = False

        query_job = Config.CLIENT.query(
            """
            SELECT name, SUM(number) AS total_people
            FROM `bigquery-public-data.usa_names.usa_1910_current`
            GROUP BY name
            """,
            location="US",
            job_config=job_config,
        )

        # Specify a very tight deadline to demonstrate that the timeout
        # actually has effect.
        with self.assertRaises(requests.exceptions.Timeout):
            query_job.done(timeout=0.1)

        # Now wait for the result using a more realistic deadline.
        query_job.result(timeout=30)
        self.assertTrue(query_job.done(timeout=30)) 
Example #16
Source File: system.py    From python-bigquery with Apache License 2.0
def test_query_w_legacy_sql_types(self):
        naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
        stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat())
        zoned = naive.replace(tzinfo=UTC)
        examples = [
            {"sql": "SELECT 1", "expected": 1},
            {"sql": "SELECT 1.3", "expected": 1.3},
            {"sql": "SELECT TRUE", "expected": True},
            {"sql": 'SELECT "ABC"', "expected": "ABC"},
            {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"},
            {"sql": 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), "expected": zoned},
        ]
        for example in examples:
            job_config = bigquery.QueryJobConfig()
            job_config.use_legacy_sql = True
            rows = list(Config.CLIENT.query(example["sql"], job_config=job_config))
            self.assertEqual(len(rows), 1)
            self.assertEqual(len(rows[0]), 1)
            self.assertEqual(rows[0][0], example["expected"]) 
Example #17
Source File: create_job.py    From python-bigquery with Apache License 2.0
def create_job():

    # [START bigquery_create_job]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query_job = client.query(
        "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`",
        # Explicitly force job execution to be routed to a specific processing
        # location.
        location="US",
        # Specify a job configuration to set optional job resource properties.
        job_config=bigquery.QueryJobConfig(
            labels={"example-label": "example-value"}, maximum_bytes_billed=1000000
        ),
        # The client libraries automatically generate a job ID. Override the
        # generated ID with either the job_id_prefix or job_id parameters.
        job_id_prefix="code_sample_",
    )  # Make an API request.

    print("Started job: {}".format(query_job.job_id))
    # [END bigquery_create_job]
    return query_job 
Example #18
Source File: main_test.py    From professional-services with Apache License 2.0
def testPartitionsAndUsageDates(self):
        """Tests that the # of partitions is equal to the # of usage_start_times."""
        bq_client = bigquery.Client()
        job_config = bigquery.QueryJobConfig()
        usage_query = """
            SELECT COUNT(DISTINCT(DATE(usage_start_time))) AS cnt
            FROM `{billing_project_id}.{output_dataset_id}.{output_table_name}`
            """
        usage_query = usage_query.format(**config.config_vars)
        query_job = bq_client.query(usage_query, job_config=job_config)
        for row in query_job.result():
            output_result = row.cnt

        partition_query = """
            SELECT COUNT(DISTINCT(partition_id)) AS cnt
            FROM [{billing_project_id}.{output_dataset_id}.{output_table_name}$__PARTITIONS_SUMMARY__]
            """
        partition_query = partition_query.format(**config.config_vars)
        job_config = bigquery.QueryJobConfig()
        job_config.use_legacy_sql = True
        query_job = bq_client.query(partition_query, job_config=job_config)
        for row in query_job.result():
            partition_result = row.cnt
        assert output_result == partition_result 
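
The legacy $__PARTITIONS_SUMMARY__ meta-table queried above requires legacy SQL; as a hedged alternative, a sketch of the same partition count in standard SQL via the INFORMATION_SCHEMA.PARTITIONS view (project, dataset, and table names here are placeholders):

from google.cloud import bigquery

client = bigquery.Client()
sql = """
    SELECT COUNT(DISTINCT partition_id) AS cnt
    FROM `your-project.your_dataset.INFORMATION_SCHEMA.PARTITIONS`
    WHERE table_name = 'your_table_name'
"""
# No QueryJobConfig needed: standard SQL is the default dialect.
for row in client.query(sql).result():
    print(row.cnt)
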
Example #19
Source File: client_query_destination_table_legacy.py    From python-bigquery with Apache License 2.0
def client_query_destination_table_legacy(table_id):

    # [START bigquery_query_legacy_large_results]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the destination table.
    # table_id = "your-project.your_dataset.your_table_name"

    # Set the destination table and use_legacy_sql to True to use
    # legacy SQL syntax.
    job_config = bigquery.QueryJobConfig(
        allow_large_results=True, destination=table_id, use_legacy_sql=True
    )

    sql = """
        SELECT corpus
        FROM [bigquery-public-data:samples.shakespeare]
        GROUP BY corpus;
    """

    # Start the query, passing in the extra configuration.
    query_job = client.query(sql, job_config=job_config)  # Make an API request.
    query_job.result()  # Wait for the job to complete.

    print("Query results loaded to the table {}".format(table_id))
    # [END bigquery_query_legacy_large_results] 
Example #20
Source File: client_query_w_positional_params.py    From python-bigquery with Apache License 2.0
def client_query_w_positional_params():

    # [START bigquery_query_params_positional]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = ?
        AND word_count >= ?
        ORDER BY word_count DESC;
    """
    # Set the name to None to use positional parameters.
    # Note that you cannot mix named and positional parameters.
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter(None, "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_positional] 
Example #21
Source File: query_external_gcs_temporary_table.py    From python-bigquery with Apache License 2.0
def query_external_gcs_temporary_table():

    # [START bigquery_query_external_gcs_temp]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # Configure the external data source and query job.
    external_config = bigquery.ExternalConfig("CSV")
    external_config.source_uris = [
        "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    ]
    external_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    external_config.options.skip_leading_rows = 1
    table_id = "us_states"
    job_config = bigquery.QueryJobConfig(table_definitions={table_id: external_config})

    # Example query to find states starting with 'W'.
    sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)

    query_job = client.query(sql, job_config=job_config)  # Make an API request.

    w_states = list(query_job)  # Wait for the job to complete.
    print("There are {} states with names starting with W.".format(len(w_states)))
    # [END bigquery_query_external_gcs_temp] 
Example #22
Source File: client_query_w_named_params.py    From python-bigquery with Apache License 2.0
def client_query_w_named_params():

    # [START bigquery_query_params_named]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_named] 
Example #23
Source File: bigQueryUtil.py    From CDSS with GNU General Public License v3.0
def queryBQ(self, query_str, query_params=None, location='US', batch_mode=False, dry_run=False,
                verbose=False):

        job_config = bigquery.QueryJobConfig()
        job_config.dry_run = dry_run

        if batch_mode:
            # Run at batch priority, which won't count toward concurrent rate limit.
            job_config.priority = bigquery.QueryPriority.BATCH

        if query_params:
            job_config.query_parameters = query_params

        query_job = self.client.query(query_str, location=location, job_config=job_config)

        if batch_mode: # wait until job is done
            while query_job.state != 'DONE':
                query_job = self.client.get_job(query_job.job_id, location=location)
                log.info('Job {} is currently in state {}'.format(query_job.job_id, query_job.state))
                #print('Job {} is currently in state {}'.format(query_job.job_id, query_job.state))
                time.sleep(5)

        if verbose:
            log.info('This query will process {} bytes:'.format(query_job.total_bytes_processed))   # TODO (nodir) always None - query isn't processed yet
            log.info('Query: ' + query_str)
            #print('This query will process {} bytes.'.format(query_job.total_bytes_processed))

        return query_job