Python google.cloud.bigquery.QueryJobConfig() Examples
The following are 23 code examples of google.cloud.bigquery.QueryJobConfig(), drawn from open-source projects. The project and source file for each example are noted above it. You may also want to check out all available functions and classes of the module google.cloud.bigquery, or try the search function.
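Two configuration styles appear throughout these examples: options passed as keyword arguments to the QueryJobConfig constructor, and attribute assignment on an empty instance. Both produce the same configuration. A minimal sketch of the two styles, assuming default application credentials are available:

from google.cloud import bigquery

client = bigquery.Client()

# Style 1: pass options as constructor keyword arguments.
job_config = bigquery.QueryJobConfig(use_query_cache=False, maximum_bytes_billed=10**9)

# Style 2: assign attributes on an empty config.
job_config = bigquery.QueryJobConfig()
job_config.use_query_cache = False
job_config.maximum_bytes_billed = 10**9  # Fail the query if it would bill more than ~1 GB.

query_job = client.query("SELECT 1 AS x", job_config=job_config)
print(list(query_job))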
Example #1
Source File: main.py From professional-services with Apache License 2.0
def execute_transformation_query(bq_client):
    """Executes transformation query to a new destination table.

    Args:
        bq_client: Object representing a reference to a BigQuery Client
    """
    dataset_ref = bq_client.get_dataset(
        bigquery.DatasetReference(
            project=config.config_vars['billing_project_id'],
            dataset_id=config.config_vars['output_dataset_id']))
    table_ref = dataset_ref.table(config.config_vars['output_table_name'])
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job_config.time_partitioning = bigquery.TimePartitioning(
        field='usage_start_time',
        expiration_ms=None)
    sql = file_to_string(config.config_vars['sql_file_path'])
    sql = sql.format(**config.config_vars)
    logging.info('Attempting query on all dates...')

    # Execute Query
    query_job = bq_client.query(sql, job_config=job_config)
    query_job.result()  # Waits for the query to finish
    logging.info('Transformation query complete. All partitions are updated.')
Example #2
Source File: query_no_cache.py From python-bigquery with Apache License 2.0
def query_no_cache():
    # [START bigquery_query_no_cache]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(use_query_cache=False)
    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """
    query_job = client.query(sql, job_config=job_config)  # Make an API request.

    for row in query_job:
        print(row)
    # [END bigquery_query_no_cache]
Example #3
Source File: user_info_updater.py From professional-services with Apache License 2.0
def get_max_ingest_timestamp(self):
    """Gets the max timestamp that was set during the latest merge.

    Returns:
        latest_merge as a timestamp in string format.
    """
    get_last_max_ts_config = bigquery.QueryJobConfig()
    get_last_max_ts_config.use_legacy_sql = False
    get_last_max_timestamp_query = self.bq_client.query(
        query='SELECT max(ingestTimestamp) as max_ingest_timestamp '
              'FROM `{0:s}.{1:s}.{2:s}`'.format(self.project_id,
                                                self.dataset_id,
                                                self.temp_updates_table_id),
        job_config=get_last_max_ts_config,
        location='US')
    get_last_max_timestamp_query.result()
    results = list(get_last_max_timestamp_query)
    max_ingest_timestamp = results[0]['max_ingest_timestamp']
    if not max_ingest_timestamp:
        max_ingest_timestamp = INITIAL_TIMESTAMP
    return max_ingest_timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %Z')
Example #4
Source File: user_info_updater.py From professional-services with Apache License 2.0
def merge_updates(self, merge_updates_query):
    """Merges rows from the temp table into the final table.

    Args:
        merge_updates_query(str): Query for merging updates from the temp
            updates table to the final table.
    """
    logging.info('{0:s} Merging updates from {1:s} into {2:s}.'.format(
        str(datetime.datetime.now()), self.temp_updates_table_id,
        self.final_table_id))
    merge_updates_job_config = bigquery.QueryJobConfig()
    merge_updates_job_config.use_legacy_sql = False
    merge_updates_query_job = self.bq_client.query(
        query=merge_updates_query,
        location='US',
        job_config=merge_updates_job_config)
    merge_updates_query_job.result()
    logging.info('{0:s} Successfully merged updates into {1:s}.'.format(
        str(datetime.datetime.now()), self.final_table_id))
Example #5
Source File: client_query_legacy_sql.py From python-bigquery with Apache License 2.0
def client_query_legacy_sql():
    # [START bigquery_query_legacy]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = (
        "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
        'WHERE state = "TX" '
        "LIMIT 100"
    )

    # Set use_legacy_sql to True to use legacy SQL syntax.
    job_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    # Start the query, passing in the extra configuration.
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    print("The query data:")
    for row in query_job:
        print(row)
    # [END bigquery_query_legacy]
Example #6
Source File: taar_similarity.py From telemetry-airflow with Mozilla Public License 2.0
def get_table(view):
    """Helper for determining what table underlies a user-facing view,
    since the Storage API can't read views."""
    bq = bigquery.Client()
    view = view.replace(":", ".")
    # A partition filter is required, so try a couple of options.
    for partition_column in ["DATE(submission_timestamp)", "submission_date"]:
        try:
            job = bq.query(
                f"SELECT * FROM `{view}` WHERE {partition_column} = CURRENT_DATE",
                bigquery.QueryJobConfig(dry_run=True),
            )
            break
        except Exception:
            continue
    else:
        raise ValueError("could not determine partition column")
    assert len(job.referenced_tables) == 1, "View combines multiple tables"
    table = job.referenced_tables[0]
    return f"{table.project}:{table.dataset_id}.{table.table_id}"
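Note the design choice here: the dry run never executes the query (and costs nothing); it is issued only so that job.referenced_tables reveals the concrete table behind the view, in the project:dataset.table form the Storage API can read.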
Example #7
Source File: samples_test.py From python-docs-samples with Apache License 2.0
def test_client_library_query_with_parameters():
    # [START bigquery_migration_client_library_query_parameters]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter('state', 'STRING', 'TX'),
            bigquery.ScalarQueryParameter('limit', 'INTEGER', 100)
        ]
    )

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_parameters]

    assert len(df) > 0
Example #8
Source File: samples_test.py From python-docs-samples with Apache License 2.0
def test_client_library_legacy_query():
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]

    assert len(df) > 0
Example #9
Source File: gcp_utils.py From recommender-tensorflow with MIT License
def bigquery_to_table(query, table_id, dataset_id, client):
    table = get_bigquery_table(table_id, dataset_id, client)

    # set config: insert overwrite to table
    job_config = bigquery.QueryJobConfig(
        destination=table,
        write_disposition=bigquery.job.WriteDisposition.WRITE_TRUNCATE
    )

    # query and insert to table
    job = client.query(query, job_config=job_config)
    job.result()
    logger.info("query results loaded to table: %s.", table.path)
    return table
Example #10
Source File: BigQueryConnect.py From CDSS with GNU General Public License v3.0
def queryBQ(self, query_str: str, location: str = 'US', batch_mode: bool = False,
            dry_run: bool = False, verbose: bool = False) -> bigquery.job.QueryJob:
    job_config = bigquery.QueryJobConfig()
    job_config.dry_run = dry_run
    if batch_mode:
        # Run at batch priority, which won't count toward concurrent rate limit.
        job_config.priority = bigquery.QueryPriority.BATCH

    query_job = self.client.query(query_str, location=location, job_config=job_config)
    if batch_mode:
        # Wait until the job is done.
        while query_job.state != 'DONE':
            query_job = self.client.get_job(query_job.job_id, location=location)
            print("Job {} is currently in state {}".format(query_job.job_id,
                                                           query_job.state))
            time.sleep(5)
    if verbose:
        print("This query will process {} bytes.".format(query_job.total_bytes_processed))
    return query_job
Example #11
Source File: bigquery.py From openprescribing with MIT License
def run_job(self, method_name, args, config_opts, config_default_opts):
    job_config = {
        "copy_table": gcbq.CopyJobConfig,
        "extract_table": gcbq.ExtractJobConfig,
        "load_table_from_file": gcbq.LoadJobConfig,
        "load_table_from_uri": gcbq.LoadJobConfig,
        "query": gcbq.QueryJobConfig,
    }[method_name]()

    for k, v in config_default_opts.items():
        setattr(job_config, k, v)
    for k, v in config_opts.items():
        setattr(job_config, k, v)

    method = getattr(self.gcbq_client, method_name)
    job = method(*args, job_config=job_config)

    if getattr(job_config, "dry_run", False):
        return []
    else:
        return job.result()
Example #12
Source File: client_query_dry_run.py From python-bigquery with Apache License 2.0
def client_query_dry_run():
    # [START bigquery_query_dry_run]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)

    # Start the query, passing in the extra configuration.
    query_job = client.query(
        (
            "SELECT name, COUNT(*) as name_count "
            "FROM `bigquery-public-data.usa_names.usa_1910_2013` "
            "WHERE state = 'WA' "
            "GROUP BY name"
        ),
        job_config=job_config,
    )  # Make an API request.

    # A dry run query completes immediately.
    print("This query will process {} bytes.".format(query_job.total_bytes_processed))
    # [END bigquery_query_dry_run]

    return query_job
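A dry run is a convenient way to estimate on-demand cost before running a query for real. A hedged sketch building on the query_job above, using an illustrative per-TiB rate (verify against current BigQuery pricing rather than relying on this number):

# Illustrative on-demand rate in USD per TiB scanned; confirm current pricing.
USD_PER_TIB = 6.25
estimated_usd = query_job.total_bytes_processed / 2**40 * USD_PER_TIB
print("Estimated cost: ${:.4f}".format(estimated_usd))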
Example #13
Source File: client_query_destination_table.py From python-bigquery with Apache License 2.0
def client_query_destination_table(table_id):
    # [START bigquery_query_destination_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the destination table.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.QueryJobConfig(destination=table_id)

    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """

    # Start the query, passing in the extra configuration.
    query_job = client.query(sql, job_config=job_config)  # Make an API request.
    query_job.result()  # Wait for the job to complete.

    print("Query results loaded to the table {}".format(table_id))
    # [END bigquery_query_destination_table]
Example #14
Source File: magics.py From python-bigquery with Apache License 2.0
def default_query_job_config(self):
    """google.cloud.bigquery.job.QueryJobConfig: Default job
    configuration for queries.

    The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is
    used for queries. Some properties can be overridden with arguments to
    the magics.

    Example:
        Manually setting the default value for ``maximum_bytes_billed``
        to 100 MB:

        >>> from google.cloud.bigquery import magics
        >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
    """
    return self._default_query_job_config
Example #15
Source File: system.py From python-bigquery with Apache License 2.0
def test_querying_data_w_timeout(self):
    job_config = bigquery.QueryJobConfig()
    job_config.use_query_cache = False

    query_job = Config.CLIENT.query(
        """
        SELECT name, SUM(number) AS total_people
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        GROUP BY name
        """,
        location="US",
        job_config=job_config,
    )

    # Specify a very tight deadline to demonstrate that the timeout
    # actually has effect.
    with self.assertRaises(requests.exceptions.Timeout):
        query_job.done(timeout=0.1)

    # Now wait for the result using a more realistic deadline.
    query_job.result(timeout=30)
    self.assertTrue(query_job.done(timeout=30))
Example #16
Source File: system.py From python-bigquery with Apache License 2.0
def test_query_w_legacy_sql_types(self):
    naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
    stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat())
    zoned = naive.replace(tzinfo=UTC)
    examples = [
        {"sql": "SELECT 1", "expected": 1},
        {"sql": "SELECT 1.3", "expected": 1.3},
        {"sql": "SELECT TRUE", "expected": True},
        {"sql": 'SELECT "ABC"', "expected": "ABC"},
        {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"},
        {"sql": 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), "expected": zoned},
    ]
    for example in examples:
        job_config = bigquery.QueryJobConfig()
        job_config.use_legacy_sql = True
        rows = list(Config.CLIENT.query(example["sql"], job_config=job_config))
        self.assertEqual(len(rows), 1)
        self.assertEqual(len(rows[0]), 1)
        self.assertEqual(rows[0][0], example["expected"])
Example #17
Source File: create_job.py From python-bigquery with Apache License 2.0
def create_job():
    # [START bigquery_create_job]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query_job = client.query(
        "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`",
        # Explicitly force job execution to be routed to a specific processing
        # location.
        location="US",
        # Specify a job configuration to set optional job resource properties.
        job_config=bigquery.QueryJobConfig(
            labels={"example-label": "example-value"}, maximum_bytes_billed=1000000
        ),
        # The client libraries automatically generate a job ID. Override the
        # generated ID with either the job_id_prefix or job_id parameters.
        job_id_prefix="code_sample_",
    )  # Make an API request.

    print("Started job: {}".format(query_job.job_id))
    # [END bigquery_create_job]

    return query_job
Example #18
Source File: main_test.py From professional-services with Apache License 2.0
def testPartitionsAndUsageDates(self):
    """Tests that the # of partitions is equal to the # of usage_start_times."""
    bq_client = bigquery.Client()
    job_config = bigquery.QueryJobConfig()
    usage_query = """
        SELECT COUNT(DISTINCT(DATE(usage_start_time))) AS cnt
        FROM `{billing_project_id}.{output_dataset_id}.{output_table_name}`
    """
    usage_query = usage_query.format(**config.config_vars)
    query_job = bq_client.query(usage_query, job_config=job_config)
    for row in query_job.result():
        output_result = row.cnt

    partition_query = """
        SELECT COUNT(DISTINCT(partition_id)) AS cnt
        FROM [{billing_project_id}.{output_dataset_id}.{output_table_name}$__PARTITIONS_SUMMARY__]
    """
    partition_query = partition_query.format(**config.config_vars)
    job_config = bigquery.QueryJobConfig()
    job_config.use_legacy_sql = True
    query_job = bq_client.query(partition_query, job_config=job_config)
    for row in query_job.result():
        partition_result = row.cnt

    assert output_result == partition_result
Example #19
Source File: client_query_destination_table_legacy.py From python-bigquery with Apache License 2.0
def client_query_destination_table_legacy(table_id):
    # [START bigquery_query_legacy_large_results]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the destination table.
    # table_id = "your-project.your_dataset.your_table_name"

    # Set the destination table and use_legacy_sql to True to use
    # legacy SQL syntax.
    job_config = bigquery.QueryJobConfig(
        allow_large_results=True, destination=table_id, use_legacy_sql=True
    )

    sql = """
        SELECT corpus
        FROM [bigquery-public-data:samples.shakespeare]
        GROUP BY corpus;
    """

    # Start the query, passing in the extra configuration.
    query_job = client.query(sql, job_config=job_config)  # Make an API request.
    query_job.result()  # Wait for the job to complete.

    print("Query results loaded to the table {}".format(table_id))
    # [END bigquery_query_legacy_large_results]
Example #20
Source File: client_query_w_positional_params.py From python-bigquery with Apache License 2.0
def client_query_w_positional_params():
    # [START bigquery_query_params_positional]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = ?
        AND word_count >= ?
        ORDER BY word_count DESC;
    """
    # Set the name to None to use positional parameters.
    # Note that you cannot mix named and positional parameters.
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter(None, "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_positional]
Example #21
Source File: query_external_gcs_temporary_table.py From python-bigquery with Apache License 2.0
def query_external_gcs_temporary_table():
    # [START bigquery_query_external_gcs_temp]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # Configure the external data source and query job.
    external_config = bigquery.ExternalConfig("CSV")
    external_config.source_uris = [
        "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    ]
    external_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    external_config.options.skip_leading_rows = 1
    table_id = "us_states"
    job_config = bigquery.QueryJobConfig(table_definitions={table_id: external_config})

    # Example query to find states starting with 'W'.
    sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)

    query_job = client.query(sql, job_config=job_config)  # Make an API request.

    w_states = list(query_job)  # Wait for the job to complete.
    print("There are {} states with names starting with W.".format(len(w_states)))
    # [END bigquery_query_external_gcs_temp]
Example #22
Source File: client_query_w_named_params.py From python-bigquery with Apache License 2.0
def client_query_w_named_params():
    # [START bigquery_query_params_named]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_named]
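ScalarQueryParameter covers single values; for an IN-list, the library also provides bigquery.ArrayQueryParameter, paired with UNNEST in the query. A hedged sketch extending the example above (the corpus names are illustrative):

query = """
    SELECT word, word_count
    FROM `bigquery-public-data.samples.shakespeare`
    WHERE corpus IN UNNEST(@corpora)
    ORDER BY word_count DESC;
"""
job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("corpora", "STRING", ["romeoandjuliet", "hamlet"]),
    ]
)
for row in client.query(query, job_config=job_config):
    print("{}: \t{}".format(row.word, row.word_count))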
Example #23
Source File: bigQueryUtil.py From CDSS with GNU General Public License v3.0
def queryBQ(self, query_str, query_params=None, location='US', batch_mode=False,
            dry_run=False, verbose=False):
    job_config = bigquery.QueryJobConfig()
    job_config.dry_run = dry_run
    if batch_mode:
        # Run at batch priority, which won't count toward concurrent rate limit.
        job_config.priority = bigquery.QueryPriority.BATCH
    if query_params:
        job_config.query_parameters = query_params

    query_job = self.client.query(query_str, location=location, job_config=job_config)
    if batch_mode:
        # Wait until the job is done.
        while query_job.state != 'DONE':
            query_job = self.client.get_job(query_job.job_id, location=location)
            log.info('Job {} is currently in state {}'.format(query_job.job_id,
                                                              query_job.state))
            time.sleep(5)
    if verbose:
        # TODO (nodir) always None - query isn't processed yet
        log.info('This query will process {} bytes.'.format(query_job.total_bytes_processed))
        log.info('Query: ' + query_str)
    return query_job