Python google.cloud.bigquery.QueryJobConfig() Examples
The following are 23 code examples of google.cloud.bigquery.QueryJobConfig(), drawn from open-source projects. The project and source file for each example are noted above it. You may also want to check out all available functions and classes of the module google.cloud.bigquery, or try the search function.
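Two configuration styles appear throughout these examples: options passed as keyword arguments to the QueryJobConfig constructor, and attribute assignment on an empty instance. Both produce the same configuration. A minimal sketch of the two styles, assuming default application credentials are available:

from google.cloud import bigquery

client = bigquery.Client()

# Style 1: pass options as constructor keyword arguments.
job_config = bigquery.QueryJobConfig(use_query_cache=False, maximum_bytes_billed=10**9)

# Style 2: assign attributes on an empty config.
job_config = bigquery.QueryJobConfig()
job_config.use_query_cache = False
job_config.maximum_bytes_billed = 10**9  # Fail the query if it would bill more than ~1 GB.

query_job = client.query("SELECT 1 AS x", job_config=job_config)
print(list(query_job))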
Example #1
Source File: main.py From professional-services with Apache License 2.0
def execute_transformation_query(bq_client):
    """Executes transformation query to a new destination table.

    Args:
        bq_client: Object representing a reference to a BigQuery Client
    """
    dataset_ref = bq_client.get_dataset(
        bigquery.DatasetReference(
            project=config.config_vars['billing_project_id'],
            dataset_id=config.config_vars['output_dataset_id']))
    table_ref = dataset_ref.table(config.config_vars['output_table_name'])
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job_config.time_partitioning = bigquery.TimePartitioning(
        field='usage_start_time',
        expiration_ms=None)
    sql = file_to_string(config.config_vars['sql_file_path'])
    sql = sql.format(**config.config_vars)
    logging.info('Attempting query on all dates...')

    # Execute Query
    query_job = bq_client.query(sql, job_config=job_config)
    query_job.result()  # Waits for the query to finish
    logging.info('Transformation query complete. All partitions are updated.')
Example #2
Source File: query_no_cache.py From python-bigquery with Apache License 2.0
def query_no_cache():
    # [START bigquery_query_no_cache]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(use_query_cache=False)
    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """
    query_job = client.query(sql, job_config=job_config)  # Make an API request.

    for row in query_job:
        print(row)
    # [END bigquery_query_no_cache]
Example #3
Source File: user_info_updater.py From professional-services with Apache License 2.0
def get_max_ingest_timestamp(self):
    """Gets the max timestamp that was set during the latest merge.

    Returns:
        latest_merge as a timestamp in string format.
    """
    get_last_max_ts_config = bigquery.QueryJobConfig()
    get_last_max_ts_config.use_legacy_sql = False
    get_last_max_timestamp_query = self.bq_client.query(
        query='SELECT max(ingestTimestamp) as max_ingest_timestamp '
              'FROM `{0:s}.{1:s}.{2:s}`'.format(self.project_id,
                                                self.dataset_id,
                                                self.temp_updates_table_id),
        job_config=get_last_max_ts_config,
        location='US')
    get_last_max_timestamp_query.result()
    results = list(get_last_max_timestamp_query)
    max_ingest_timestamp = results[0]['max_ingest_timestamp']
    if not max_ingest_timestamp:
        max_ingest_timestamp = INITIAL_TIMESTAMP
    return max_ingest_timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %Z')
Example #4
Source File: user_info_updater.py From professional-services with Apache License 2.0
def merge_updates(self, merge_updates_query):
    """Merges rows from the temp table into the final table.

    Args:
        merge_updates_query(str): Query for merging updates from the temp
            updates table to the final table.
    """
    logging.info('{0:s} Merging updates from {1:s} into {2:s}.'.format(
        str(datetime.datetime.now()), self.temp_updates_table_id,
        self.final_table_id))
    merge_updates_job_config = bigquery.QueryJobConfig()
    merge_updates_job_config.use_legacy_sql = False
    merge_updates_query_job = self.bq_client.query(
        query=merge_updates_query,
        location='US',
        job_config=merge_updates_job_config)
    merge_updates_query_job.result()
    logging.info('{0:s} Successfully merged updates into {1:s}.'.format(
        str(datetime.datetime.now()), self.final_table_id))
Example #5
Source File: client_query_legacy_sql.py From python-bigquery with Apache License 2.0
def client_query_legacy_sql():
    # [START bigquery_query_legacy]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = (
        "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
        'WHERE state = "TX" '
        "LIMIT 100"
    )

    # Set use_legacy_sql to True to use legacy SQL syntax.
    job_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    # Start the query, passing in the extra configuration.
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    print("The query data:")
    for row in query_job:
        print(row)
    # [END bigquery_query_legacy]
Example #6
Source File: taar_similarity.py From telemetry-airflow with Mozilla Public License 2.0
def get_table(view):
    """Helper for determining what table underlies a user-facing view,
    since the Storage API can't read views."""
    bq = bigquery.Client()
    view = view.replace(":", ".")
    # A partition filter is required, so try a couple of options.
    for partition_column in ["DATE(submission_timestamp)", "submission_date"]:
        try:
            job = bq.query(
                f"SELECT * FROM `{view}` WHERE {partition_column} = CURRENT_DATE",
                bigquery.QueryJobConfig(dry_run=True),
            )
            break
        except Exception:
            continue
    else:
        raise ValueError("could not determine partition column")
    assert len(job.referenced_tables) == 1, "View combines multiple tables"
    table = job.referenced_tables[0]
    return f"{table.project}:{table.dataset_id}.{table.table_id}"
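Note the design choice here: the dry run never executes the query (and costs nothing); it is issued only so that job.referenced_tables reveals the concrete table behind the view, in the project:dataset.table form the Storage API can read.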
Example #7
Source File: samples_test.py From python-docs-samples with Apache License 2.0
def test_client_library_query_with_parameters():
    # [START bigquery_migration_client_library_query_parameters]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter('state', 'STRING', 'TX'),
            bigquery.ScalarQueryParameter('limit', 'INTEGER', 100)
        ]
    )

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_parameters]

    assert len(df) > 0
Example #8
Source File: samples_test.py From python-docs-samples with Apache License 2.0
def test_client_library_legacy_query():
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]

    assert len(df) > 0
Example #9
Source File: gcp_utils.py From recommender-tensorflow with MIT License
def bigquery_to_table(query, table_id, dataset_id, client):
    table = get_bigquery_table(table_id, dataset_id, client)

    # set config: insert overwrite to table
    job_config = bigquery.QueryJobConfig(
        destination=table,
        write_disposition=bigquery.job.WriteDisposition.WRITE_TRUNCATE
    )

    # query and insert to table
    job = client.query(query, job_config=job_config)
    job.result()
    logger.info("query results loaded to table: %s.", table.path)
    return table
Example #10
Source File: BigQueryConnect.py From CDSS with GNU General Public License v3.0
def queryBQ(self, query_str: str, location: str = 'US', batch_mode: bool = False,
            dry_run: bool = False, verbose: bool = False) -> bigquery.job.QueryJob:
    job_config = bigquery.QueryJobConfig()
    job_config.dry_run = dry_run
    if batch_mode:
        # Run at batch priority, which won't count toward concurrent rate limit.
        job_config.priority = bigquery.QueryPriority.BATCH

    query_job = self.client.query(query_str, location=location, job_config=job_config)
    if batch_mode:
        # Wait until the job is done.
        while query_job.state != 'DONE':
            query_job = self.client.get_job(query_job.job_id, location=location)
            print("Job {} is currently in state {}".format(query_job.job_id,
                                                           query_job.state))
            time.sleep(5)
    if verbose:
        print("This query will process {} bytes.".format(query_job.total_bytes_processed))
    return query_job
Example #11
Source File: bigquery.py From openprescribing with MIT License
def run_job(self, method_name, args, config_opts, config_default_opts):
    job_config = {
        "copy_table": gcbq.CopyJobConfig,
        "extract_table": gcbq.ExtractJobConfig,
        "load_table_from_file": gcbq.LoadJobConfig,
        "load_table_from_uri": gcbq.LoadJobConfig,
        "query": gcbq.QueryJobConfig,
    }[method_name]()

    for k, v in config_default_opts.items():
        setattr(job_config, k, v)
    for k, v in config_opts.items():
        setattr(job_config, k, v)

    method = getattr(self.gcbq_client, method_name)
    job = method(*args, job_config=job_config)

    if getattr(job_config, "dry_run", False):
        return []
    else:
        return job.result()
Example #12
Source File: client_query_dry_run.py From python-bigquery with Apache License 2.0
def client_query_dry_run():
    # [START bigquery_query_dry_run]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)

    # Start the query, passing in the extra configuration.
    query_job = client.query(
        (
            "SELECT name, COUNT(*) as name_count "
            "FROM `bigquery-public-data.usa_names.usa_1910_2013` "
            "WHERE state = 'WA' "
            "GROUP BY name"
        ),
        job_config=job_config,
    )  # Make an API request.

    # A dry run query completes immediately.
    print("This query will process {} bytes.".format(query_job.total_bytes_processed))
    # [END bigquery_query_dry_run]

    return query_job
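A dry run is a convenient way to estimate on-demand cost before running a query for real. A hedged sketch building on the query_job above, using an illustrative per-TiB rate (verify against current BigQuery pricing rather than relying on this number):

# Illustrative on-demand rate in USD per TiB scanned; confirm current pricing.
USD_PER_TIB = 6.25
estimated_usd = query_job.total_bytes_processed / 2**40 * USD_PER_TIB
print("Estimated cost: ${:.4f}".format(estimated_usd))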
Example #13
Source File: client_query_destination_table.py From python-bigquery with Apache License 2.0
def client_query_destination_table(table_id):
    # [START bigquery_query_destination_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the destination table.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.QueryJobConfig(destination=table_id)

    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """

    # Start the query, passing in the extra configuration.
    query_job = client.query(sql, job_config=job_config)  # Make an API request.
    query_job.result()  # Wait for the job to complete.

    print("Query results loaded to the table {}".format(table_id))
    # [END bigquery_query_destination_table]
Example #14
Source File: magics.py From python-bigquery with Apache License 2.0
def default_query_job_config(self):
    """google.cloud.bigquery.job.QueryJobConfig: Default job
    configuration for queries.

    The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is
    used for queries. Some properties can be overridden with arguments to
    the magics.

    Example:
        Manually setting the default value for ``maximum_bytes_billed``
        to 100 MB:

        >>> from google.cloud.bigquery import magics
        >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
    """
    return self._default_query_job_config
Example #15
Source File: system.py From python-bigquery with Apache License 2.0
def test_querying_data_w_timeout(self):
    job_config = bigquery.QueryJobConfig()
    job_config.use_query_cache = False

    query_job = Config.CLIENT.query(
        """
        SELECT name, SUM(number) AS total_people
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        GROUP BY name
        """,
        location="US",
        job_config=job_config,
    )

    # Specify a very tight deadline to demonstrate that the timeout
    # actually has effect.
    with self.assertRaises(requests.exceptions.Timeout):
        query_job.done(timeout=0.1)

    # Now wait for the result using a more realistic deadline.
    query_job.result(timeout=30)
    self.assertTrue(query_job.done(timeout=30))
Example #16
Source File: system.py From python-bigquery with Apache License 2.0
def test_query_w_legacy_sql_types(self):
    naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
    stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat())
    zoned = naive.replace(tzinfo=UTC)
    examples = [
        {"sql": "SELECT 1", "expected": 1},
        {"sql": "SELECT 1.3", "expected": 1.3},
        {"sql": "SELECT TRUE", "expected": True},
        {"sql": 'SELECT "ABC"', "expected": "ABC"},
        {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"},
        {"sql": 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), "expected": zoned},
    ]
    for example in examples:
        job_config = bigquery.QueryJobConfig()
        job_config.use_legacy_sql = True
        rows = list(Config.CLIENT.query(example["sql"], job_config=job_config))
        self.assertEqual(len(rows), 1)
        self.assertEqual(len(rows[0]), 1)
        self.assertEqual(rows[0][0], example["expected"])
Example #17
Source File: create_job.py From python-bigquery with Apache License 2.0
def create_job():
    # [START bigquery_create_job]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query_job = client.query(
        "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`",
        # Explicitly force job execution to be routed to a specific processing
        # location.
        location="US",
        # Specify a job configuration to set optional job resource properties.
        job_config=bigquery.QueryJobConfig(
            labels={"example-label": "example-value"}, maximum_bytes_billed=1000000
        ),
        # The client libraries automatically generate a job ID. Override the
        # generated ID with either the job_id_prefix or job_id parameters.
        job_id_prefix="code_sample_",
    )  # Make an API request.

    print("Started job: {}".format(query_job.job_id))
    # [END bigquery_create_job]

    return query_job
Example #18
Source File: main_test.py From professional-services with Apache License 2.0
def testPartitionsAndUsageDates(self):
    """Tests that the # of partitions is equal to the # of usage_start_times."""
    bq_client = bigquery.Client()
    job_config = bigquery.QueryJobConfig()
    usage_query = """
        SELECT COUNT(DISTINCT(DATE(usage_start_time))) AS cnt
        FROM `{billing_project_id}.{output_dataset_id}.{output_table_name}`
    """
    usage_query = usage_query.format(**config.config_vars)
    query_job = bq_client.query(usage_query, job_config=job_config)
    for row in query_job.result():
        output_result = row.cnt

    partition_query = """
        SELECT COUNT(DISTINCT(partition_id)) AS cnt
        FROM [{billing_project_id}.{output_dataset_id}.{output_table_name}$__PARTITIONS_SUMMARY__]
    """
    partition_query = partition_query.format(**config.config_vars)
    job_config = bigquery.QueryJobConfig()
    job_config.use_legacy_sql = True
    query_job = bq_client.query(partition_query, job_config=job_config)
    for row in query_job.result():
        partition_result = row.cnt

    assert output_result == partition_result
Example #19
Source File: client_query_destination_table_legacy.py From python-bigquery with Apache License 2.0
def client_query_destination_table_legacy(table_id):
    # [START bigquery_query_legacy_large_results]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the destination table.
    # table_id = "your-project.your_dataset.your_table_name"

    # Set the destination table and use_legacy_sql to True to use
    # legacy SQL syntax.
    job_config = bigquery.QueryJobConfig(
        allow_large_results=True, destination=table_id, use_legacy_sql=True
    )

    sql = """
        SELECT corpus
        FROM [bigquery-public-data:samples.shakespeare]
        GROUP BY corpus;
    """

    # Start the query, passing in the extra configuration.
    query_job = client.query(sql, job_config=job_config)  # Make an API request.
    query_job.result()  # Wait for the job to complete.

    print("Query results loaded to the table {}".format(table_id))
    # [END bigquery_query_legacy_large_results]
Example #20
Source File: client_query_w_positional_params.py From python-bigquery with Apache License 2.0
def client_query_w_positional_params():
    # [START bigquery_query_params_positional]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = ?
        AND word_count >= ?
        ORDER BY word_count DESC;
    """
    # Set the name to None to use positional parameters.
    # Note that you cannot mix named and positional parameters.
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter(None, "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_positional]
Example #21
Source File: query_external_gcs_temporary_table.py From python-bigquery with Apache License 2.0
def query_external_gcs_temporary_table():
    # [START bigquery_query_external_gcs_temp]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # Configure the external data source and query job.
    external_config = bigquery.ExternalConfig("CSV")
    external_config.source_uris = [
        "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    ]
    external_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    external_config.options.skip_leading_rows = 1
    table_id = "us_states"
    job_config = bigquery.QueryJobConfig(table_definitions={table_id: external_config})

    # Example query to find states starting with 'W'.
    sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)

    query_job = client.query(sql, job_config=job_config)  # Make an API request.

    w_states = list(query_job)  # Wait for the job to complete.
    print("There are {} states with names starting with W.".format(len(w_states)))
    # [END bigquery_query_external_gcs_temp]
Example #22
Source File: client_query_w_named_params.py From python-bigquery with Apache License 2.0
def client_query_w_named_params():
    # [START bigquery_query_params_named]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
            bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
        ]
    )
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_named]
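ScalarQueryParameter covers single values; for an IN-list, the library also provides bigquery.ArrayQueryParameter, paired with UNNEST in the query. A hedged sketch extending the example above (the corpus names are illustrative):

query = """
    SELECT word, word_count
    FROM `bigquery-public-data.samples.shakespeare`
    WHERE corpus IN UNNEST(@corpora)
    ORDER BY word_count DESC;
"""
job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("corpora", "STRING", ["romeoandjuliet", "hamlet"]),
    ]
)
for row in client.query(query, job_config=job_config):
    print("{}: \t{}".format(row.word, row.word_count))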
Example #23
Source File: bigQueryUtil.py From CDSS with GNU General Public License v3.0
def queryBQ(self, query_str, query_params=None, location='US', batch_mode=False,
            dry_run=False, verbose=False):
    job_config = bigquery.QueryJobConfig()
    job_config.dry_run = dry_run
    if batch_mode:
        # Run at batch priority, which won't count toward concurrent rate limit.
        job_config.priority = bigquery.QueryPriority.BATCH
    if query_params:
        job_config.query_parameters = query_params

    query_job = self.client.query(query_str, location=location, job_config=job_config)
    if batch_mode:
        # Wait until the job is done.
        while query_job.state != 'DONE':
            query_job = self.client.get_job(query_job.job_id, location=location)
            log.info('Job {} is currently in state {}'.format(query_job.job_id,
                                                              query_job.state))
            time.sleep(5)
    if verbose:
        # TODO (nodir) always None - query isn't processed yet
        log.info('This query will process {} bytes.'.format(query_job.total_bytes_processed))
        log.info('Query: ' + query_str)
    return query_job