Python google.cloud.bigquery.SchemaField() Examples
The following are code examples of google.cloud.bigquery.SchemaField(), drawn from open-source projects. Each example notes its source file, project, and license.
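Before the project examples, here is a minimal sketch of how SchemaField objects are typically constructed; the field names, types, and table path below are illustrative placeholders, not taken from any particular example:

from google.cloud import bigquery

# A flat field: positional name and type, plus keyword mode.
full_name = bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")

# A nested RECORD field takes its child fields via the `fields` argument.
address = bigquery.SchemaField(
    "address",
    "RECORD",
    mode="NULLABLE",
    fields=[
        bigquery.SchemaField("city", "STRING"),
        bigquery.SchemaField("zip", "STRING"),
    ],
)

# A schema is just a list of SchemaField objects passed to a Table or job config.
table = bigquery.Table(
    "your-project.your_dataset.your_table", schema=[full_name, address]
)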
Example #1
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_update_table_schema(self):
    dataset = self.temp_dataset(_make_dataset_id("update_table"))
    TABLE_NAME = "test_table"
    table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
    self.assertFalse(_table_exists(table_arg))
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)
    self.assertTrue(_table_exists(table))
    voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE")
    schema = table.schema
    schema.append(voter)
    table.schema = schema

    updated_table = Config.CLIENT.update_table(table, ["schema"])

    self.assertEqual(len(updated_table.schema), len(schema))
    for found, expected in zip(updated_table.schema, schema):
        self.assertEqual(found.name, expected.name)
        self.assertEqual(found.field_type, expected.field_type)
        self.assertEqual(found.mode, expected.mode)
Example #2
Source File: create_table.py From python-bigquery with Apache License 2.0 | 6 votes |
def create_table(table_id):
    # [START bigquery_create_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )
    # [END bigquery_create_table]
Example #3
Source File: test_update_table_require_partition_filter.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_update_table_require_partition_filter(capsys, random_table_id, client):
    # Make a partitioned table.
    schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")]
    table = bigquery.Table(random_table_id, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(field="transaction_timestamp")
    table = client.create_table(table)

    update_table_require_partition_filter.update_table_require_partition_filter(
        random_table_id
    )

    out, _ = capsys.readouterr()
    assert (
        "Updated table '{}' with require_partition_filter=True".format(random_table_id)
        in out
    )
Example #4
Source File: server.py From healthcare-deid with Apache License 2.0 | 6 votes |
def manage_tables(dataset_id, table_id):
    """Create tables in datasets in BigQuery."""
    try:
        get_bq_dataset(dataset_id)
    except exceptions.NotFound as e:
        return flask.jsonify(error=e.code, text=e.message), e.code

    table_ref = bq_client.dataset(dataset_id).table(table_id)
    try:
        jsonschema.validate(flask.request.json, bq_table_schema)
    except jsonschema.ValidationError:
        error_msg = 'unable to validate provided payload.'
        return flask.jsonify(error=400, text=error_msg), 400

    schema = [bigquery.SchemaField(field['name'], field['type'],
                                   field.get('mode') or 'NULLABLE')
              for field in flask.request.json]
    table = bigquery.Table(table_ref, schema=schema)
    try:
        table = bq_client.create_table(table)
    except exceptions.GoogleAPIError as e:
        return flask.jsonify(error=e.message), 400

    return flask.jsonify(result='success'), 200
Example #5
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_list_rows_max_results_w_bqstorage(self):
    table_ref = DatasetReference("bigquery-public-data", "utility_us").table(
        "country_code_iso"
    )
    bqstorage_client = bigquery_storage_v1.BigQueryReadClient(
        credentials=Config.CLIENT._credentials
    )

    row_iterator = Config.CLIENT.list_rows(
        table_ref,
        selected_fields=[bigquery.SchemaField("country_name", "STRING")],
        max_results=100,
    )
    dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client)

    self.assertEqual(len(dataframe.index), 100)
Example #6
Source File: main_test.py From python-docs-samples with Apache License 2.0 | 6 votes |
def test_table_to_dataframe(capsys, clients):
    from google.cloud import bigquery

    bqclient, bqstorageclient = clients

    # [START bigquerystorage_pandas_tutorial_all]
    # [START bigquerystorage_pandas_tutorial_read_table]
    # Download a table.
    table = bigquery.TableReference.from_string(
        "bigquery-public-data.utility_us.country_code_iso"
    )
    rows = bqclient.list_rows(
        table,
        selected_fields=[
            bigquery.SchemaField("country_name", "STRING"),
            bigquery.SchemaField("fips_code", "STRING"),
        ],
    )
    dataframe = rows.to_dataframe(bqstorage_client=bqstorageclient)
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_table]
    # [END bigquerystorage_pandas_tutorial_all]

    out, _ = capsys.readouterr()
    assert "country_name" in out
Example #7
Source File: bigquery_utils.py From ethereum-etl-airflow with MIT License | 6 votes |
def read_bigquery_schema_from_json_recursive(json_schema):
    """
    CAUTION: Recursive function
    This method can generate BQ schemas for nested records
    """
    result = []
    for field in json_schema:
        if field.get('type').lower() == 'record' and field.get('fields'):
            schema = bigquery.SchemaField(
                name=field.get('name'),
                field_type=field.get('type', 'STRING'),
                mode=field.get('mode', 'NULLABLE'),
                description=field.get('description'),
                fields=read_bigquery_schema_from_json_recursive(field.get('fields'))
            )
        else:
            schema = bigquery.SchemaField(
                name=field.get('name'),
                field_type=field.get('type', 'STRING'),
                mode=field.get('mode', 'NULLABLE'),
                description=field.get('description')
            )
        result.append(schema)
    return result
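A hypothetical input for the recursive helper above could look like the following; the field names are illustrative placeholders, not taken from the project's actual schemas:

json_schema = [
    {"name": "tx_hash", "type": "STRING", "mode": "REQUIRED",
     "description": "Transaction hash"},
    {
        "name": "receipt",
        "type": "RECORD",
        "mode": "NULLABLE",
        "fields": [
            {"name": "status", "type": "INTEGER"},
            {"name": "gas_used", "type": "INTEGER"},
        ],
    },
]

schema = read_bigquery_schema_from_json_recursive(json_schema)
# schema[1].fields holds the two nested SchemaField objects built by the
# recursive call for the "receipt" RECORD column.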
Example #8
Source File: test_table_util.py From professional-services with Apache License 2.0 | 6 votes |
def test_get_bq_translated_schema(self):
    """Tests TableUtil.get_bq_translated_schema().

    Tests TableUtil's ability to translate a json schema to a BigQuery schema
    in List[google.cloud.bigquery.schema.SchemaField] format.

    Returns:
        True if test passes, else False.
    """
    expected_bq_schema = [
        bigquery.SchemaField('string1', 'STRING', 'REQUIRED', 'description1'),
        bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED', 'description2')
    ]

    bq_schema = self.table_util.get_bq_translated_schema()

    assert expected_bq_schema == bq_schema
Example #9
Source File: helper_function.py From professional-services with Apache License 2.0 | 6 votes |
def csv_in_gcs_to_table(bucket_name: str, object_name: str, dataset_id: str,
                        table_id: str,
                        schema: List[bigquery.SchemaField]) -> None:
    """Upload CSV to BigQuery table.

    If the table already exists, it overwrites the table data.

    Args:
        bucket_name: Bucket name for holding the object
        object_name: Name of object to be uploaded
        dataset_id: Dataset id where the table is located.
        table_id: String holding id of the table.
        schema: Schema of the table_id
    """
    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = schema
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.write_disposition = bigquery.WriteDisposition().WRITE_TRUNCATE
    uri = "gs://{}/{}".format(bucket_name, object_name)
    load_job = client.load_table_from_uri(uri,
                                          dataset_ref.table(table_id),
                                          job_config=job_config)
    load_job.result()
Example #10
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
    """Test that a DataFrame containing column with None-type values and int64
    datatype can be uploaded without specifying a schema.

    https://github.com/googleapis/python-bigquery/issues/22
    """
    dataset_id = _make_dataset_id("bq_load_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
        Config.CLIENT.project, dataset_id
    )
    df_data = collections.OrderedDict(
        [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
    )
    dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
    load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
    load_job.result()
    table = Config.CLIENT.get_table(table_id)
    self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),))
    self.assertEqual(table.num_rows, 4)
Example #11
Source File: import_pipeline.py From professional-services with Apache License 2.0 | 5 votes |
def to_bigquery_schema(self, fields):
    """Convert list of dicts into `bigquery.SchemaFields`."""
    for field in fields:
        if 'fields' in field:
            field['fields'] = self.to_bigquery_schema(field['fields'])
    return [bigquery.SchemaField(**field) for field in fields]
Example #12
Source File: helper_function.py From professional-services with Apache License 2.0 | 5 votes |
def convert_to_schema(schema: List[Dict[str, str]]) -> List[bigquery.SchemaField]:
    """Read the schema as a JSON and reformats as an array.

    Args:
        schema: list of dicts to convert to list of SchemaField

    Returns:
        List of bigquery.SchemaField objects holding the schema.
    """
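The helper's body is elided in the snippet above. As a rough sketch only, assuming each dict provides name and type keys plus an optional mode (an assumption, not the project's actual implementation), the conversion could look like this:

def convert_to_schema_sketch(schema):
    # Hypothetical implementation: map each dict to a SchemaField,
    # defaulting mode to NULLABLE when it is not provided.
    return [
        bigquery.SchemaField(
            field['name'],
            field['type'],
            mode=field.get('mode', 'NULLABLE'),
        )
        for field in schema
    ]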
Example #13
Source File: test_parquet_util.py From professional-services with Apache License 2.0 | 5 votes |
def setUp(self):
    """Sets up resources for tests."""
    bq_schema = [
        bigquery.SchemaField('string1', 'STRING', 'REQUIRED'),
        bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED')
    ]
    self.parquet_util = parquet_util.ParquetUtil(bq_schema=bq_schema)
Example #14
Source File: executor_test.py From tfx with Apache License 2.0 | 5 votes |
def setUp(self):
    # Mock BigQuery result schema.
    self._schema = [
        bigquery.SchemaField('i', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('i2', 'INTEGER', mode='REPEATED'),
        bigquery.SchemaField('b', 'BOOLEAN', mode='REQUIRED'),
        bigquery.SchemaField('f', 'FLOAT', mode='REQUIRED'),
        bigquery.SchemaField('f2', 'FLOAT', mode='REPEATED'),
        bigquery.SchemaField('s', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('s2', 'STRING', mode='REPEATED'),
    ]
    super(ExecutorTest, self).setUp()
Example #15
Source File: load_table_uri_truncate_csv.py From python-bigquery with Apache License 2.0 | 5 votes |
def load_table_uri_truncate_csv(table_id):
    # [START bigquery_load_table_gcs_csv_truncate]
    import six
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv_truncate]
Example #16
Source File: test_avro_util.py From professional-services with Apache License 2.0 | 5 votes |
def setUp(self):
    """Sets up resources for tests."""
    bq_schema = [
        bigquery.SchemaField('string1', 'STRING', 'REQUIRED'),
        bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED')
    ]
    self.schema_name = 'test_schema'
    self.avro_util = avro_util.AvroUtil(bq_schema=bq_schema,
                                        schema_name=self.schema_name)
Example #17
Source File: load_table_uri_truncate_parquet.py From python-bigquery with Apache License 2.0 | 5 votes |
def load_table_uri_truncate_parquet(table_id):
    # [START bigquery_load_table_gcs_parquet_truncate]
    import six
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.PARQUET,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_parquet_truncate]
Example #18
Source File: conftest.py From python-bigquery with Apache License 2.0 | 5 votes |
def table_with_schema_id(client, dataset_id):
    now = datetime.datetime.now()
    table_id = "python_table_with_schema_{}_{}".format(
        now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8]
    )
    schema = [
        bigquery.SchemaField("full_name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
    ]
    table = bigquery.Table("{}.{}".format(dataset_id, table_id), schema=schema)
    table = client.create_table(table)
    yield "{}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    client.delete_table(table, not_found_ok=True)
Example #19
Source File: load_table_uri_truncate_orc.py From python-bigquery with Apache License 2.0 | 5 votes |
def load_table_uri_truncate_orc(table_id):
    # [START bigquery_load_table_gcs_orc_truncate]
    import six
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.ORC,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_orc_truncate]
Example #20
Source File: load_table_uri_csv.py From python-bigquery with Apache License 2.0 | 5 votes |
def load_table_uri_csv(table_id):
    # [START bigquery_load_table_gcs_csv]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
        skip_leading_rows=1,
        # The source format defaults to CSV, so the line below is optional.
        source_format=bigquery.SourceFormat.CSV,
    )
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)  # Make an API request.
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv]
Example #21
Source File: create_table_range_partitioned.py From python-bigquery with Apache License 2.0 | 5 votes |
def create_table_range_partitioned(table_id):
    # [START bigquery_create_table_range_partitioned]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    schema = [
        bigquery.SchemaField("full_name", "STRING"),
        bigquery.SchemaField("city", "STRING"),
        bigquery.SchemaField("zipcode", "INTEGER"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table.range_partitioning = bigquery.RangePartitioning(
        # To use integer range partitioning, select a top-level REQUIRED /
        # NULLABLE column with INTEGER / INT64 data type.
        field="zipcode",
        range_=bigquery.PartitionRange(start=0, end=100000, interval=10),
    )
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )
    # [END bigquery_create_table_range_partitioned]
    return table
Example #22
Source File: add_empty_column.py From python-bigquery with Apache License 2.0 | 5 votes |
def add_empty_column(table_id):
    # [START bigquery_add_empty_column]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table
    #                  to add an empty column.
    # table_id = "your-project.your_dataset.your_table_name"

    table = client.get_table(table_id)  # Make an API request.

    original_schema = table.schema
    new_schema = original_schema[:]  # Creates a copy of the schema.
    new_schema.append(bigquery.SchemaField("phone", "STRING"))

    table.schema = new_schema
    table = client.update_table(table, ["schema"])  # Make an API request.

    if len(table.schema) == len(original_schema) + 1 == len(new_schema):
        print("A new column has been added.")
    else:
        print("The column has not been added.")
    # [END bigquery_add_empty_column]
Example #23
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_load_table_from_dataframe_w_required(self):
    """Test that a DataFrame with required columns can be uploaded if a
    BigQuery schema is specified.

    See: https://github.com/googleapis/google-cloud-python/issues/8093
    """
    table_schema = (
        bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    )

    records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}]
    dataframe = pandas.DataFrame(records, columns=["name", "age"])
    job_config = bigquery.LoadJobConfig(schema=table_schema)
    dataset_id = _make_dataset_id("bq_load_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_dataframe_w_required".format(
        Config.CLIENT.project, dataset_id
    )

    # Create the table before loading so that schema mismatch errors are
    # identified.
    table = retry_403(Config.CLIENT.create_table)(
        Table(table_id, schema=table_schema)
    )
    self.to_delete.insert(0, table)

    job_config = bigquery.LoadJobConfig(schema=table_schema)
    load_job = Config.CLIENT.load_table_from_dataframe(
        dataframe, table_id, job_config=job_config
    )
    load_job.result()

    table = Config.CLIENT.get_table(table)
    self.assertEqual(tuple(table.schema), table_schema)
    self.assertEqual(table.num_rows, 2)
Example #24
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_load_table_from_json_basic_use(self):
    table_schema = (
        bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("birthday", "DATE", mode="REQUIRED"),
        bigquery.SchemaField("is_awesome", "BOOLEAN", mode="REQUIRED"),
    )
    json_rows = [
        {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
        {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
    ]
    dataset_id = _make_dataset_id("bq_system_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_json_basic_use".format(
        Config.CLIENT.project, dataset_id
    )

    # Create the table before loading so that schema mismatch errors are
    # identified.
    table = retry_403(Config.CLIENT.create_table)(
        Table(table_id, schema=table_schema)
    )
    self.to_delete.insert(0, table)

    job_config = bigquery.LoadJobConfig(schema=table_schema)
    load_job = Config.CLIENT.load_table_from_json(
        json_rows, table_id, job_config=job_config
    )
    load_job.result()

    table = Config.CLIENT.get_table(table)
    self.assertEqual(tuple(table.schema), table_schema)
    self.assertEqual(table.num_rows, 2)
Example #25
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_load_table_from_json_schema_autodetect(self):
    json_rows = [
        {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
        {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
    ]
    dataset_id = _make_dataset_id("bq_system_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_json_basic_use".format(
        Config.CLIENT.project, dataset_id
    )

    # Use schema with NULLABLE fields, because schema autodetection
    # defaults to field mode NULLABLE.
    table_schema = (
        bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
        bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"),
        bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
    )

    # create the table before loading so that the column order is predictable
    table = retry_403(Config.CLIENT.create_table)(
        Table(table_id, schema=table_schema)
    )
    self.to_delete.insert(0, table)

    # do not pass an explicit job config to trigger automatic schema detection
    load_job = Config.CLIENT.load_table_from_json(json_rows, table_id)
    load_job.result()

    table = Config.CLIENT.get_table(table)
    self.assertEqual(tuple(table.schema), table_schema)
    self.assertEqual(table.num_rows, 2)
Example #26
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_list_rows_page_size(self):
    from google.cloud.bigquery.job import SourceFormat
    from google.cloud.bigquery.job import WriteDisposition

    num_items = 7
    page_size = 3
    num_pages, num_last_page = divmod(num_items, page_size)

    SF = bigquery.SchemaField
    schema = [SF("string_col", "STRING", mode="NULLABLE")]
    to_insert = [{"string_col": "item%d" % i} for i in range(num_items)]
    rows = [json.dumps(row) for row in to_insert]
    body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))

    table_id = "test_table"
    dataset = self.temp_dataset(_make_dataset_id("nested_df"))
    table = dataset.table(table_id)
    self.to_delete.insert(0, table)
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
    job_config.schema = schema
    # Load a table using a local JSON file from memory.
    Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result()

    df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size)
    pages = df.pages

    for i in range(num_pages):
        page = next(pages)
        self.assertEqual(page.num_items, page_size)
    page = next(pages)
    self.assertEqual(page.num_items, num_last_page)
Example #27
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_insert_rows_nested_nested_dictionary(self):
    # See #2951
    SF = bigquery.SchemaField
    schema = [
        SF("string_col", "STRING", mode="NULLABLE"),
        SF(
            "record_col",
            "RECORD",
            mode="NULLABLE",
            fields=[
                SF("nested_string", "STRING", mode="NULLABLE"),
                SF("nested_repeated", "INTEGER", mode="REPEATED"),
                SF(
                    "nested_record",
                    "RECORD",
                    mode="NULLABLE",
                    fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")],
                ),
            ],
        ),
    ]
    record = {
        "nested_string": "another string value",
        "nested_repeated": [0, 1, 2],
        "nested_record": {"nested_nested_string": "some deep insight"},
    }
    to_insert = [{"string_col": "Some value", "record_col": record}]
    table_id = "test_table"
    dataset = self.temp_dataset(_make_dataset_id("issue_2951"))
    table_arg = Table(dataset.table(table_id), schema=schema)
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)

    Config.CLIENT.insert_rows(table, to_insert)

    retry = RetryResult(_has_rows, max_tries=8)
    rows = retry(self._fetch_single_page)(table)
    row_tuples = [r.values() for r in rows]
    expected_rows = [("Some value", record)]
    self.assertEqual(row_tuples, expected_rows)
Example #28
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def _load_table_for_dml(self, rows, dataset_id, table_id):
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.bigquery.job import CreateDisposition
    from google.cloud.bigquery.job import SourceFormat
    from google.cloud.bigquery.job import WriteDisposition

    dataset = self.temp_dataset(dataset_id)
    greeting = bigquery.SchemaField("greeting", "STRING", mode="NULLABLE")
    table_ref = dataset.table(table_id)
    table_arg = Table(table_ref, schema=[greeting])
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, "w") as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(("Greeting",))
            writer.writerows(rows)

        with open(temp.name, "rb") as csv_read:
            config = bigquery.LoadJobConfig()
            config.source_format = SourceFormat.CSV
            config.skip_leading_rows = 1
            config.create_disposition = CreateDisposition.CREATE_NEVER
            config.write_disposition = WriteDisposition.WRITE_EMPTY
            job = Config.CLIENT.load_table_from_file(
                csv_read, table_ref, job_config=config
            )

    # Retry until done.
    job.result(timeout=JOB_TIMEOUT)
    self._fetch_single_page(table)