Python google.cloud.bigquery.SchemaField() Examples
The following are code examples of google.cloud.bigquery.SchemaField(), drawn from open-source projects. Each example notes its source file, project, and license.
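Before the project examples, here is a minimal sketch of how SchemaField objects are typically constructed; the field names, types, and table path below are illustrative placeholders, not taken from any particular example:

from google.cloud import bigquery

# A flat field: positional name and type, plus keyword mode.
full_name = bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")

# A nested RECORD field takes its child fields via the `fields` argument.
address = bigquery.SchemaField(
    "address",
    "RECORD",
    mode="NULLABLE",
    fields=[
        bigquery.SchemaField("city", "STRING"),
        bigquery.SchemaField("zip", "STRING"),
    ],
)

# A schema is just a list of SchemaField objects passed to a Table or job config.
table = bigquery.Table(
    "your-project.your_dataset.your_table", schema=[full_name, address]
)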
Example #1
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_update_table_schema(self):
    dataset = self.temp_dataset(_make_dataset_id("update_table"))
    TABLE_NAME = "test_table"
    table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
    self.assertFalse(_table_exists(table_arg))
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)
    self.assertTrue(_table_exists(table))
    voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE")
    schema = table.schema
    schema.append(voter)
    table.schema = schema

    updated_table = Config.CLIENT.update_table(table, ["schema"])

    self.assertEqual(len(updated_table.schema), len(schema))
    for found, expected in zip(updated_table.schema, schema):
        self.assertEqual(found.name, expected.name)
        self.assertEqual(found.field_type, expected.field_type)
        self.assertEqual(found.mode, expected.mode)
Example #2
Source File: create_table.py From python-bigquery with Apache License 2.0 | 6 votes |
def create_table(table_id):
    # [START bigquery_create_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )
    # [END bigquery_create_table]
Example #3
Source File: test_update_table_require_partition_filter.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_update_table_require_partition_filter(capsys, random_table_id, client):
    # Make a partitioned table.
    schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")]
    table = bigquery.Table(random_table_id, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(field="transaction_timestamp")
    table = client.create_table(table)

    update_table_require_partition_filter.update_table_require_partition_filter(
        random_table_id
    )

    out, _ = capsys.readouterr()
    assert (
        "Updated table '{}' with require_partition_filter=True".format(random_table_id)
        in out
    )
Example #4
Source File: server.py From healthcare-deid with Apache License 2.0 | 6 votes |
def manage_tables(dataset_id, table_id):
    """Create tables in datasets in BigQuery."""
    try:
        get_bq_dataset(dataset_id)
    except exceptions.NotFound as e:
        return flask.jsonify(error=e.code, text=e.message), e.code

    table_ref = bq_client.dataset(dataset_id).table(table_id)
    try:
        jsonschema.validate(flask.request.json, bq_table_schema)
    except jsonschema.ValidationError:
        error_msg = 'unable to validate provided payload.'
        return flask.jsonify(error=400, text=error_msg), 400

    schema = [bigquery.SchemaField(field['name'], field['type'],
                                   field.get('mode') or 'NULLABLE')
              for field in flask.request.json]
    table = bigquery.Table(table_ref, schema=schema)
    try:
        table = bq_client.create_table(table)
    except exceptions.GoogleAPIError as e:
        return flask.jsonify(error=e.message), 400

    return flask.jsonify(result='success'), 200
Example #5
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_list_rows_max_results_w_bqstorage(self):
    table_ref = DatasetReference("bigquery-public-data", "utility_us").table(
        "country_code_iso"
    )
    bqstorage_client = bigquery_storage_v1.BigQueryReadClient(
        credentials=Config.CLIENT._credentials
    )

    row_iterator = Config.CLIENT.list_rows(
        table_ref,
        selected_fields=[bigquery.SchemaField("country_name", "STRING")],
        max_results=100,
    )
    dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client)

    self.assertEqual(len(dataframe.index), 100)
Example #6
Source File: main_test.py From python-docs-samples with Apache License 2.0 | 6 votes |
def test_table_to_dataframe(capsys, clients):
    from google.cloud import bigquery

    bqclient, bqstorageclient = clients

    # [START bigquerystorage_pandas_tutorial_all]
    # [START bigquerystorage_pandas_tutorial_read_table]
    # Download a table.
    table = bigquery.TableReference.from_string(
        "bigquery-public-data.utility_us.country_code_iso"
    )
    rows = bqclient.list_rows(
        table,
        selected_fields=[
            bigquery.SchemaField("country_name", "STRING"),
            bigquery.SchemaField("fips_code", "STRING"),
        ],
    )
    dataframe = rows.to_dataframe(bqstorage_client=bqstorageclient)
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_table]
    # [END bigquerystorage_pandas_tutorial_all]

    out, _ = capsys.readouterr()
    assert "country_name" in out
Example #7
Source File: bigquery_utils.py From ethereum-etl-airflow with MIT License | 6 votes |
def read_bigquery_schema_from_json_recursive(json_schema):
    """
    CAUTION: Recursive function
    This method can generate BQ schemas for nested records
    """
    result = []
    for field in json_schema:
        if field.get('type').lower() == 'record' and field.get('fields'):
            schema = bigquery.SchemaField(
                name=field.get('name'),
                field_type=field.get('type', 'STRING'),
                mode=field.get('mode', 'NULLABLE'),
                description=field.get('description'),
                fields=read_bigquery_schema_from_json_recursive(field.get('fields'))
            )
        else:
            schema = bigquery.SchemaField(
                name=field.get('name'),
                field_type=field.get('type', 'STRING'),
                mode=field.get('mode', 'NULLABLE'),
                description=field.get('description')
            )
        result.append(schema)
    return result
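A hypothetical input for the recursive helper above could look like the following; the field names are illustrative placeholders, not taken from the project's actual schemas:

json_schema = [
    {"name": "tx_hash", "type": "STRING", "mode": "REQUIRED",
     "description": "Transaction hash"},
    {
        "name": "receipt",
        "type": "RECORD",
        "mode": "NULLABLE",
        "fields": [
            {"name": "status", "type": "INTEGER"},
            {"name": "gas_used", "type": "INTEGER"},
        ],
    },
]

schema = read_bigquery_schema_from_json_recursive(json_schema)
# schema[1].fields holds the two nested SchemaField objects built by the
# recursive call for the "receipt" RECORD column.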
Example #8
Source File: test_table_util.py From professional-services with Apache License 2.0 | 6 votes |
def test_get_bq_translated_schema(self):
    """Tests TableUtil.get_bq_translated_schema().

    Tests TableUtil's ability to translate a json schema to a BigQuery schema
    in List[google.cloud.bigquery.schema.SchemaField] format.

    Returns:
        True if test passes, else False.
    """
    expected_bq_schema = [
        bigquery.SchemaField('string1', 'STRING', 'REQUIRED', 'description1'),
        bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED', 'description2')
    ]

    bq_schema = self.table_util.get_bq_translated_schema()

    assert expected_bq_schema == bq_schema
Example #9
Source File: helper_function.py From professional-services with Apache License 2.0 | 6 votes |
def csv_in_gcs_to_table(bucket_name: str, object_name: str, dataset_id: str,
                        table_id: str,
                        schema: List[bigquery.SchemaField]) -> None:
    """Upload CSV to BigQuery table.

    If the table already exists, it overwrites the table data.

    Args:
        bucket_name: Bucket name for holding the object
        object_name: Name of object to be uploaded
        dataset_id: Dataset id where the table is located.
        table_id: String holding id of the table.
        schema: Schema of the table_id
    """
    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = schema
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.write_disposition = bigquery.WriteDisposition().WRITE_TRUNCATE
    uri = "gs://{}/{}".format(bucket_name, object_name)
    load_job = client.load_table_from_uri(uri,
                                          dataset_ref.table(table_id),
                                          job_config=job_config)
    load_job.result()
Example #10
Source File: system.py From python-bigquery with Apache License 2.0 | 6 votes |
def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
    """Test that a DataFrame containing column with None-type values and int64
    datatype can be uploaded without specifying a schema.

    https://github.com/googleapis/python-bigquery/issues/22
    """
    dataset_id = _make_dataset_id("bq_load_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
        Config.CLIENT.project, dataset_id
    )
    df_data = collections.OrderedDict(
        [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
    )
    dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
    load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
    load_job.result()
    table = Config.CLIENT.get_table(table_id)
    self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),))
    self.assertEqual(table.num_rows, 4)
Example #11
Source File: import_pipeline.py From professional-services with Apache License 2.0 | 5 votes |
def to_bigquery_schema(self, fields):
    """Convert list of dicts into `bigquery.SchemaFields`."""
    for field in fields:
        if 'fields' in field:
            field['fields'] = self.to_bigquery_schema(field['fields'])
    return [bigquery.SchemaField(**field) for field in fields]
Example #12
Source File: helper_function.py From professional-services with Apache License 2.0 | 5 votes |
def convert_to_schema(schema: List[Dict[str, str]]) -> List[bigquery.SchemaField]:
    """Read the schema as a JSON and reformats as an array.

    Args:
        schema: list of dicts to convert to list of SchemaField

    Returns:
        List of bigquery.SchemaField objects holding the schema.
    """
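The helper's body is elided in the snippet above. As a rough sketch only, assuming each dict provides name and type keys plus an optional mode (an assumption, not the project's actual implementation), the conversion could look like this:

def convert_to_schema_sketch(schema):
    # Hypothetical implementation: map each dict to a SchemaField,
    # defaulting mode to NULLABLE when it is not provided.
    return [
        bigquery.SchemaField(
            field['name'],
            field['type'],
            mode=field.get('mode', 'NULLABLE'),
        )
        for field in schema
    ]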
Example #13
Source File: test_parquet_util.py From professional-services with Apache License 2.0 | 5 votes |
def setUp(self):
    """Sets up resources for tests."""
    bq_schema = [
        bigquery.SchemaField('string1', 'STRING', 'REQUIRED'),
        bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED')
    ]
    self.parquet_util = parquet_util.ParquetUtil(bq_schema=bq_schema)
Example #14
Source File: executor_test.py From tfx with Apache License 2.0 | 5 votes |
def setUp(self):
    # Mock BigQuery result schema.
    self._schema = [
        bigquery.SchemaField('i', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('i2', 'INTEGER', mode='REPEATED'),
        bigquery.SchemaField('b', 'BOOLEAN', mode='REQUIRED'),
        bigquery.SchemaField('f', 'FLOAT', mode='REQUIRED'),
        bigquery.SchemaField('f2', 'FLOAT', mode='REPEATED'),
        bigquery.SchemaField('s', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('s2', 'STRING', mode='REPEATED'),
    ]
    super(ExecutorTest, self).setUp()
Example #15
Source File: load_table_uri_truncate_csv.py From python-bigquery with Apache License 2.0 | 5 votes |
def load_table_uri_truncate_csv(table_id):
    # [START bigquery_load_table_gcs_csv_truncate]
    import six
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv_truncate]
Example #16
Source File: test_avro_util.py From professional-services with Apache License 2.0 | 5 votes |
def setUp(self):
    """Sets up resources for tests."""
    bq_schema = [
        bigquery.SchemaField('string1', 'STRING', 'REQUIRED'),
        bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED')
    ]
    self.schema_name = 'test_schema'
    self.avro_util = avro_util.AvroUtil(bq_schema=bq_schema,
                                        schema_name=self.schema_name)
Example #17
Source File: load_table_uri_truncate_parquet.py From python-bigquery with Apache License 2.0 | 5 votes |
def load_table_uri_truncate_parquet(table_id):
    # [START bigquery_load_table_gcs_parquet_truncate]
    import six
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.PARQUET,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_parquet_truncate]
Example #18
Source File: conftest.py From python-bigquery with Apache License 2.0 | 5 votes |
def table_with_schema_id(client, dataset_id):
    now = datetime.datetime.now()
    table_id = "python_table_with_schema_{}_{}".format(
        now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8]
    )
    schema = [
        bigquery.SchemaField("full_name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
    ]
    table = bigquery.Table("{}.{}".format(dataset_id, table_id), schema=schema)
    table = client.create_table(table)
    yield "{}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    client.delete_table(table, not_found_ok=True)
Example #19
Source File: load_table_uri_truncate_orc.py From python-bigquery with Apache License 2.0 | 5 votes |
def load_table_uri_truncate_orc(table_id):
    # [START bigquery_load_table_gcs_orc_truncate]
    import six
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.ORC,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_orc_truncate]
Example #20
Source File: load_table_uri_csv.py From python-bigquery with Apache License 2.0 | 5 votes |
def load_table_uri_csv(table_id):
    # [START bigquery_load_table_gcs_csv]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
        skip_leading_rows=1,
        # The source format defaults to CSV, so the line below is optional.
        source_format=bigquery.SourceFormat.CSV,
    )
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)  # Make an API request.
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv]
Example #21
Source File: create_table_range_partitioned.py From python-bigquery with Apache License 2.0 | 5 votes |
def create_table_range_partitioned(table_id):
    # [START bigquery_create_table_range_partitioned]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    schema = [
        bigquery.SchemaField("full_name", "STRING"),
        bigquery.SchemaField("city", "STRING"),
        bigquery.SchemaField("zipcode", "INTEGER"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table.range_partitioning = bigquery.RangePartitioning(
        # To use integer range partitioning, select a top-level REQUIRED /
        # NULLABLE column with INTEGER / INT64 data type.
        field="zipcode",
        range_=bigquery.PartitionRange(start=0, end=100000, interval=10),
    )
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )
    # [END bigquery_create_table_range_partitioned]
    return table
Example #22
Source File: add_empty_column.py From python-bigquery with Apache License 2.0 | 5 votes |
def add_empty_column(table_id):
    # [START bigquery_add_empty_column]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table
    #                  to add an empty column.
    # table_id = "your-project.your_dataset.your_table_name"

    table = client.get_table(table_id)  # Make an API request.

    original_schema = table.schema
    new_schema = original_schema[:]  # Creates a copy of the schema.
    new_schema.append(bigquery.SchemaField("phone", "STRING"))

    table.schema = new_schema
    table = client.update_table(table, ["schema"])  # Make an API request.

    if len(table.schema) == len(original_schema) + 1 == len(new_schema):
        print("A new column has been added.")
    else:
        print("The column has not been added.")
    # [END bigquery_add_empty_column]
Example #23
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_load_table_from_dataframe_w_required(self):
    """Test that a DataFrame with required columns can be uploaded if a
    BigQuery schema is specified.

    See: https://github.com/googleapis/google-cloud-python/issues/8093
    """
    table_schema = (
        bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    )

    records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}]
    dataframe = pandas.DataFrame(records, columns=["name", "age"])
    job_config = bigquery.LoadJobConfig(schema=table_schema)
    dataset_id = _make_dataset_id("bq_load_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_dataframe_w_required".format(
        Config.CLIENT.project, dataset_id
    )

    # Create the table before loading so that schema mismatch errors are
    # identified.
    table = retry_403(Config.CLIENT.create_table)(
        Table(table_id, schema=table_schema)
    )
    self.to_delete.insert(0, table)

    job_config = bigquery.LoadJobConfig(schema=table_schema)
    load_job = Config.CLIENT.load_table_from_dataframe(
        dataframe, table_id, job_config=job_config
    )
    load_job.result()

    table = Config.CLIENT.get_table(table)
    self.assertEqual(tuple(table.schema), table_schema)
    self.assertEqual(table.num_rows, 2)
Example #24
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_load_table_from_json_basic_use(self):
    table_schema = (
        bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("birthday", "DATE", mode="REQUIRED"),
        bigquery.SchemaField("is_awesome", "BOOLEAN", mode="REQUIRED"),
    )
    json_rows = [
        {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
        {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
    ]
    dataset_id = _make_dataset_id("bq_system_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_json_basic_use".format(
        Config.CLIENT.project, dataset_id
    )

    # Create the table before loading so that schema mismatch errors are
    # identified.
    table = retry_403(Config.CLIENT.create_table)(
        Table(table_id, schema=table_schema)
    )
    self.to_delete.insert(0, table)

    job_config = bigquery.LoadJobConfig(schema=table_schema)
    load_job = Config.CLIENT.load_table_from_json(
        json_rows, table_id, job_config=job_config
    )
    load_job.result()

    table = Config.CLIENT.get_table(table)
    self.assertEqual(tuple(table.schema), table_schema)
    self.assertEqual(table.num_rows, 2)
Example #25
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_load_table_from_json_schema_autodetect(self):
    json_rows = [
        {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
        {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
    ]
    dataset_id = _make_dataset_id("bq_system_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_json_basic_use".format(
        Config.CLIENT.project, dataset_id
    )

    # Use schema with NULLABLE fields, because schema autodetection
    # defaults to field mode NULLABLE.
    table_schema = (
        bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
        bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"),
        bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
    )

    # create the table before loading so that the column order is predictable
    table = retry_403(Config.CLIENT.create_table)(
        Table(table_id, schema=table_schema)
    )
    self.to_delete.insert(0, table)

    # do not pass an explicit job config to trigger automatic schema detection
    load_job = Config.CLIENT.load_table_from_json(json_rows, table_id)
    load_job.result()

    table = Config.CLIENT.get_table(table)
    self.assertEqual(tuple(table.schema), table_schema)
    self.assertEqual(table.num_rows, 2)
Example #26
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_list_rows_page_size(self):
    from google.cloud.bigquery.job import SourceFormat
    from google.cloud.bigquery.job import WriteDisposition

    num_items = 7
    page_size = 3
    num_pages, num_last_page = divmod(num_items, page_size)

    SF = bigquery.SchemaField
    schema = [SF("string_col", "STRING", mode="NULLABLE")]
    to_insert = [{"string_col": "item%d" % i} for i in range(num_items)]
    rows = [json.dumps(row) for row in to_insert]
    body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))

    table_id = "test_table"
    dataset = self.temp_dataset(_make_dataset_id("nested_df"))
    table = dataset.table(table_id)
    self.to_delete.insert(0, table)
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
    job_config.schema = schema
    # Load a table using a local JSON file from memory.
    Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result()

    df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size)
    pages = df.pages

    for i in range(num_pages):
        page = next(pages)
        self.assertEqual(page.num_items, page_size)
    page = next(pages)
    self.assertEqual(page.num_items, num_last_page)
Example #27
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def test_insert_rows_nested_nested_dictionary(self):
    # See #2951
    SF = bigquery.SchemaField
    schema = [
        SF("string_col", "STRING", mode="NULLABLE"),
        SF(
            "record_col",
            "RECORD",
            mode="NULLABLE",
            fields=[
                SF("nested_string", "STRING", mode="NULLABLE"),
                SF("nested_repeated", "INTEGER", mode="REPEATED"),
                SF(
                    "nested_record",
                    "RECORD",
                    mode="NULLABLE",
                    fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")],
                ),
            ],
        ),
    ]
    record = {
        "nested_string": "another string value",
        "nested_repeated": [0, 1, 2],
        "nested_record": {"nested_nested_string": "some deep insight"},
    }
    to_insert = [{"string_col": "Some value", "record_col": record}]
    table_id = "test_table"
    dataset = self.temp_dataset(_make_dataset_id("issue_2951"))
    table_arg = Table(dataset.table(table_id), schema=schema)
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)

    Config.CLIENT.insert_rows(table, to_insert)

    retry = RetryResult(_has_rows, max_tries=8)
    rows = retry(self._fetch_single_page)(table)
    row_tuples = [r.values() for r in rows]
    expected_rows = [("Some value", record)]
    self.assertEqual(row_tuples, expected_rows)
Example #28
Source File: system.py From python-bigquery with Apache License 2.0 | 5 votes |
def _load_table_for_dml(self, rows, dataset_id, table_id):
    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.bigquery.job import CreateDisposition
    from google.cloud.bigquery.job import SourceFormat
    from google.cloud.bigquery.job import WriteDisposition

    dataset = self.temp_dataset(dataset_id)
    greeting = bigquery.SchemaField("greeting", "STRING", mode="NULLABLE")
    table_ref = dataset.table(table_id)
    table_arg = Table(table_ref, schema=[greeting])
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, "w") as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(("Greeting",))
            writer.writerows(rows)

        with open(temp.name, "rb") as csv_read:
            config = bigquery.LoadJobConfig()
            config.source_format = SourceFormat.CSV
            config.skip_leading_rows = 1
            config.create_disposition = CreateDisposition.CREATE_NEVER
            config.write_disposition = WriteDisposition.WRITE_EMPTY
            job = Config.CLIENT.load_table_from_file(
                csv_read, table_ref, job_config=config
            )

    # Retry until done.
    job.result(timeout=JOB_TIMEOUT)
    self._fetch_single_page(table)