Python Examples of google.cloud.bigquery.Table

Source File: system.py From python-bigquery with Apache License 2.0

6 votes

def test_extract_table(self):
        """Extract a loaded table to Cloud Storage and check the exported text.

        The table is populated through ``_load_table_for_extract_table``,
        exported with ``extract_table``, and the resulting blob is downloaded
        and searched for a known name.
        """
        suffix = unique_resource_id()
        bucket_name = "bq_extract_test" + suffix
        source_blob_name = "person_ages.csv"
        dataset_id = _make_dataset_id("load_gcs_then_extract")
        project = Config.CLIENT.project
        table_ref = bigquery.DatasetReference(project, dataset_id).table("test_table")
        # Queue the table in ``to_delete`` ahead of the objects created below.
        self.to_delete.insert(0, Table(table_ref))

        bucket = self._create_bucket(bucket_name)
        self._load_table_for_extract_table(bucket, source_blob_name, table_ref, ROWS)
        exported_blob = bucket.blob("person_ages_out.csv")
        export_uri = "gs://{}/person_ages_out.csv".format(bucket_name)

        extract_job = Config.CLIENT.extract_table(table_ref, export_uri)
        extract_job.result(timeout=100)

        self.to_delete.insert(0, exported_blob)
        download = retry_storage_errors(exported_blob.download_as_string)
        exported_text = download().decode("utf-8")
        self.assertIn("Bharney Rhubble", exported_text)

Source File: server.py From healthcare-deid with Apache License 2.0

6 votes

def manage_tables(dataset_id, table_id):
  """Create tables in datasets in BigQuery.

  The table schema is read from the JSON body of the current Flask request,
  which is validated against ``bq_table_schema`` first.

  Args:
    dataset_id: ID of the dataset that should hold the table.
    table_id: ID of the table to create.

  Returns:
    A ``(response, status)`` tuple: a success payload with 200, the lookup
    error's own code when ``get_bq_dataset`` raises NotFound, or 400 when the
    payload fails validation or table creation fails.
  """
  try:
    get_bq_dataset(dataset_id)
  except exceptions.NotFound as e:
    return flask.jsonify(error=e.code, text=e.message), e.code

  table_ref = bq_client.dataset(dataset_id).table(table_id)
  try:
    jsonschema.validate(flask.request.json, bq_table_schema)
  except jsonschema.ValidationError:
    error_msg = 'unable to validate provided payload.'
    return flask.jsonify(error=400, text=error_msg), 400

  # A missing or empty 'mode' falls back to NULLABLE.
  schema = [bigquery.SchemaField(field['name'], field['type'],
                                 field.get('mode') or 'NULLABLE')
            for field in flask.request.json]
  table = bigquery.Table(table_ref, schema=schema)
  try:
    bq_client.create_table(table)
  except exceptions.GoogleAPIError as e:
    # The handler catches the base error class, which is not guaranteed to
    # carry a ``message`` attribute; fall back to str(e) so that building the
    # error response cannot itself raise AttributeError.
    return flask.jsonify(error=getattr(e, 'message', str(e))), 400

  return flask.jsonify(result='success'), 200

Source File: bigquery.py From professional-services with Apache License 2.0

6 votes

def create_table(self, project_id, dataset_id, table_id, schema=None):
        """Creates a BigQuery table from a schema.

        Args:
            project_id (str): Project id.
            dataset_id (str): Dataset id.
            table_id (str): Table id to create.
            schema (list[dict], optional): Field definitions, each providing
                'name', 'type' and 'mode' keys. TABLE_SCHEMA is used when
                no schema is given.

        Returns:
            Whatever ``self.client.create_table`` returns for the new table.
        """
        # Only fall back to the default when the caller supplied nothing;
        # a provided schema must be honoured rather than overwritten.
        if schema is None:
            schema = TABLE_SCHEMA
        pyschema = [
            bigquery.SchemaField(row['name'], row['type'], mode=row['mode'])
            for row in schema
        ]
        table_name = f"{project_id}.{dataset_id}.{table_id}"
        # Lazy %-style argument: the message needs a placeholder for the arg.
        LOGGER.info("Creating table %s", table_name)
        table = bigquery.Table(table_name, schema=pyschema)
        return self.client.create_table(table)

Source File: analyzer.py From bigquery-view-analyzer with MIT License

6 votes

def format_tree(self, show_key=False, show_status=False):
        """Render ``self.tree`` as text, one line per rendered node.

        Args:
            show_key: When true, prepend a colour legend to the output.
            show_status: Forwarded to each node's ``pretty_name`` as
                ``show_authorization_status``.

        Returns:
            The assembled string.
        """
        log.info("Formatting tree...")

        def legend_entry(colour, label):
            # Coloured bullet followed by the label left-justified to width 12.
            return colour + "◉" + Fore.RESET + label.ljust(12)

        legend = {
            "project": legend_entry(Fore.CYAN, " = Project"),
            "dataset": legend_entry(Fore.YELLOW, " = Dataset"),
            "table": legend_entry(Fore.RED, " = Table"),
            "view": legend_entry(Fore.GREEN, " = View"),
        }

        chunks = []
        if show_key:
            chunks.append(
                "Key:\n{}{}\n{}{}\n\n".format(
                    legend["project"], legend["table"], legend["dataset"], legend["view"]
                )
            )
        chunks.extend(
            "%s%s\n" % (prefix, node.pretty_name(show_authorization_status=show_status))
            for prefix, _, node in RenderTree(self.tree)
        )
        return "".join(chunks)

Source File: gbq.py From pandas-gbq with BSD 3-Clause "New" or "Revised" License

6 votes

def delete(self, table_id):
        """Delete a table in Google BigQuery.

        Parameters
        ----------
        table_id : str
            Name of table to be deleted

        Raises
        ------
        NotFoundException
            If ``self.exists(table_id)`` reports that the table is absent.
        """
        from google.api_core.exceptions import NotFound

        table_is_present = self.exists(table_id)
        if not table_is_present:
            raise NotFoundException("Table does not exist")

        dataset = self.client.dataset(self.dataset_id)
        doomed_ref = dataset.table(table_id)
        try:
            self.client.delete_table(doomed_ref)
        except NotFound:
            # A 404 here means the table vanished after the existence check;
            # nothing is left to delete.
            pass
        except self.http_error as http_exc:
            self.process_http_error(http_exc)

Source File: datamgr.py From ibis with Apache License 2.0

6 votes

def parquet(tables, data_directory, ignore_missing_dependency, **params):
    """Write each table yielded by ``read_tables`` to a ``.parquet`` file.

    When PyArrow cannot be imported, either log a warning and return 0
    (``ignore_missing_dependency`` truthy) or raise ``click.ClickException``.
    Extra keyword arguments are accepted and unused.
    """
    try:
        import pyarrow as pa  # noqa: F401
        import pyarrow.parquet as pq  # noqa: F401
    except ImportError:
        msg = 'PyArrow dependency is missing'
        if not ignore_missing_dependency:
            raise click.ClickException(msg)
        logger.warning('Ignored: %s', msg)
        return 0

    output_dir = Path(data_directory)
    for name, frame in read_tables(tables, output_dir):
        converted = pa.Table.from_pandas(frame)
        destination = output_dir / '{}.parquet'.format(name)
        pq.write_table(converted, str(destination))

Source File: bigquery_component.py From professional-services with Apache License 2.0

6 votes

def delete_table(self, dataset_id, table_name):
        """Deletes BigQuery table, tolerating a table that is already gone.

        Args:
            dataset_id (str): BigQuery dataset id.
            table_name (str): BigQuery table name.
        """
        dataset = self.client.dataset(dataset_id)
        target = dataset.table(table_name)
        try:
            self.client.delete_table(target)
        except exceptions.NotFound as not_found:
            # A missing table is only logged, never raised.
            logger.debug(not_found)
            logger.debug("Table %s not found in %s dataset. No need to delete",
                         table_name, dataset_id)
        else:
            logger.debug("Deleted table %s from %s dataset", table_name,
                         dataset_id)

Source File: test_user_info_updater.py From professional-services with Apache License 2.0

6 votes

def create_table(self, table_id):
        """Creates test user tables, replacing any table with the same ID.

        Args:
            table_id(str): ID of the user table to be created.

        Returns:
            The created table (google.cloud.bigquery.table.Table).
        """
        table_ref = self.dataset_ref.table(table_id)
        table = bigquery.Table(table_ref, schema=self.bq_schema)
        # Only the delete is guarded: a NotFound raised by create_table itself
        # must propagate instead of triggering a second create attempt.
        try:
            self.bq_client.delete_table(table)
        except exceptions.NotFound:
            pass  # Nothing to remove; the table did not exist yet.
        return self.bq_client.create_table(table)

Source File: test_nested_user_info_updater.py From professional-services with Apache License 2.0

6 votes

def create_table(self, table_id):
        """Creates test user tables, replacing any table with the same ID.

        Args:
            table_id(str): ID of the user table to be created.

        Returns:
            The created table (google.cloud.bigquery.table.Table).
        """
        table_ref = self.dataset_ref.table(table_id)
        table = bigquery.Table(table_ref, schema=self.bq_schema)
        # Only the delete is guarded: a NotFound raised by create_table itself
        # must propagate instead of triggering a second create attempt.
        try:
            self.bq_client.delete_table(table)
        except exceptions.NotFound:
            pass  # Nothing to remove; the table did not exist yet.
        return self.bq_client.create_table(table)

Source File: test_user_info_updater.py From professional-services with Apache License 2.0

6 votes

def create_table(self, table_id):
        """Creates test user tables, replacing any table with the same ID.

        Args:
            table_id(str): ID of the user table to be created.

        Returns:
            The created table (google.cloud.bigquery.table.Table).
        """
        table_ref = self.dataset_ref.table(table_id)
        table = bigquery.Table(table_ref, schema=self.bq_schema)
        # Only the delete is guarded: a NotFound raised by create_table itself
        # must propagate instead of triggering a second create attempt.
        try:
            self.bq_client.delete_table(table)
        except exceptions.NotFound:
            pass  # Nothing to remove; the table did not exist yet.
        return self.bq_client.create_table(table)

Source File: test_nested_user_info_updater.py From professional-services with Apache License 2.0

6 votes

def create_table(self, table_id):
        """Creates test user tables, replacing any table with the same ID.

        Args:
            table_id(str): ID of the user table to be created.

        Returns:
            The created table (google.cloud.bigquery.table.Table).
        """
        table_ref = self.dataset_ref.table(table_id)
        table = bigquery.Table(table_ref, schema=self.bq_schema)
        # Only the delete is guarded: a NotFound raised by create_table itself
        # must propagate instead of triggering a second create attempt.
        try:
            self.bq_client.delete_table(table)
        except exceptions.NotFound:
            pass  # Nothing to remove; the table did not exist yet.
        return self.bq_client.create_table(table)

Source File: test_update_table_require_partition_filter.py From python-bigquery with Apache License 2.0

6 votes

def test_update_table_require_partition_filter(capsys, random_table_id, client):
    """Run the sample against a time-partitioned table and check its output."""
    # Create a table partitioned on its timestamp column.
    partitioned = bigquery.Table(
        random_table_id,
        schema=[bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")],
    )
    partitioned.time_partitioning = bigquery.TimePartitioning(
        field="transaction_timestamp"
    )
    client.create_table(partitioned)

    update_table_require_partition_filter.update_table_require_partition_filter(
        random_table_id
    )
    stdout, _stderr = capsys.readouterr()
    expected = "Updated table '{}' with require_partition_filter=True".format(
        random_table_id
    )
    assert expected in stdout

Source File: create_table.py From python-bigquery with Apache License 2.0

6 votes

def create_table(table_id):
    """Create a two-column table with the given ID and print its full name."""
    # [START bigquery_create_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    full_name_field = bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")
    age_field = bigquery.SchemaField("age", "INTEGER", mode="REQUIRED")

    requested = bigquery.Table(table_id, schema=[full_name_field, age_field])
    created = client.create_table(requested)  # Make an API request.
    name_parts = (created.project, created.dataset_id, created.table_id)
    print("Created table {}.{}.{}".format(*name_parts))
    # [END bigquery_create_table]

Source File: system.py From python-bigquery with Apache License 2.0

6 votes

def test_job_cancel(self):
        """Start a query job, cancel it, and wait until ``_job_done`` is satisfied."""
        dataset_id = _make_dataset_id("job_cancel")
        job_id_prefix = "fetch_" + dataset_id
        table_name = "test_table"
        query_text = "SELECT * FROM %s.%s" % (dataset_id, table_name)

        dataset = self.temp_dataset(dataset_id)

        create_table = retry_403(Config.CLIENT.create_table)
        table = create_table(Table(dataset.table(table_name), schema=SCHEMA))
        self.to_delete.insert(0, table)

        job = Config.CLIENT.query(query_text, job_id_prefix=job_id_prefix)
        job.cancel()

        reload_until_done = RetryInstanceState(_job_done, max_tries=8)(job.reload)
        reload_until_done()

        # Cancelling leaves no reliable trace on the job resource's status,
        # so there is nothing specific to assert on. The test only shows that
        # the cancel call did not raise and that the job reached the state
        # checked by the retry above.

Source File: bigquery.py From loaner with Apache License 2.0

6 votes

def _create_table(self, table_name, entity_instance):
    """Creates a BigQuery Table or attempts to update an existing schema.

    When creation conflicts with an existing table, the stored table is
    fetched, its schema is merged with the newly generated one, and only the
    schema field is updated.

    Args:
      table_name: str, name of the table to be created or updated.
      entity_instance: an ndb.Model entity instance to base the schema on.
    """
    table_ref = bigquery.TableReference(self._dataset_ref, table_name)
    entity_schema = _generate_entity_schema(entity_instance)
    table_schema = _generate_schema(entity_schema)
    table = bigquery.Table(table_ref, schema=table_schema)
    try:
      table = self._client.create_table(table)
    except cloud.exceptions.Conflict:
      logging.info('Table %s already exists, attempting to update it.',
                   table_name)
      # The locally built Table only carries the freshly generated schema, so
      # merging against it would merge the new schema with itself. Fetch the
      # stored table to merge against the schema that actually exists.
      table = self._client.get_table(table_ref)
      table.schema = _merge_schemas(table.schema, table_schema)
      table = self._client.update_table(table, ['schema'])
      logging.info('Table %s updated.', table_name)
    else:
      logging.info('Table %s created.', table_name)

Source File: system.py From python-bigquery with Apache License 2.0

6 votes

def test_update_table_schema(self):
        """Append a column to a created table and verify the updated schema."""
        dataset = self.temp_dataset(_make_dataset_id("update_table"))

        table_arg = Table(dataset.table("test_table"), schema=SCHEMA)
        self.assertFalse(_table_exists(table_arg))
        table = retry_403(Config.CLIENT.create_table)(table_arg)
        self.to_delete.insert(0, table)
        self.assertTrue(_table_exists(table))

        # Extend the current schema with one extra nullable column.
        voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE")
        expected_schema = list(table.schema) + [voter]
        table.schema = expected_schema

        updated_table = Config.CLIENT.update_table(table, ["schema"])

        self.assertEqual(len(updated_table.schema), len(expected_schema))
        for actual, wanted in zip(updated_table.schema, expected_schema):
            for attribute in ("name", "field_type", "mode"):
                self.assertEqual(
                    getattr(actual, attribute), getattr(wanted, attribute)
                )

Source File: create_table.py From python-bigquery with Apache License 2.0

6 votes

def create_table(table_id):
    """Create a two-column table with the given ID and print its full name."""
    # [START bigquery_create_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    full_name_field = bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")
    age_field = bigquery.SchemaField("age", "INTEGER", mode="REQUIRED")

    requested = bigquery.Table(table_id, schema=[full_name_field, age_field])
    created = client.create_table(requested)  # Make an API request.
    name_parts = (created.project, created.dataset_id, created.table_id)
    print("Created table {}.{}.{}".format(*name_parts))
    # [END bigquery_create_table]

Source File: system.py From python-bigquery with Apache License 2.0

6 votes

def tearDown(self):
        """Delete every object queued in ``self.to_delete``, retrying known errors."""

        def _resource_in_use(bad_request):
            # True when any reported error has the "resourceInUse" reason.
            for error in bad_request._errors:
                if error["reason"] == "resourceInUse":
                    return True
            return False

        retry_while_in_use = RetryErrors(BadRequest, error_predicate=_resource_in_use)
        retry_bucket_delete = RetryErrors(
            (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable)
        )

        for resource in self.to_delete:
            if isinstance(resource, storage.Bucket):
                delete_bucket = retry_bucket_delete(resource.delete)
                delete_bucket(force=True)
                continue
            if isinstance(resource, (Dataset, bigquery.DatasetReference)):
                delete_dataset = retry_while_in_use(Config.CLIENT.delete_dataset)
                delete_dataset(resource, delete_contents=True)
                continue
            if isinstance(resource, (Table, bigquery.TableReference)):
                delete_table = retry_while_in_use(Config.CLIENT.delete_table)
                delete_table(resource)
                continue
            # Anything else is expected to know how to delete itself.
            resource.delete()

Source File: test_update_table_require_partition_filter.py From python-bigquery with Apache License 2.0

6 votes

def test_update_table_require_partition_filter(capsys, random_table_id, client):
    """Run the sample against a time-partitioned table and check its output."""
    # Create a table partitioned on its timestamp column.
    partitioned = bigquery.Table(
        random_table_id,
        schema=[bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")],
    )
    partitioned.time_partitioning = bigquery.TimePartitioning(
        field="transaction_timestamp"
    )
    client.create_table(partitioned)

    update_table_require_partition_filter.update_table_require_partition_filter(
        random_table_id
    )
    stdout, _stderr = capsys.readouterr()
    expected = "Updated table '{}' with require_partition_filter=True".format(
        random_table_id
    )
    assert expected in stdout

Source File: test_get_table.py From python-bigquery with Apache License 2.0

6 votes

def test_get_table(capsys, random_table_id, client):
    """Create a described table, run the ``get_table`` sample, check its output."""
    columns = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    pending = bigquery.Table(random_table_id, columns)
    pending.description = "Sample Table"
    created = client.create_table(pending)

    get_table.get_table(random_table_id)
    stdout, _stderr = capsys.readouterr()
    assert "Got table '{}'.".format(random_table_id) in stdout
    assert "full_name" in stdout
    assert "Table description: Sample Table" in stdout
    assert "Table has 0 rows" in stdout
    # Remove the table again; a table that is already gone is not an error.
    client.delete_table(created, not_found_ok=True)

Source File: bigquery_resource_util.py From professional-services with Apache License 2.0

6 votes

def create_table(self, table_id, schema):
        """Creates a table in BigQuery and logs a timestamped confirmation.

        Args:
            table_id(str): Id of the table to be created.
            schema(List[google.cloud.bigquery.schema.SchemaField]): The
                schema of the table to be created in BigQuery format.

        Returns: The created table (google.cloud.bigquery.table.Table).
        """
        pending = bigquery.Table(self.dataset_ref.table(table_id), schema=schema)
        created_table = self.bq_client.create_table(pending)
        timestamp = str(datetime.datetime.now())
        logging.info('{0:s} Created Table {1:s}'.format(timestamp, table_id))
        return created_table

Source File: bigQueryUtil.py From CDSS with GNU General Public License v3.0

5 votes

def create_new_table_from_schema(self, dataset_id, table_id,
                                     schema):
        '''
        Create the table only when a lookup reports that it does not exist.

        https://cloud.google.com/bigquery/docs/tables#create-table

        :param dataset_id: dataset name
        :param table_id: table name
        :param schema:
            schema = [
                bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED', description='blah'),
                bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
            ]
        :return: None
        '''
        table_ref = self.client.dataset(dataset_id).table(table_id)

        try:
            # An existing table is left untouched and nothing is logged.
            self.client.get_table(table_ref)
        except NotFound:
            # Build the full Table object and send the create request.
            created = self.client.create_table(
                bigquery.Table(table_ref, schema=schema))
            log.info('''
                    Table {} in dataset {}
                    created successfully project: {}.
                    '''.format(created.table_id, dataset_id, self.client.project))

Source File: conftest.py From python-bigquery with Apache License 2.0

5 votes

def table_id(client, dataset_id):
    """Yield the full ID of a freshly created table, then delete the table.

    The table name combines a timestamp with eight hex characters of a
    random UUID.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    random_suffix = uuid.uuid4().hex[:8]
    table_id = "python_table_sample_{}_{}".format(timestamp, random_suffix)

    created = client.create_table(
        bigquery.Table("{}.{}".format(dataset_id, table_id))
    )
    yield "{}.{}.{}".format(created.project, created.dataset_id, created.table_id)
    # Runs once the consumer resumes the generator; a missing table is tolerated.
    client.delete_table(created, not_found_ok=True)

Source File: test_table_insert_rows_explicit_none_insert_ids.py From python-bigquery with Apache License 2.0

5 votes

def test_table_insert_rows_explicit_none_insert_ids(capsys, random_table_id, client):
    """Create a two-column table, run the sample, and check its success message."""
    columns = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    client.create_table(bigquery.Table(random_table_id, schema=columns))

    mut.table_insert_rows_explicit_none_insert_ids(random_table_id)
    stdout, _stderr = capsys.readouterr()
    assert "New rows have been added." in stdout

Source File: bigquery_component.py From professional-services with Apache License 2.0

5 votes

def create_table(self, dataset_id, table_name, schema):
        """Creates BigQuery table.

        Args:
            dataset_id (str): BigQuery dataset id.
            table_name (str): BigQuery table name.
            schema (List[google.cloud.bigquery.schema.SchemaField]): Schema
                of the table to be created.
        """
        target_ref = self.client.dataset(dataset_id).table(table_name)
        # The schema is passed positionally as the second Table argument.
        self.client.create_table(bigquery.Table(target_ref, schema))

Source File: system.py From python-bigquery with Apache License 2.0

5 votes

def test_list_rows_empty_table(self):
        """Listing rows of a schemaless, empty table yields an empty RowIterator."""
        from google.cloud.bigquery.table import RowIterator

        dataset = self.temp_dataset(_make_dataset_id("empty_table"))
        empty_table = Config.CLIENT.create_table(
            bigquery.Table(dataset.table("empty_table"))
        )

        # Listing an empty table looks pointless, but it does occur after a
        # DDL query issued from an IPython magic command.
        row_iterator = Config.CLIENT.list_rows(empty_table)
        self.assertIsInstance(row_iterator, RowIterator)
        self.assertEqual(tuple(row_iterator), ())

Source File: BigQueryConnect_py2.py From CDSS with GNU General Public License v3.0

5 votes

def create_new_table_from_schema(self, dataset_id, table_id,
                                     schema):
        '''
        Create the table only when a lookup reports that it does not exist.

        https://cloud.google.com/bigquery/docs/tables#create-table

        :param dataset_id: dataset name
        :param table_id: table name
        :param schema:
            schema = [
                bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED', description='blah'),
                bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
            ]
        :return: None
        '''
        table_ref = self.client.dataset(dataset_id).table(table_id)

        try:
            # An existing table is left untouched and nothing is logged.
            self.client.get_table(table_ref)
        except NotFound:
            # Build the full Table object and send the create request.
            created = self.client.create_table(
                bigquery.Table(table_ref, schema=schema))
            log.info('''
                    Table {} in dataset {}
                    created successfully project: {}.
                    '''.format(created.table_id, dataset_id, self.client.project))

Source File: bigquery.py From openprescribing with MIT License

5 votes

def create_table(self, table_id, schema):
        """Create the table, creating the dataset first if that is what is missing.

        Returns a ``Table`` wrapper built from the table reference and ``self``.
        """
        table_ref = self.dataset.table(table_id)
        new_table = gcbq.Table(table_ref, schema=schema)

        try:
            self.gcbq_client.create_table(new_table)
        except NotFound as not_found:
            if dataset_is_missing(not_found):
                # Create the dataset, then retry the table creation once.
                self.create_dataset()
                self.gcbq_client.create_table(new_table)
            else:
                raise

        return Table(table_ref, self)

Source File: BigQueryConnect.py From CDSS with GNU General Public License v3.0

5 votes

def create_new_table_from_schema(self, dataset_id: str, table_id: str,
                                     schema: List[bigquery.SchemaField]) -> None:
        '''
        Create the table unless a lookup shows that it already exists.

        https://cloud.google.com/bigquery/docs/tables#create-table

        :param dataset_id: dataset name
        :param table_id: table name
        :param schema:
            schema = [
                bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED', description='blah'),
                bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
            ]
        :return: None
        '''

        dataset_ref = self.client.dataset(dataset_id)
        table_ref = dataset_ref.table(table_id)

        try:
            self.client.get_table(table_ref)
            print(f'Table {table_id} in dataset {dataset_id} already exists! Skipping create operation.')
        except NotFound:
            # Construct a full Table object to send to the API.
            table = bigquery.Table(table_ref, schema=schema)
            table = self.client.create_table(table)  # API request
            # Adjacent literals are concatenated as-is, so the separating
            # space has to be part of the first literal.
            print(
                f'Table {table.table_id} in dataset {dataset_id} '
                f'created successfully project: {self.client.project}.'
            )

Source File: test_table_exists.py From python-bigquery with Apache License 2.0

5 votes

def test_table_exists(capsys, random_table_id, client):
    """Check the sample's output before and after the table is created."""
    # First run: the table has not been created yet.
    table_exists.table_exists(random_table_id)
    stdout, _stderr = capsys.readouterr()
    assert "Table {} is not found.".format(random_table_id) in stdout

    # Second run: the same ID after creating the table.
    client.create_table(bigquery.Table(random_table_id))
    table_exists.table_exists(random_table_id)
    stdout, _stderr = capsys.readouterr()
    assert "Table {} already exists.".format(random_table_id) in stdout

Python google.cloud.bigquery.Table() Examples