Python google.cloud.bigquery.Table() Examples

The following are 30 code examples of google.cloud.bigquery.Table(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
Example #1
Source File: system.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def test_extract_table(self):
        """Extract a loaded table to a GCS blob and verify the exported text.

        The table and the destination blob are both inserted at the front of
        ``self.to_delete``.
        """
        suffix = unique_resource_id()
        bucket_name = "bq_extract_test" + suffix
        source_blob_name = "person_ages.csv"
        destination_blob_name = "person_ages_out.csv"
        dataset_id = _make_dataset_id("load_gcs_then_extract")

        table_ref = bigquery.DatasetReference(
            Config.CLIENT.project, dataset_id
        ).table("test_table")
        self.to_delete.insert(0, Table(table_ref))

        bucket = self._create_bucket(bucket_name)
        self._load_table_for_extract_table(bucket, source_blob_name, table_ref, ROWS)
        destination = bucket.blob(destination_blob_name)
        destination_uri = "gs://{}/person_ages_out.csv".format(bucket_name)

        extract_job = Config.CLIENT.extract_table(table_ref, destination_uri)
        extract_job.result(timeout=100)

        self.to_delete.insert(0, destination)
        download = retry_storage_errors(destination.download_as_string)
        exported_text = download().decode("utf-8")
        self.assertIn("Bharney Rhubble", exported_text)
Example #2
Source File: server.py    From healthcare-deid with Apache License 2.0 6 votes vote down vote up
def manage_tables(dataset_id, table_id):
  """Create a BigQuery table from the JSON schema in the request body.

  Returns a (response, status) pair: the NotFound error's own code when the
  dataset lookup fails, 400 when the payload fails validation or table
  creation raises a GoogleAPIError, and 200 on success.
  """
  try:
    get_bq_dataset(dataset_id)
  except exceptions.NotFound as e:
    return flask.jsonify(error=e.code, text=e.message), e.code

  table_ref = bq_client.dataset(dataset_id).table(table_id)
  try:
    jsonschema.validate(flask.request.json, bq_table_schema)
  except jsonschema.ValidationError:
    return flask.jsonify(
        error=400, text='unable to validate provided payload.'), 400

  schema = []
  for field in flask.request.json:
    # A missing or empty mode falls back to NULLABLE.
    schema.append(bigquery.SchemaField(
        field['name'], field['type'], field.get('mode') or 'NULLABLE'))

  try:
    bq_client.create_table(bigquery.Table(table_ref, schema=schema))
  except exceptions.GoogleAPIError as e:
    return flask.jsonify(error=e.message), 400

  return flask.jsonify(result='success'), 200
Example #3
Source File: bigquery.py    From professional-services with Apache License 2.0 6 votes vote down vote up
def create_table(self, project_id, dataset_id, table_id, schema=None):
        """Creates a BigQuery table from a schema.

        Args:
            project_id (str): Project id.
            dataset_id (str): Dataset id.
            table_id (str): Table id to create.
            schema: BigQuery table schema in JSON format: an iterable of
                field entries indexed by 'name', 'type' and 'mode'. When
                None, the module-level TABLE_SCHEMA is used.

        Returns:
            Whatever ``self.client.create_table`` returns for the new table.
        """
        # Fall back to the default only when the caller supplied nothing;
        # the previous inverted check discarded caller schemas and iterated
        # over None otherwise.
        if schema is None:
            schema = TABLE_SCHEMA
        pyschema = [
            bigquery.SchemaField(row['name'], row['type'], mode=row['mode'])
            for row in schema
        ]
        table_name = f"{project_id}.{dataset_id}.{table_id}"
        # Lazy %-formatting: passing an extra positional argument alongside
        # an already-formatted message makes logging report a format error.
        LOGGER.info("Creating table %s", table_name)
        table = bigquery.Table(table_name, schema=pyschema)
        return self.client.create_table(table)
Example #4
Source File: analyzer.py    From bigquery-view-analyzer with MIT License 6 votes vote down vote up
def format_tree(self, show_key=False, show_status=False):
        """Render ``self.tree`` as a multi-line string, one node per line.

        Args:
            show_key: When true, the output starts with a colour legend.
            show_status: Forwarded to each node's ``pretty_name`` as
                ``show_authorization_status``.

        Returns:
            str: The optional legend followed by the rendered tree lines.
        """
        log.info("Formatting tree...")

        def legend_entry(colour, label):
            # Coloured bullet followed by the label padded to 12 characters.
            return colour + "◉" + Fore.RESET + label.ljust(12)

        key = {
            "project": legend_entry(Fore.CYAN, " = Project"),
            "dataset": legend_entry(Fore.YELLOW, " = Dataset"),
            "table": legend_entry(Fore.RED, " = Table"),
            "view": legend_entry(Fore.GREEN, " = View"),
        }

        pieces = []
        if show_key:
            pieces.append(
                "Key:\n{}{}\n{}{}\n\n".format(
                    key["project"], key["table"], key["dataset"], key["view"]
                )
            )
        pieces.extend(
            "%s%s\n" % (pre, node.pretty_name(show_authorization_status=show_status))
            for pre, _, node in RenderTree(self.tree)
        )
        return "".join(pieces)
Example #5
Source File: gbq.py    From pandas-gbq with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def delete(self, table_id):
        """Delete a table in Google BigQuery.

        Parameters
        ----------
        table_id : str
            Name of table to be deleted

        Raises
        ------
        NotFoundException
            If ``self.exists(table_id)`` reports the table as missing.
        """
        from google.api_core.exceptions import NotFound

        table_present = self.exists(table_id)
        if not table_present:
            raise NotFoundException("Table does not exist")

        dataset = self.client.dataset(self.dataset_id)
        target = dataset.table(table_id)
        try:
            self.client.delete_table(target)
        except NotFound:
            # The table can disappear between the existence check and the
            # delete call; a 404 at this point is harmless.
            pass
        except self.http_error as http_exc:
            self.process_http_error(http_exc)
Example #6
Source File: datamgr.py    From ibis with Apache License 2.0 6 votes vote down vote up
def parquet(tables, data_directory, ignore_missing_dependency, **params):
    """Write every table read from ``data_directory`` to ``<table>.parquet``.

    When PyArrow cannot be imported, returns 0 after logging a warning if
    ``ignore_missing_dependency`` is truthy and raises
    ``click.ClickException`` otherwise.
    """
    try:
        import pyarrow as pa  # noqa: F401
        import pyarrow.parquet as pq  # noqa: F401
    except ImportError:
        msg = 'PyArrow dependency is missing'
        if not ignore_missing_dependency:
            raise click.ClickException(msg)
        logger.warning('Ignored: %s', msg)
        return 0

    output_dir = Path(data_directory)
    for name, frame in read_tables(tables, output_dir):
        converted = pa.Table.from_pandas(frame)
        destination = output_dir / '{}.parquet'.format(name)
        pq.write_table(converted, str(destination))
Example #7
Source File: bigquery_component.py    From professional-services with Apache License 2.0 6 votes vote down vote up
def delete_table(self, dataset_id, table_name):
        """Deletes BigQuery table.

        A missing table is not treated as an error: the NotFound exception
        is logged at debug level and swallowed.

        Args:
            dataset_id (str): BigQuery dataset id.
            table_name (str): BigQuery table name.
        """
        target = self.client.dataset(dataset_id).table(table_name)
        try:
            self.client.delete_table(target)
        except exceptions.NotFound as not_found:
            logger.debug(not_found)
            logger.debug("Table %s not found in %s dataset. No need to delete",
                         table_name, dataset_id)
        else:
            logger.debug("Deleted table %s from %s dataset", table_name,
                         dataset_id)
Example #8
Source File: test_user_info_updater.py    From professional-services with Apache License 2.0 6 votes vote down vote up
def create_table(self, table_id):
        """Creates a test user table, first deleting any table with that ID.

         Args:
             table_id(str): ID of the user table to be created.

         Returns:
             The created table (google.cloud.bigquery.table.Table).
         """
        table_ref = self.dataset_ref.table(table_id)
        table = bigquery.Table(table_ref, schema=self.bq_schema)
        # Only the delete is guarded: a missing table is expected here,
        # whereas a NotFound raised by create_table must propagate instead
        # of triggering a second, identical create call.
        try:
            self.bq_client.delete_table(table)
        except exceptions.NotFound:
            pass
        return self.bq_client.create_table(table)
Example #9
Source File: test_nested_user_info_updater.py    From professional-services with Apache License 2.0 6 votes vote down vote up
def create_table(self, table_id):
        """Creates a test user table, first deleting any table with that ID.

         Args:
             table_id(str): ID of the user table to be created.

         Returns:
             The created table (google.cloud.bigquery.table.Table).
         """
        table_ref = self.dataset_ref.table(table_id)
        table = bigquery.Table(table_ref, schema=self.bq_schema)
        # Only the delete is guarded: a missing table is expected here,
        # whereas a NotFound raised by create_table must propagate instead
        # of triggering a second, identical create call.
        try:
            self.bq_client.delete_table(table)
        except exceptions.NotFound:
            pass
        return self.bq_client.create_table(table)
Example #10
Source File: test_user_info_updater.py    From professional-services with Apache License 2.0 6 votes vote down vote up
def create_table(self, table_id):
        """Creates a test user table, first deleting any table with that ID.

         Args:
             table_id(str): ID of the user table to be created.

         Returns:
             The created table (google.cloud.bigquery.table.Table).
         """
        table_ref = self.dataset_ref.table(table_id)
        table = bigquery.Table(table_ref, schema=self.bq_schema)
        # Only the delete is guarded: a missing table is expected here,
        # whereas a NotFound raised by create_table must propagate instead
        # of triggering a second, identical create call.
        try:
            self.bq_client.delete_table(table)
        except exceptions.NotFound:
            pass
        return self.bq_client.create_table(table)
Example #11
Source File: test_nested_user_info_updater.py    From professional-services with Apache License 2.0 6 votes vote down vote up
def create_table(self, table_id):
        """Creates a test user table, first deleting any table with that ID.

         Args:
             table_id(str): ID of the user table to be created.

         Returns:
             The created table (google.cloud.bigquery.table.Table).
         """
        table_ref = self.dataset_ref.table(table_id)
        table = bigquery.Table(table_ref, schema=self.bq_schema)
        # Only the delete is guarded: a missing table is expected here,
        # whereas a NotFound raised by create_table must propagate instead
        # of triggering a second, identical create call.
        try:
            self.bq_client.delete_table(table)
        except exceptions.NotFound:
            pass
        return self.bq_client.create_table(table)
Example #12
Source File: test_update_table_require_partition_filter.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def test_update_table_require_partition_filter(capsys, random_table_id, client):
    """Run the sample against a new partitioned table and check its output."""
    # Make a partitioned table.
    partitioned = bigquery.Table(
        random_table_id,
        schema=[bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")],
    )
    partitioned.time_partitioning = bigquery.TimePartitioning(
        field="transaction_timestamp"
    )
    client.create_table(partitioned)

    update_table_require_partition_filter.update_table_require_partition_filter(
        random_table_id
    )

    captured_out = capsys.readouterr()[0]
    expected = "Updated table '{}' with require_partition_filter=True".format(
        random_table_id
    )
    assert expected in captured_out
Example #13
Source File: create_table.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def create_table(table_id):
    """Create a BigQuery table with two required columns and print its ID.

    Args:
        table_id: ID of the table to create, in the form
            "your-project.your_dataset.your_table_name".
    """

    # [START bigquery_create_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    # Both columns are declared REQUIRED.
    schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )
    # [END bigquery_create_table]
Example #14
Source File: system.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def test_job_cancel(self):
        """Start a query job, cancel it, and reload it until ``_job_done`` holds."""
        dataset_id = _make_dataset_id("job_cancel")
        job_id_prefix = "fetch_" + dataset_id
        table_name = "test_table"
        query = "SELECT * FROM %s.%s" % (dataset_id, table_name)

        dataset = self.temp_dataset(dataset_id)

        create_table = retry_403(Config.CLIENT.create_table)
        table = create_table(Table(dataset.table(table_name), schema=SCHEMA))
        self.to_delete.insert(0, table)

        job = Config.CLIENT.query(query, job_id_prefix=job_id_prefix)
        job.cancel()

        reload_until_done = RetryInstanceState(_job_done, max_tries=8)(job.reload)
        reload_until_done()

        # The `cancel` API doesn't leave any reliable traces on the status
        # of the job resource, so there is nothing specific to assert on.
        # The best we can do is note that the API call didn't raise an
        # error, and that the job completed (in the retried reload above).
Example #15
Source File: bigquery.py    From loaner with Apache License 2.0 6 votes vote down vote up
def _create_table(self, table_name, entity_instance):
    """Creates a BigQuery Table or attempts to update an existing schema.

    If the table already exists (create_table raises Conflict), the stored
    table is fetched, its schema is merged with the generated one, and the
    table is updated with the merged schema.

    Args:
      table_name: str, name of the table to be created or updated.
      entity_instance: an ndb.Model entity instance to base the schema on.
    """
    table_ref = bigquery.TableReference(self._dataset_ref, table_name)
    entity_schema = _generate_entity_schema(entity_instance)
    table_schema = _generate_schema(entity_schema)
    table = bigquery.Table(table_ref, schema=table_schema)
    try:
      table = self._client.create_table(table)
    except cloud.exceptions.Conflict:
      logging.info('Table %s already exists, attempting to update it.',
                   table_name)
      # create_table raised, so `table` is still the local object that only
      # carries the generated schema. Fetch the stored table so the merge
      # sees the schema that actually exists in BigQuery.
      table = self._client.get_table(table_ref)
      table.schema = _merge_schemas(table.schema, table_schema)
      table = self._client.update_table(table, ['schema'])
      logging.info('Table %s updated.', table_name)
    else:
      logging.info('Table %s created.', table_name)
Example #16
Source File: system.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def test_update_table_schema(self):
        """Append a field to a created table's schema and verify the update."""
        dataset = self.temp_dataset(_make_dataset_id("update_table"))

        table_arg = Table(dataset.table("test_table"), schema=SCHEMA)
        self.assertFalse(_table_exists(table_arg))
        table = retry_403(Config.CLIENT.create_table)(table_arg)
        self.to_delete.insert(0, table)
        self.assertTrue(_table_exists(table))

        extra_field = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE")
        expected_schema = table.schema
        expected_schema.append(extra_field)
        table.schema = expected_schema

        updated_table = Config.CLIENT.update_table(table, ["schema"])

        self.assertEqual(len(updated_table.schema), len(expected_schema))
        # Compare field by field on the three attributes of interest.
        for found, expected in zip(updated_table.schema, expected_schema):
            for attribute in ("name", "field_type", "mode"):
                self.assertEqual(
                    getattr(found, attribute), getattr(expected, attribute)
                )
Example #17
Source File: create_table.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def create_table(table_id):
    """Create a BigQuery table with two required columns and print its ID.

    Args:
        table_id: ID of the table to create, in the form
            "your-project.your_dataset.your_table_name".
    """

    # [START bigquery_create_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    # Both columns are declared REQUIRED.
    schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )
    # [END bigquery_create_table]
Example #18
Source File: system.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def tearDown(self):
        """Delete every resource in ``self.to_delete``, retrying by resource type."""

        def _still_in_use(bad_request):
            # True when any reported error says the resource is still in use.
            for error in bad_request._errors:
                if error["reason"] == "resourceInUse":
                    return True
            return False

        retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
        retry_storage_errors_conflict = RetryErrors(
            (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable)
        )
        dataset_types = (Dataset, bigquery.DatasetReference)
        table_types = (Table, bigquery.TableReference)

        for resource in self.to_delete:
            if isinstance(resource, storage.Bucket):
                retry_storage_errors_conflict(resource.delete)(force=True)
                continue
            if isinstance(resource, dataset_types):
                retry_in_use(Config.CLIENT.delete_dataset)(
                    resource, delete_contents=True
                )
                continue
            if isinstance(resource, table_types):
                retry_in_use(Config.CLIENT.delete_table)(resource)
                continue
            # Anything else is expected to know how to delete itself.
            resource.delete()
Example #19
Source File: test_update_table_require_partition_filter.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def test_update_table_require_partition_filter(capsys, random_table_id, client):
    """Run the sample against a new partitioned table and check its output."""
    # Make a partitioned table.
    partitioned = bigquery.Table(
        random_table_id,
        schema=[bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")],
    )
    partitioned.time_partitioning = bigquery.TimePartitioning(
        field="transaction_timestamp"
    )
    client.create_table(partitioned)

    update_table_require_partition_filter.update_table_require_partition_filter(
        random_table_id
    )

    captured_out = capsys.readouterr()[0]
    expected = "Updated table '{}' with require_partition_filter=True".format(
        random_table_id
    )
    assert expected in captured_out
Example #20
Source File: test_get_table.py    From python-bigquery with Apache License 2.0 6 votes vote down vote up
def test_get_table(capsys, random_table_id, client):
    """Create a described table and check what the get_table sample prints."""
    new_table = bigquery.Table(
        random_table_id,
        [
            bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
        ],
    )
    new_table.description = "Sample Table"
    created = client.create_table(new_table)

    get_table.get_table(random_table_id)
    printed = capsys.readouterr()[0]

    expected_fragments = (
        "Got table '{}'.".format(random_table_id),
        "full_name",
        "Table description: Sample Table",
        "Table has 0 rows",
    )
    for fragment in expected_fragments:
        assert fragment in printed
    client.delete_table(created, not_found_ok=True)
Example #21
Source File: bigquery_resource_util.py    From professional-services with Apache License 2.0 6 votes vote down vote up
def create_table(self, table_id, schema):
        """Creates a table in BigQuery and logs a timestamped confirmation.

        Args:
            table_id(str): Id of the table to be created.
            schema(List[google.cloud.bigquery.schema.SchemaField]): The
                schema of the table to be created in BigQuery format.

        Returns: The created table (google.cloud.bigquery.table.Table).
        """
        new_table = bigquery.Table(self.dataset_ref.table(table_id), schema=schema)
        created_table = self.bq_client.create_table(new_table)
        timestamp = str(datetime.datetime.now())
        logging.info('{0:s} Created Table {1:s}'.format(timestamp, table_id))
        return created_table
Example #22
Source File: bigQueryUtil.py    From CDSS with GNU General Public License v3.0 5 votes vote down vote up
def create_new_table_from_schema(self, dataset_id, table_id, schema):
        '''
        Create the table unless it already exists; an existing table is
        left untouched and nothing is logged.

        https://cloud.google.com/bigquery/docs/tables#create-table

        :param dataset_id: dataset name
        :param table_id: table name
        :param schema:
            schema = [
                bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED', description='blah'),
                bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
            ]
        :return: None
        '''
        table_ref = self.client.dataset(dataset_id).table(table_id)

        try:
            self.client.get_table(table_ref)
        except NotFound:
            # Construct a full Table object to send to the API.
            pending = bigquery.Table(table_ref, schema=schema)
            created = self.client.create_table(pending)  # API request
            # The template's embedded newlines and indentation are part of
            # the logged text and are kept as-is.
            template = '''
                    Table {} in dataset {}
                    created successfully project: {}.
                    '''
            log.info(template.format(created.table_id, dataset_id, self.client.project))
Example #23
Source File: conftest.py    From python-bigquery with Apache License 2.0 5 votes vote down vote up
def table_id(client, dataset_id):
    """Create a uniquely named table, yield its full ID, then delete it.

    The table name combines a timestamp with a short random hex suffix.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    unique_name = "python_table_sample_{}_{}".format(
        timestamp, uuid.uuid4().hex[:8]
    )

    pending = bigquery.Table("{}.{}".format(dataset_id, unique_name))
    table = client.create_table(pending)
    yield "{}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    client.delete_table(table, not_found_ok=True)
Example #24
Source File: test_table_insert_rows_explicit_none_insert_ids.py    From python-bigquery with Apache License 2.0 5 votes vote down vote up
def test_table_insert_rows_explicit_none_insert_ids(capsys, random_table_id, client):
    """Create a two-column table, run the sample, and check its output."""
    columns = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    client.create_table(bigquery.Table(random_table_id, schema=columns))

    mut.table_insert_rows_explicit_none_insert_ids(random_table_id)
    captured = capsys.readouterr()
    assert "New rows have been added." in captured[0]
Example #25
Source File: bigquery_component.py    From professional-services with Apache License 2.0 5 votes vote down vote up
def create_table(self, dataset_id, table_name, schema):
        """Creates BigQuery table.

        Args:
            dataset_id (str): BigQuery dataset id.
            table_name (str): BigQuery table name.
            schema (List[google.cloud.bigquery.schema.SchemaField]): Schema
                of the table to be created.
        """
        table_ref = self.client.dataset(dataset_id).table(table_name)
        self.client.create_table(bigquery.Table(table_ref, schema))
Example #26
Source File: system.py    From python-bigquery with Apache License 2.0 5 votes vote down vote up
def test_list_rows_empty_table(self):
        """Listing rows of a schemaless, empty table yields an empty iterator."""
        from google.cloud.bigquery.table import RowIterator

        dataset = self.temp_dataset(_make_dataset_id("empty_table"))
        pending = bigquery.Table(dataset.table("empty_table"))
        table = Config.CLIENT.create_table(pending)

        # Listing rows of an empty table looks pointless, but it happens as
        # the result of a DDL query from an IPython magic command.
        rows = Config.CLIENT.list_rows(table)
        self.assertIsInstance(rows, RowIterator)
        self.assertEqual(tuple(rows), ())
Example #27
Source File: BigQueryConnect_py2.py    From CDSS with GNU General Public License v3.0 5 votes vote down vote up
def create_new_table_from_schema(self, dataset_id, table_id, schema):
        '''
        Create the table unless it already exists; an existing table is
        left untouched and nothing is logged.

        https://cloud.google.com/bigquery/docs/tables#create-table

        :param dataset_id: dataset name
        :param table_id: table name
        :param schema:
            schema = [
                bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED', description='blah'),
                bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
            ]
        :return: None
        '''
        table_ref = self.client.dataset(dataset_id).table(table_id)

        try:
            self.client.get_table(table_ref)
        except NotFound:
            # Construct a full Table object to send to the API.
            pending = bigquery.Table(table_ref, schema=schema)
            created = self.client.create_table(pending)  # API request
            # The template's embedded newlines and indentation are part of
            # the logged text and are kept as-is.
            template = '''
                    Table {} in dataset {}
                    created successfully project: {}.
                    '''
            log.info(template.format(created.table_id, dataset_id, self.client.project))
Example #28
Source File: bigquery.py    From openprescribing with MIT License 5 votes vote down vote up
def create_table(self, table_id, schema):
        """Create the table, creating the dataset first when it is missing.

        A NotFound error that ``dataset_is_missing`` does not recognise is
        re-raised. Returns a ``Table`` built from the table reference and
        this object.
        """
        table_ref = self.dataset.table(table_id)
        new_table = gcbq.Table(table_ref, schema=schema)

        try:
            self.gcbq_client.create_table(new_table)
        except NotFound as exc:
            if dataset_is_missing(exc):
                self.create_dataset()
                self.gcbq_client.create_table(new_table)
            else:
                raise

        return Table(table_ref, self)
Example #29
Source File: BigQueryConnect.py    From CDSS with GNU General Public License v3.0 5 votes vote down vote up
def create_new_table_from_schema(self, dataset_id: str, table_id: str,
                                     schema: List[bigquery.SchemaField]) -> None:
        '''
        Create the table unless it already exists, printing the outcome.

        https://cloud.google.com/bigquery/docs/tables#create-table

        :param dataset_id: dataset name
        :param table_id: table name
        :param schema:
            schema = [
                bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED', description='blah'),
                bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
            ]
        :return: None
        '''

        dataset_ref = self.client.dataset(dataset_id)
        table_ref = dataset_ref.table(table_id)

        try:
            self.client.get_table(table_ref)
            print(f'Table {table_id} in dataset {dataset_id} already exists! Skipping create operation.')
        except NotFound:
            # Construct a full Table object to send to the API.
            table = bigquery.Table(table_ref, schema=schema)
            table = self.client.create_table(table)  # API request
            # Adjacent f-strings concatenate with no separator, so the
            # space after the dataset id has to be written explicitly.
            print(
                f'Table {table.table_id} in dataset {dataset_id} '
                f'created successfully project: {self.client.project}.'
            )
Example #30
Source File: test_table_exists.py    From python-bigquery with Apache License 2.0 5 votes vote down vote up
def test_table_exists(capsys, random_table_id, client):
    """Check the sample's output before and after the table is created."""
    table_exists.table_exists(random_table_id)
    out_before = capsys.readouterr()[0]
    assert "Table {} is not found.".format(random_table_id) in out_before

    client.create_table(bigquery.Table(random_table_id))
    table_exists.table_exists(random_table_id)
    out_after = capsys.readouterr()[0]
    assert "Table {} already exists.".format(random_table_id) in out_after