Python google.cloud.bigquery.LoadJobConfig() Examples

The following are 27 code examples of google.cloud.bigquery.LoadJobConfig(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module google.cloud.bigquery, or try the search function.
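Across the examples below, LoadJobConfig is configured in two equivalent styles: properties passed as keyword arguments to the constructor, or attributes assigned on an instance after construction. A minimal sketch combining the most common pieces (the bucket URI and table ID are placeholders, not taken from any example below):

from google.cloud import bigquery

client = bigquery.Client()

# Style 1: pass properties as constructor keyword arguments.
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Style 2: assign the same properties as attributes after construction.
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.CSV
job_config.skip_leading_rows = 1
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Placeholder URI and table ID.
load_job = client.load_table_from_uri(
    "gs://your-bucket/your-file.csv",
    "your-project.your_dataset.your_table",
    job_config=job_config,
)
load_job.result()  # Wait for the load job to finish.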
Example #1
Source File: gcp_utils.py    From recommender-tensorflow with MIT License
def df_to_bigquery(df, table_id, dataset_id, client):
    table = get_bigquery_table(table_id, dataset_id, client)


    # insert table
    job = client.load_table_from_dataframe(
        dataframe=df.compute().rename_axis("id"),
        destination=table,
        job_config=job_config
    )
    job.result()
    logger.info('%s rows loaded into %s.%s.%s.', job.output_rows, job.project, dataset_id, table_id)
    return table 
Example #2
Source File: load_table_uri_parquet.py    From python-bigquery with Apache License 2.0
def load_table_uri_parquet(table_id):
    # [START bigquery_load_table_gcs_parquet]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET)
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_parquet] 
Example #3
Source File: bigquery.py    From openprescribing with MIT License
def run_job(self, method_name, args, config_opts, config_default_opts):
        job_config = {
            "copy_table": gcbq.CopyJobConfig,
            "extract_table": gcbq.ExtractJobConfig,
            "load_table_from_file": gcbq.LoadJobConfig,
            "load_table_from_uri": gcbq.LoadJobConfig,
            "query": gcbq.QueryJobConfig,
        }[method_name]()

        for k, v in config_default_opts.items():
            setattr(job_config, k, v)
        for k, v in config_opts.items():
            setattr(job_config, k, v)

        method = getattr(self.gcbq_client, method_name)

        job = method(*args, job_config=job_config)
        if getattr(job_config, "dry_run", False):
            return []
        else:
            return job.result() 
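A hypothetical invocation of run_job (the wrapper instance, URI, table name, and option values are illustrative assumptions, not from the openprescribing source):

    # Hypothetical usage; defaults are applied first, then per-call options
    # override them via setattr on the chosen job-config class.
    rows = wrapper.run_job(
        "load_table_from_uri",
        ("gs://your-bucket/data.csv", "your_dataset.your_table"),
        {"skip_leading_rows": 1},
        {"source_format": "CSV", "write_disposition": "WRITE_TRUNCATE"},
    )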
Example #4
Source File: load_table_uri_orc.py    From python-bigquery with Apache License 2.0
def load_table_uri_orc(table_id):

    # [START bigquery_load_table_gcs_orc]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.ORC)
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_orc] 
Example #5
Source File: helper_function.py    From professional-services with Apache License 2.0
def csv_in_gcs_to_table(bucket_name: str, object_name: str, dataset_id: str,
                        table_id: str,
                        schema: List[bigquery.SchemaField]) -> None:
    """Upload CSV to BigQuery table.
        If the table already exists, it overwrites the table data.

    Args:
        bucket_name: Bucket name for holding the object
        object_name: Name of object to be uploaded
        dataset_id: Dataset id where the table is located.
        table_id: String holding the ID of the table.
        schema: Schema of the table_id
    """
    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = schema
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    uri = "gs://{}/{}".format(bucket_name, object_name)
    load_job = client.load_table_from_uri(uri,
                                          dataset_ref.table(table_id),
                                          job_config=job_config)
    load_job.result() 
Example #6
Source File: load_table_uri_avro.py    From python-bigquery with Apache License 2.0
def load_table_uri_avro(table_id):

    # [START bigquery_load_table_gcs_avro]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.AVRO)
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_avro] 
Example #7
Source File: main.py    From BigQuery-integrations with MIT License
def give_file_gbq(path_to_file, bq_configuration):
    """
        Upload the file at *path_to_file* to a BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.write_disposition = bq_configuration["write_disposition"]
    job_config.autodetect = True

    # upload the file to the BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location=bq_configuration["location"], job_config=job_config)
    job.result()
    print("Job {} finished in status {} for table {}.{}.{}.".format(
        job.job_id, job.state,
        bq_configuration["project_id"], bq_configuration["dataset_id"], bq_configuration["table_id"]))
Example #8
Source File: main.py    From BigQuery-integrations with MIT License
def give_file_gbq(path_to_file, bq_configuration):
    """
        Upload the file at *path_to_file* to a BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to the BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location=bq_configuration["location"], job_config=job_config)
    job.result()
    print("Job {} finished in status {} for table {}.{}.{}.".format(
        job.job_id, job.state,
        bq_configuration["project_id"], bq_configuration["dataset_id"], bq_configuration["table_id"]))
    os.remove(path_to_file)
Example #9
Source File: main.py    From BigQuery-integrations with MIT License
def load_to_gbq(filename, bq_configuration):
    """
        Load data to BigQuery using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = bq_configuration["source_format"]
    job_config.autodetect = True
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.skip_leading_rows = 1

    # upload the file to the BigQuery table
    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location=bq_configuration["location"], job_config=job_config)
    job.result()
    print("Job {} finished in status {} for table {}.{}.{}.".format(
        job.job_id, job.state,
        bq_configuration["project_id"], bq_configuration["dataset_id"], bq_configuration["table"]))
    os.remove(filename)
Example #10
Source File: main.py    From BigQuery-integrations with MIT License
def load_to_gbq(client, data, bq_configuration):
    """
        Load data to BigQuery using *bq_configuration* settings.
    """
    # the BigQuery client is supplied by the caller
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.autodetect = True

    load_job = client.load_table_from_file(
        data,
        table_ref,
        job_config=job_config)  # API request
    print('Starting job {}'.format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print('Job finished.')
Example #11
Source File: load_table_uri_csv.py    From python-bigquery with Apache License 2.0
def load_table_uri_csv(table_id):

    # [START bigquery_load_table_gcs_csv]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
        skip_leading_rows=1,
        # The source format defaults to CSV, so the line below is optional.
        source_format=bigquery.SourceFormat.CSV,
    )
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)  # Make an API request.
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv] 
Example #12
Source File: load_table_uri_truncate_avro.py    From python-bigquery with Apache License 2.0
def load_table_uri_truncate_avro(table_id):

    # [START bigquery_load_table_gcs_avro_truncate]
    import six

    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.AVRO,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_avro_truncate] 
Example #13
Source File: load_table_file.py    From python-bigquery with Apache License 2.0
def load_table_file(file_path, table_id):

    # [START bigquery_load_from_file]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV, skip_leading_rows=1, autodetect=True,
    )

    with open(file_path, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_id, job_config=job_config)

    job.result()  # Waits for the job to complete.

    table = client.get_table(table_id)  # Make an API request.
    print(
        "Loaded {} rows and {} columns to {}".format(
            table.num_rows, len(table.schema), table_id
        )
    )
    # [END bigquery_load_from_file]
    return table 
Example #14
Source File: load_table_uri_truncate_parquet.py    From python-bigquery with Apache License 2.0
def load_table_uri_truncate_parquet(table_id):

    # [START bigquery_load_table_gcs_parquet_truncate]
    import six

    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.PARQUET,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_parquet_truncate] 
Example #15
Source File: load_table_uri_truncate_json.py    From python-bigquery with Apache License 2.0
def load_table_uri_truncate_json(table_id):

    # [START bigquery_load_table_gcs_json_truncate]
    import six

    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_json_truncate] 
Example #16
Source File: test_copy_table_multiple_source.py    From python-bigquery with Apache License 2.0
def test_copy_table_multiple_source(capsys, random_table_id, random_dataset_id, client):

    dataset = bigquery.Dataset(random_dataset_id)
    dataset.location = "US"
    dataset = client.create_dataset(dataset)
    table_data = {"table1": b"Washington,WA", "table2": b"California,CA"}
    for table_id, data in table_data.items():
        table_ref = dataset.table(table_id)
        job_config = bigquery.LoadJobConfig(
            schema=[
                bigquery.SchemaField("name", "STRING"),
                bigquery.SchemaField("post_abbr", "STRING"),
            ]
        )
        body = six.BytesIO(data)
        client.load_table_from_file(
            body, table_ref, location="US", job_config=job_config
        ).result()

    table_ids = [
        "{}.table1".format(random_dataset_id),
        "{}.table2".format(random_dataset_id),
    ]

    copy_table_multiple_source.copy_table_multiple_source(random_table_id, table_ids)
    dest_table = client.get_table(random_table_id)
    out, err = capsys.readouterr()
    assert (
        "The tables {} have been appended to {}".format(table_ids, random_table_id)
        in out
    )
    assert dest_table.num_rows > 0 
Example #17
Source File: system.py    From python-bigquery with Apache License 2.0
def test_load_table_from_uri_then_dump_table(self):
        from google.cloud.bigquery.job import CreateDisposition
        from google.cloud.bigquery.job import SourceFormat
        from google.cloud.bigquery.job import WriteDisposition

        TABLE_ID = "test_table"
        GS_URL = self._write_csv_to_storage(
            "bq_load_test" + unique_resource_id(), "person_ages.csv", HEADER_ROW, ROWS
        )

        dataset = self.temp_dataset(_make_dataset_id("load_gcs_then_dump"))

        table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA)
        table = retry_403(Config.CLIENT.create_table)(table_arg)
        self.to_delete.insert(0, table)

        config = bigquery.LoadJobConfig()
        config.create_disposition = CreateDisposition.CREATE_NEVER
        config.skip_leading_rows = 1
        config.source_format = SourceFormat.CSV
        config.write_disposition = WriteDisposition.WRITE_EMPTY
        job = Config.CLIENT.load_table_from_uri(
            GS_URL, dataset.table(TABLE_ID), job_config=config
        )

        # Allow for 90 seconds of "warm up" before rows visible.  See
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        rows = self._fetch_single_page(table)
        row_tuples = [r.values() for r in rows]
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(row_tuples, key=by_age), sorted(ROWS, key=by_age)) 
Example #18
Source File: system.py    From python-bigquery with Apache License 2.0
def test_list_rows_page_size(self):
        from google.cloud.bigquery.job import SourceFormat
        from google.cloud.bigquery.job import WriteDisposition

        num_items = 7
        page_size = 3
        num_pages, num_last_page = divmod(num_items, page_size)

        SF = bigquery.SchemaField
        schema = [SF("string_col", "STRING", mode="NULLABLE")]
        to_insert = [{"string_col": "item%d" % i} for i in range(num_items)]
        rows = [json.dumps(row) for row in to_insert]
        body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))

        table_id = "test_table"
        dataset = self.temp_dataset(_make_dataset_id("nested_df"))
        table = dataset.table(table_id)
        self.to_delete.insert(0, table)
        job_config = bigquery.LoadJobConfig()
        job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
        job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
        job_config.schema = schema
        # Load a table using a local JSON file from memory.
        Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result()

        row_iterator = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size)
        pages = row_iterator.pages

        for i in range(num_pages):
            page = next(pages)
            self.assertEqual(page.num_items, page_size)
        page = next(pages)
        self.assertEqual(page.num_items, num_last_page) 
Example #19
Source File: load_table_uri_json.py    From python-bigquery with Apache License 2.0
def load_table_uri_json(table_id):
    # [START bigquery_load_table_gcs_json]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
        source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    )
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"

    load_job = client.load_table_from_uri(
        uri,
        table_id,
        location="US",  # Must match the destination dataset location.
        job_config=job_config,
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_json] 
Example #20
Source File: client_load_partitioned_table.py    From python-bigquery with Apache License 2.0
def client_load_partitioned_table(table_id):

    # [START bigquery_load_table_partitioned]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
            bigquery.SchemaField("date", "DATE"),
        ],
        skip_leading_rows=1,
        time_partitioning=bigquery.TimePartitioning(
            type_=bigquery.TimePartitioningType.DAY,
            field="date",  # Name of the column to use for partitioning.
            expiration_ms=7776000000,  # 90 days.
        ),
    )
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Wait for the job to complete.

    table = client.get_table(table_id)
    print("Loaded {} rows to table {}".format(table.num_rows, table_id))
    # [END bigquery_load_table_partitioned] 
Example #21
Source File: load_table_uri_truncate_csv.py    From python-bigquery with Apache License 2.0
def load_table_uri_truncate_csv(table_id):

    # [START bigquery_load_table_gcs_csv_truncate]
    import six

    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv_truncate] 
Example #22
Source File: load_table_uri_autodetect_csv.py    From python-bigquery with Apache License 2.0
def load_table_uri_autodetect_csv(table_id):

    # [START bigquery_load_table_gcs_csv_autodetect]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(
        autodetect=True,
        skip_leading_rows=1,
        # The source format defaults to CSV, so the line below is optional.
        source_format=bigquery.SourceFormat.CSV,
    )
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.
    load_job.result()  # Waits for the job to complete.
    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv_autodetect] 
Example #23
Source File: test_table_util.py    From professional-services with Apache License 2.0
def test_set_table_properties(self):
        """Tests TableUtil.set_table_properties().

        Tests TableUtil's ability to set num_columns, num_rows, table_size,
        and column_types properties after a table has been created and data
        has been loaded into the table.
        """
        self.table_util.create_table()
        job_config = bigquery.LoadJobConfig()
        job_config.source_format = bigquery.SourceFormat.CSV
        job_config.skip_leading_rows = 1
        abs_path = os.path.abspath(os.path.dirname(__file__))
        data_file = os.path.join(abs_path, 'test_data/test.csv')
        with open(data_file, 'rb') as file_obj:
            load_job = self.bq_client.load_table_from_file(
                file_obj=file_obj,
                destination=self.table_util.table_ref,
                job_config=job_config,
            )
            load_job.result()
        self.table_util.set_table_properties()
        assert self.table_util.num_columns == 2
        assert self.table_util.num_rows == 3
        assert self.table_util.table_size == 75
        assert self.table_util.column_types == '50_STRING_50_NUMERIC' 
Example #24
Source File: group_sync.py    From professional-services with Apache License 2.0
def store_group_members(self, table_ref, data):
    """Stores the given group membership data into the given table.

    All data in the table will be replaced by the new data.

    Args:
      table_ref: a reference to the table that will receive the data. Needs to
        have the correct schema as created by `create_group_members_table`.
      data: bi-dimensional array representing the data to be inserted.

    Returns:
      The load job created by the BigQuery client; call its result() method
      to wait for completion and surface any errors.
    """

    # Use batch load from in-memory CSV file instead of streaming.
    csv_file = StringIO()
    csv_writer = csv.writer(csv_file)
    csv_writer.writerows(data)

    # Configure the job to replace the table data (the default is to append).
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE

    load_job = self.bq_client.load_table_from_file(
        csv_file, table_ref, rewind=True, job_config=job_config)
    return load_job
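A hypothetical call (the syncer instance and the dataset/table names below are assumptions, not from the source) illustrates the batch-load path, which sidesteps the per-row quotas and eventual-consistency caveats of the streaming insert API:

    # Hypothetical usage; "syncer", "directory", and "group_members" are
    # illustrative names only.
    table_ref = syncer.bq_client.dataset("directory").table("group_members")
    rows = [
        ["group@example.com", "alice@example.com"],
        ["group@example.com", "bob@example.com"],
    ]
    load_job = syncer.store_group_members(table_ref, rows)
    load_job.result()  # Wait for the batch load to complete.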
Example #25
Source File: import_pipeline.py    From professional-services with Apache License 2.0
def process(self, element, schemas):
        """Element is a tuple of (key name, iterable of filesystem paths)."""

        dataset_ref = self.get_dataset_ref()
        sharded_key_name = element[0]
        key_name = AssignGroupByKey.remove_shard(element[0])
        object_paths = list(element[1])
        job_config = bigquery.LoadJobConfig()
        job_config.write_disposition = 'WRITE_APPEND'
        job_config.schema_update_options = [
            bigquery.job.SchemaUpdateOption.ALLOW_FIELD_ADDITION]

        table_ref = dataset_ref.table(self.asset_type_to_table_name(key_name))

        # partition the table on the 'timestamp' field (which holds the load time).
        job_config.time_partitioning = bigquery.table.TimePartitioning(
            field='timestamp')
        job_config.schema = self.to_bigquery_schema(schemas[sharded_key_name])
        job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
        try:
            load_job = self.bigquery_client.load_table_from_uri(
                object_paths,
                table_ref,
                location=self.dataset_location,
                job_config=job_config)
            self.load_jobs[key_name] = load_job
        except BadRequest as e:
            logging.error('error in load_job %s, %s, %s, %s',
                          str(object_paths), str(table_ref),
                          str(self.dataset_location),
                          str(job_config.to_api_repr()))
            raise e 
Example #26
Source File: test_user_info_updater.py    From professional-services with Apache License 2.0
def load_csv_to_bq(self, filename, table):
        job_config = bigquery.LoadJobConfig()
        job_config.source_format = bigquery.SourceFormat.CSV
        job_config.skip_leading_rows = 1

        abs_path = os.path.abspath(os.path.dirname(__file__))
        data_file = os.path.join(abs_path, filename)
        with open(data_file, 'rb') as file_obj:
            load_job = self.bq_client.load_table_from_file(
                file_obj=file_obj, destination=table, job_config=job_config)
        return load_job.result() 
Example #27
Source File: test_nested_user_info_updater.py    From professional-services with Apache License 2.0
def load_json_to_bq(self, filename, table):
        job_config = bigquery.LoadJobConfig()
        job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON

        abs_path = os.path.abspath(os.path.dirname(__file__))
        data_file = os.path.join(abs_path, filename)
        with open(data_file, 'rb') as file_obj:
            load_job = self.bq_client.load_table_from_file(
                file_obj=file_obj, destination=table, job_config=job_config)
        return load_job.result() 