Python Examples of googleapiclient.http.MediaIoBaseDownload

Source File: gdrive.py From fb2mobi with MIT License

7 votes

def download(self, file_id, path):
        """Download a Drive file into the directory ``path``.

        The local file is named after the ``name`` field of the Drive file
        metadata. The whole payload is buffered in memory before it is
        written to disk.

        :param file_id: ID of the Drive file to fetch.
        :param path: Destination directory; created if it does not exist.
        :return: Full path of the file that was written.
        """
        # exist_ok avoids the race between a separate exists() check and the
        # directory creation.
        os.makedirs(path, exist_ok=True)

        request = self.service.files().get_media(fileId=file_id)
        name = self.service.files().get(fileId=file_id).execute()['name']

        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            _, done = downloader.next_chunk()

        file_name = os.path.join(path, name)
        # The context manager guarantees the handle is closed even if the
        # write fails; the original left the file object open.
        with open(file_name, 'wb') as f:
            f.write(fh.getvalue())

        return file_name

Source File: download.py From google-drive-folder-downloader with MIT License

6 votes

def download_file(service, file_id, location, filename, mime_type):
    """Download a Drive file to ``location + filename``, printing progress.

    Files whose MIME type contains ``vnd.google-apps`` are exported as PDF
    and get a ``.pdf`` suffix appended to ``filename``; everything else is
    fetched as-is. If a chunk download fails, the partial file is removed
    and the process exits with status 1.

    :param service: Drive API service object.
    :param file_id: ID of the Drive file.
    :param location: Destination prefix; concatenated with ``filename``
        directly, so it must already end with a path separator.
    :param filename: Name of the local file.
    :param mime_type: MIME type reported by Drive for the file.
    """
    if 'vnd.google-apps' in mime_type:
        request = service.files().export_media(fileId=file_id,
                mimeType='application/pdf')
        filename += '.pdf'
    else:
        request = service.files().get_media(fileId=file_id)

    target = location + filename
    fh = io.FileIO(target, 'wb')
    try:
        downloader = MediaIoBaseDownload(fh, request, 1024 * 1024 * 1024)
        done = False
        while done is False:
            try:
                status, done = downloader.next_chunk()
            except BaseException:
                # Deliberately broad (same reach as the original bare
                # except): an interrupted download must not leave a
                # truncated file behind. Close first so the remove also
                # works on platforms that lock open files.
                fh.close()
                os.remove(target)
                sys.exit(1)
            print(f'\rDownload {int(status.progress() * 100)}%.', end='')
            sys.stdout.flush()
    finally:
        # The original never closed the handle on success; close() is
        # idempotent, so the failure path above is unaffected.
        fh.close()
    print('')

Source File: campaign_manager.py From orchestra with Apache License 2.0

6 votes

def _download_report(self,
      report_id,
      file_id,
      destination_file,
      chunk_size):
    """Stream a report file into ``destination_file``.

    The file metadata is fetched first; unless its ``status`` is
    ``REPORT_AVAILABLE`` an ``Exception`` is raised and nothing is
    downloaded.

    :param report_id: ID of the report the file belongs to.
    :param file_id: ID of the report file.
    :param destination_file: Writable binary stream receiving the bytes.
    :param chunk_size: Number of bytes requested per chunk.
    :return: The ``fileName`` value from the file metadata.
    """
    metadata_request = self.cm_hook.get_service().files().get(
        reportId=report_id, fileId=file_id)
    report_file = metadata_request.execute()

    report_status = report_file['status']
    if report_status != 'REPORT_AVAILABLE':
      raise Exception(
          'File with ID = %s and Report ID = %s not available, status = %s.'
          % (file_id, report_id, report_status))

    media_request = self.cm_hook.get_service().files().get_media(
        reportId=report_id, fileId=file_id)
    downloader = http.MediaIoBaseDownload(
        destination_file, media_request, chunksize=chunk_size)

    # Pull chunks until the downloader reports completion.
    while True:
      _, finished = downloader.next_chunk()
      if finished:
        break

    return report_file['fileName']

Source File: base_google.py From airflow with Apache License 2.0

6 votes

def download_content_from_request(file_handle, request, chunk_size):
        """
        Stream a media request into a file-like object, chunk by chunk.

        The handle is flushed once the last chunk has been written; it is
        neither rewound nor closed here.

        :param file_handle: io.Base or file object that receives the
            downloaded bytes.
        :type file_handle: io.Base or file object
        :param request: the media request to perform in chunks.
        :type request: googleapiclient.http.HttpRequest
        :param chunk_size: number of bytes to fetch per chunk.
        :type chunk_size: int
        """
        chunked_download = MediaIoBaseDownload(file_handle, request, chunksize=chunk_size)
        while True:
            _, finished = chunked_download.next_chunk()
            if finished is not False:
                break
        file_handle.flush()

Source File: __init__.py From starthinker with Apache License 2.0

6 votes

def download_media(auth, resource_name):
  """Download a DV360 media resource into an in-memory stream.

  Args:
    auth: Credentials selector passed through to API_DV360_Beta.
    resource_name: Name of the media resource to download.

  Returns:
    The io.BytesIO holding the downloaded bytes. No seek is performed
    here, so the stream position is wherever the downloader left it.
  """
  if project.verbose:
    print('SDF: Start Download')

  # run=False yields the request object instead of executing it.
  media_request = API_DV360_Beta(auth).media().download_media(
      resourceName=resource_name).execute(run=False)

  buffer = io.BytesIO()
  downloader = MediaIoBaseDownload(buffer, media_request)

  # Fetch chunk after chunk until the downloader signals completion.
  while True:
    _, finished = downloader.next_chunk()
    if finished is not False:
      break

  if project.verbose:
    print('SDF: End Download')

  return buffer

Source File: google.py From pghoard with Apache License 2.0

6 votes

def get_contents_to_fileobj(self, key, fileobj_to_store_to, *, progress_callback=None):
        """Download the object stored under ``key`` into a file object.

        Progress is written to the debug log at most once every five
        seconds. When ``progress_callback`` is given it is called as
        ``progress_callback(percent, 100)`` each time the percentage has
        advanced past the previously reported value by more than 0.1.

        :param key: Object key; converted with ``format_key_for_backend``.
        :param fileobj_to_store_to: Writable binary file object.
        :param progress_callback: Optional callable taking (done, total).
        :return: Whatever ``_metadata_for_key`` returns for the object.
        """
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r to %r", key, fileobj_to_store_to)
        report_threshold = 0.0
        last_logged_at = 0.0
        with self._object_client(not_found=key) as client:
            media_request = client.get_media(bucket=self.bucket_name, object=key)
            downloader = MediaIoBaseDownload(
                fileobj_to_store_to, media_request, chunksize=DOWNLOAD_CHUNK_SIZE)
            finished = False
            while not finished:
                status, finished = self._retry_on_reset(
                    getattr(downloader, "_request", None), downloader.next_chunk)
                if not status:
                    continue

                percent = status.progress() * 100
                timestamp = time.monotonic()
                if timestamp - last_logged_at >= 5.0:
                    self.log.debug("Download of %r: %d%%", key, percent)
                    last_logged_at = timestamp

                if progress_callback and percent > report_threshold:
                    progress_callback(percent, 100)
                    report_threshold = percent + 0.1
            return self._metadata_for_key(client, key)

Source File: bucket_storage.py From kubeface with Apache License 2.0

6 votes

def get(name, output_handle=None):
    """Download a bucket object and return a handle positioned at its start.

    :param name: Combined bucket/object name, split by
        ``split_bucket_and_name``.
    :param output_handle: Optional writable binary handle. When omitted, a
        ``tempfile.TemporaryFile`` is created to hold the data.
    :return: The handle containing the object, rewound to offset 0.
    """
    bucket_name, file_name = split_bucket_and_name(name)

    if output_handle is None:
        output_handle = tempfile.TemporaryFile(
            prefix="kubeface-bucket-storage-", suffix=".data")

    # get_media (rather than get) returns the object's contents instead of
    # its metadata.
    media_request = get_service().objects().get_media(
        bucket=bucket_name, object=file_name)
    downloader = http.MediaIoBaseDownload(output_handle, media_request)

    while True:
        status, finished = downloader.next_chunk()
        logging.debug("Download {}%.".format(int(status.progress() * 100)))
        if finished is not False:
            break

    output_handle.seek(0)
    return output_handle

Source File: utils.py From colab-tf-utils with GNU General Public License v3.0

5 votes

def download_file_to_folder(self, remote_file, path):
        """
        Download a GDriveItem to a local file, showing a tqdm progress bar.

        :param remote_file: GDriveItem to download; its ``fid`` selects the
            Drive file and its ``name`` is shown in the progress bar.
        :param path: local path that is opened for binary writing (the
            file itself, not a containing folder).
        :return: None
        """
        # isinstance also accepts GDriveItem subclasses, which the former
        # exact type comparison rejected.
        assert isinstance(remote_file, GDriveItem)
        request = self.drive_service.files().get_media(fileId=remote_file.fid)

        last_progress = 0
        description = 'Downloading file %s to %s' % (remote_file.name, path)

        # Using the bar as a context manager closes it even when the
        # download raises; the original never closed it.
        with tqdm(total=100, desc=description) as pbar:
            with open(path, 'wb') as fh:
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while done is False:
                    status, done = downloader.next_chunk()
                    if status:
                        p = status.progress() * 100
                        pbar.update(p - last_progress)
                        last_progress = p

            # Top the bar up to 100% in case the last status fell short.
            pbar.update(100 - last_progress)

Source File: utils.py From drive-cli with MIT License

5 votes

def concat(fid):
    """Download the Drive file ``fid`` and print its content as UTF-8 text.

    Credentials are read from ``token.json`` inside ``dirpath``; the
    request is built by ``get_request`` from the file's MIME type.
    """
    token_path = os.path.join(dirpath, 'token.json')
    credentials = file.Storage(token_path).get()
    drive = build('drive', 'v3', http=credentials.authorize(Http()))
    buffer = io.BytesIO()
    metadata = get_file(fid)
    # The extension returned alongside the request is not needed here.
    request, _ = get_request(drive, fid, metadata['mimeType'])
    downloader = MediaIoBaseDownload(buffer, request)
    while True:
        _, finished = downloader.next_chunk()
        if finished is not False:
            break
    print(buffer.getvalue().decode('utf-8'))

Source File: utils.py From drive-cli with MIT License

5 votes

def file_download(item, cwd, clone=False):
    """Download one Drive item into ``cwd`` and record it in the drive data.

    The download is skipped (returning None) when ``clone`` is false, the
    target file already exists and ``write_needed`` reports no change.

    :param item: Drive file metadata; ``id``, ``name`` and ``mimeType``
        are read.
    :param cwd: Directory the file is written into.
    :param clone: When true, always download without the up-to-date check.
    """
    token_path = os.path.join(dirpath, 'token.json')
    credentials = file.Storage(token_path).get()
    drive = build('drive', 'v3', http=credentials.authorize(Http()))
    file_id = item['id']
    file_name = item['name']
    buffer = io.BytesIO()
    click.echo("Preparing: " + click.style(file_name, fg='red') + " for download")
    request, ext = get_request(drive, file_id, item['mimeType'])
    file_path = os.path.join(cwd, file_name) + ext

    # Short-circuit order matters: no filesystem check when cloning.
    already_current = (not clone
                       and os.path.exists(file_path)
                       and not write_needed(file_path, item))
    if already_current:
        return

    downloader = MediaIoBaseDownload(buffer, request)
    with click.progressbar(length=100, label='downloading file') as bar:
        previous_pct = 0
        while True:
            status, finished = downloader.next_chunk()
            current_pct = int(status.progress() * 100)
            # The bar expects increments, not absolute positions.
            bar.update(int(current_pct - previous_pct))
            previous_pct = current_pct
            if finished is not False:
                break
        with open(file_path, 'wb') as out:
            out.write(buffer.getvalue())

    records = drive_data()
    records[file_path] = {'id': item['id'], 'time': time.time()}
    drive_data(records)
    click.secho("completed download of " + file_name, fg='yellow')

Source File: storage.py From forseti-security with Apache License 2.0

5 votes

def download(self, bucket, object_name):
        """Fetch the full contents of a bucket object into memory.

        Args:
            bucket (str): The name of the bucket to read from.
            object_name (str): The name of the object to read.

        Returns:
            bytes: The contents of the object, as read from the in-memory
                buffer the download was written to.
        """
        media_request = self._build_request(
            'get_media', {'bucket': bucket, 'object': object_name})

        # Make sure any payload attached to the http object is bytes.
        if hasattr(self.http, 'data') and not isinstance(self.http.data, bytes):
            self.http.data = self.http.data.encode()

        media_request.http = self.http

        # The buffer is closed on exit, whether or not the download succeeds.
        with io.BytesIO() as buffer:
            downloader = http.MediaIoBaseDownload(buffer, media_request)
            while True:
                _, finished = downloader.next_chunk(num_retries=self._num_retries)
                if finished:
                    break
            return buffer.getvalue()

Source File: GoogleDriveDownloader.py From VideoSuperResolution with MIT License

5 votes

def drive_download(name, fileid, path):
  """Download a Drive file to ``path/name`` unless it is already there.

  Args:
    name: File name to store the download under.
    fileid: ID of the Drive file.
    path: Directory the file is stored in.

  Returns:
    pathlib.Path of the local file (pre-existing or freshly downloaded).
  """
  store_path = Path(path) / name
  if store_path.exists():
    print("{} exists, skip download.".format(name))
    return store_path
  # The file token.json stores the user's access and refresh tokens, and is
  # created automatically when the authorization flow completes for the first
  # time.
  store = file.Storage('/tmp/token.json')
  creds = store.get()
  if not creds or creds.invalid:
    creds = require_authorize(store, CREDENTIALS, SCOPES)
  service = build('drive', 'v3', http=creds.authorize(Http()))

  request = service.files().get_media(fileId=fileid)

  try:
    # The context manager closes the handle; the original leaked it.
    with io.FileIO(store_path.resolve(), 'wb') as fh:
      downloader = MediaIoBaseDownload(fh, request)
      done = False
      while not done:
        status, done = downloader.next_chunk()
        print("\rDownload {}%.".format(int(status.progress() * 100)))
  except BaseException:
    # A truncated file would satisfy the exists() check above on the next
    # call and be mistaken for a finished download, so drop it.
    if store_path.exists():
      store_path.unlink()
    raise
  print('\n', flush=True)
  return store_path

Source File: customer_supplied_keys.py From python-docs-samples with Apache License 2.0

5 votes

def download_object(bucket, obj, out_file, encryption_key, key_hash):
    """Download an object protected by a customer-supplied encryption key.

    The three ``x-goog-encryption-*`` headers are attached to the media
    request and the whole object is fetched with one ``execute()`` call,
    whose result is written to ``out_file``.
    """
    media_request = create_service().objects().get_media(
        bucket=bucket, object=obj)

    encryption_headers = (
        ('x-goog-encryption-algorithm', 'AES256'),
        ('x-goog-encryption-key', encryption_key),
        ('x-goog-encryption-key-sha256', key_hash),
    )
    for header, value in encryption_headers:
        media_request.headers[header] = value

    # http.MediaIoBaseDownload overwrites HTTP headers, so it cannot be
    # used with the custom headers above; download in a single request
    # instead.
    payload = media_request.execute()
    out_file.write(payload)

Source File: drive.py From platypush with MIT License

5 votes

def download(self, file_id: str, path: str) -> str:
        """
        Download a Google Drive file locally.

        :param file_id: ID of the Google Drive file to download.
        :param path: Local destination. ``~`` is expanded and the path made
            absolute. If it is an existing directory, the file is stored
            inside it under the name reported by Drive; otherwise it is
            used as the target file path.
        :return: The local file path.
        """
        # noinspection PyPackageRequirements
        from googleapiclient.http import MediaIoBaseDownload

        service = self.get_service()
        request = service.files().get_media(fileId=file_id)
        path = os.path.abspath(os.path.expanduser(path))
        if os.path.isdir(path):
            name = service.files().get(fileId=file_id).execute().get('name')
            path = os.path.join(path, name)

        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False

        while not done:
            status, done = downloader.next_chunk()
            # progress() is a 0..1 fraction; scale it so the "%" in the
            # message is accurate.
            self.logger.info('Download progress: {}%'.format(int(status.progress() * 100)))

        with open(path, 'wb') as f:
            f.write(fh.getvalue())
        return path

Source File: uds.py From uds with GNU Affero General Public License v3.0

5 votes

def convert_file(self, file_id):
        """Download a Drive file to the downloads folder and re-upload it.

        The file is saved under its Drive ``name`` inside
        ``get_downloads_folder()`` and then handed to ``do_upload``.

        NOTE(review): ``service`` is not taken from ``self``; it is
        presumably a module-level Drive service object - confirm.

        :param file_id: ID of the Drive file to convert.
        """
        # Get file metadata
        metadata = service.files().get(fileId=file_id, fields="name").execute()

        # Download the file and then call do_upload() on it
        request = service.files().get_media(fileId=file_id)
        path = "%s/%s" % (get_downloads_folder(), metadata['name'])
        # Close the handle before do_upload() reads the file back; the
        # original left it open for the rest of the call and beyond.
        with io.FileIO(path, "wb") as fh:
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while done is False:
                _, done = downloader.next_chunk()

        print("Downloaded %s" % metadata['name'])
        do_upload(path, service)

        # An alternative method would be to use partial download headers
        # and convert and upload the parts individually. Perhaps a
        # future release will implement this.

    # Mode sets the mode of updating 0 > Verbose, 1 > Notification, 2 > silent

Source File: uds.py From uds with GNU Affero General Public License v3.0

5 votes

def download_part(self, part_id):
        """
        Download one part into memory and return its bytes.

        :param part_id: identifier passed to ``self.api.export_media``
        :return: the downloaded content as ``bytes``
        """
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, self.api.export_media(part_id))
        while True:
            _, finished = downloader.next_chunk()
            if finished is not False:
                break
        return buffer.getvalue()

Source File: __init__.py From starthinker with Apache License 2.0

5 votes

def object_get_chunks(auth, path, chunksize=CHUNKSIZE, encoding=None):
  """Generator that downloads a storage object and yields it chunk by chunk.

  Args:
    auth: Passed to get_service to build the 'storage' v1 client.
    path: '<bucket>:<object name>'; split on the first ':' only.
    chunksize: Chunk size handed to MediaIoBaseDownload.
    encoding: When set, each chunk is decoded with it and yielded as text.
      When None, the shared BytesIO buffer itself is yielded.

  Yields:
    One item per downloaded chunk. Without an encoding the same BytesIO
    object is yielded every time, rewound to offset 0; it is emptied as soon
    as the consumer asks for the next item, so it must be read first.

  Raises:
    HttpError: Immediately for statuses below 500, or once more than
      RETRIES consecutive attempts have failed.
    httplib2.HttpLib2Error, IOError: Once more than RETRIES consecutive
      attempts have failed.
  """
  bucket, filename = path.split(':', 1)
  service = get_service('storage', 'v1', auth)

  data = BytesIO()
  request = service.objects().get_media(bucket=bucket, object=filename)
  media = MediaIoBaseDownload(data, request, chunksize=chunksize)

  retries = 0
  done = False
  while not done:
    error = None
    try:
      progress, done = media.next_chunk()
      if progress: print('Download %d%%' % int(progress.progress() * 100))
      # Rewind so the consumer (or the decode below) reads the chunk from
      # its beginning.
      data.seek(0)
      yield data.read().decode(encoding) if encoding else data
      # Reset the buffer so the next chunk does not accumulate behind this one.
      data.seek(0)
      data.truncate(0)
    except HttpError as err:
      error = err
      # Statuses below 500 are not retried.
      if err.resp.status < 500: raise
    except (httplib2.HttpLib2Error, IOError) as err:
      error = err

    if error:
      # Linear back-off: 5s, 10s, ... until RETRIES is exceeded.
      retries += 1
      if retries > RETRIES: raise error
      else: sleep(5 * retries)
    else:
      # A successful chunk resets the consecutive-failure counter.
      retries = 0

  print('Download End')

Source File: __init__.py From starthinker with Apache License 2.0

5 votes

def media_download(request, chunksize, encoding=None):
  """Generator that performs a media request and yields it chunk by chunk.

  Args:
    request: Media request handed to MediaIoBaseDownload.
    chunksize: Chunk size handed to MediaIoBaseDownload.
    encoding: When set, each chunk is decoded with it and yielded as text.
      When None, the shared BytesIO buffer itself is yielded.

  Yields:
    One item per downloaded chunk. Without an encoding the same BytesIO
    object is yielded every time, rewound to offset 0; it is emptied as soon
    as the consumer asks for the next item, so it must be read first.

  Raises:
    HttpError: Immediately for statuses below 500, or once more than
      RETRIES consecutive attempts have failed.
    httplib2.HttpLib2Error, IOError: Once more than RETRIES consecutive
      attempts have failed.
  """
  data = BytesIO()
  media = MediaIoBaseDownload(data, request, chunksize=chunksize)

  retries = 0
  done = False
  while not done:
    error = None
    try:
      progress, done = media.next_chunk()
      if progress: print('Download %d%%' % int(progress.progress() * 100))
      # Rewind so the consumer (or the decode below) reads the chunk from
      # its beginning.
      data.seek(0)
      yield data.read().decode(encoding) if encoding else data
      # Reset the buffer so the next chunk does not accumulate behind this one.
      data.seek(0)
      data.truncate(0)
    except HttpError as err:
      error = err
      # Statuses below 500 are not retried.
      if err.resp.status < 500: raise
    except (httplib2.HttpLib2Error, IOError) as err:
      error = err

    if error:
      # Linear back-off: 5s, 10s, ... until RETRIES is exceeded.
      retries += 1
      if retries > RETRIES: raise error
      else: sleep(5 * retries)
    else:
      # A successful chunk resets the consecutive-failure counter.
      retries = 0

  print('Download 100%')

Source File: campaign_manager.py From airflow with Apache License 2.0

5 votes

def execute(self, context: Dict):
        """Download a Campaign Manager report file and upload it to GCS.

        The report is streamed into a named temporary file, uploaded to
        ``self.bucket_name`` with MIME type ``text/csv``, and the resolved
        object name is pushed to XCom under the key ``report_name``.

        :param context: Task context, forwarded to ``xcom_push``.
        """
        cm_hook = GoogleCampaignManagerHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
        )
        storage_hook = GCSHook(
            google_cloud_storage_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
        )

        # Work out the object name: the explicit one if set, else the
        # report's fileName, else a random UUID.
        report_metadata = cm_hook.get_report(
            file_id=self.file_id,
            profile_id=self.profile_id,
            report_id=self.report_id,
        )
        report_name = self._resolve_file_name(
            self.report_name or report_metadata.get("fileName", str(uuid.uuid4()))
        )

        # Download the report
        self.log.info("Starting downloading report %s", self.report_id)
        media_request = cm_hook.get_report_file(
            profile_id=self.profile_id,
            report_id=self.report_id,
            file_id=self.file_id,
        )
        with tempfile.NamedTemporaryFile() as temp_file:
            downloader = http.MediaIoBaseDownload(
                fd=temp_file, request=media_request, chunksize=self.chunk_size
            )
            while True:
                _, finished = downloader.next_chunk()
                if finished:
                    break

            # Make sure every byte is on disk before the upload reads the
            # file by name.
            temp_file.flush()
            storage_hook.upload(
                bucket_name=self.bucket_name,
                object_name=report_name,
                gzip=self.gzip,
                filename=temp_file.name,
                mime_type="text/csv",
            )

        self.xcom_push(context, key="report_name", value=report_name)

Source File: dsub_util.py From dsub with Apache License 2.0

5 votes

def _load_file_from_gcs(gcs_file_path, credentials=None):
  """Read a text file from GCS into an in-memory text stream.

  Args:
    gcs_file_path: The target file path; should have the 'gs://' prefix.
    credentials: Optional credential to be used to load the file from gcs.

  Returns:
    A six.StringIO wrapping the content of the file.
  """
  storage = _get_storage_service(credentials)

  # Drop the scheme, then split "<bucket>/<object>" at the first slash.
  path_without_scheme = gcs_file_path[len('gs://'):]
  bucket_name, object_name = path_without_scheme.split('/', 1)
  media_request = storage.objects().get_media(
      bucket=bucket_name, object=object_name)

  buffer = io.BytesIO()
  downloader = MediaIoBaseDownload(buffer, media_request, chunksize=1024 * 1024)
  while True:
    _, finished = _downloader_next_chunk(downloader)
    if finished:
      break

  contents = buffer.getvalue()
  if isinstance(contents, six.string_types):
    return six.StringIO(contents)
  return six.StringIO(contents.decode())

Source File: download-2.py From google-drive-folder-downloader with MIT License

5 votes

def download_file(service, file_id, location, filename, mime_type):
    """Download a Drive file to ``location + filename``, printing progress.

    Python 2 variant (uses print statements). Files whose MIME type
    contains ``vnd.google-apps`` are exported as PDF and get a ``.pdf``
    suffix appended to ``filename``; everything else is fetched as-is.
    If a chunk download fails, the partial file is removed and the
    process exits with status 1.

    :param service: Drive API service object.
    :param file_id: ID of the Drive file.
    :param location: Destination prefix; concatenated with ``filename``
        directly, without adding a path separator.
    :param filename: Name of the local file.
    :param mime_type: MIME type reported by Drive for the file.
    """

    if 'vnd.google-apps' in mime_type:
        request = service.files().export_media(fileId=file_id,
                mimeType='application/pdf')
        filename += '.pdf'
    else:
        request = service.files().get_media(fileId=file_id)
    # NOTE(review): this handle is only closed on the failure path below.
    fh = io.FileIO(location + filename, 'wb')
    # Third positional argument is the chunk size: 1 GiB per request.
    downloader = MediaIoBaseDownload(fh, request, 1024 * 1024 * 1024)
    done = False
    while done is False:
        try:
            status, done = downloader.next_chunk()
        except:
            # Bare except: any failure, including an interrupt, discards
            # the partial file and terminates the process.
            fh.close()
            os.remove(location + filename)
            sys.exit(1)
        # Trailing comma suppresses the newline so '\r' rewrites the line.
        print '\rDownload {}%.'.format(int(status.progress() * 100)),
        sys.stdout.flush()
    print ''

Source File: storage.py From forseti-security with Apache License 2.0

4 votes

def download_to_file(self, bucket, object_name, output_file):
        """Stream an object from a bucket into an already-open file object.

         Args:
            bucket (str): The name of the bucket to read from.
            object_name (str): The name of the object to read.
            output_file (file): The file object to write the data to.

         Returns:
            int: Total size in bytes of file, or 0 when the download was
                abandoned because of an ignored HTTP error code (416).

         Raises:
            HttpError: HttpError is raised if the call to the GCP storage API
                fails with a status that is not in the ignored list.
        """
        ignored_http_error_codes = [416]

        media_request = self._build_request(
            'get_media', {'bucket': bucket, 'object': object_name})

        # Make sure any payload attached to the http object is bytes.
        if hasattr(self.http, 'data') and not isinstance(self.http.data, bytes):
            self.http.data = self.http.data.encode()

        media_request.http = self.http

        downloader = http.MediaIoBaseDownload(output_file, media_request)
        progress = None
        try:
            while True:
                progress, done = downloader.next_chunk(
                    num_retries=self._num_retries)
                if done:
                    break
        except errors.HttpError as e:
            if e.resp.status not in ignored_http_error_codes:
                raise
            LOGGER.debug(f'Ignoring HTTP error code {e.resp.status} for '
                         f'downloading {object_name} object from GCS '
                         f'bucket {bucket}.')
            return 0
        return progress.total_size

Python googleapiclient.http.MediaIoBaseDownload() Examples