Python Examples of six.moves.urllib.request.urlretrieve

Source File: squad_preprocess.py From cs224n-win18-squad with Apache License 2.0

6 votes

def maybe_download(url, filename, prefix, num_bytes=None):
    """Download ``url + filename`` into ``prefix`` unless it is already there.

    Args:
        url: Base URL; ``filename`` is appended to it verbatim.
        filename: Name of the remote file, also used for the local copy.
        prefix: Local directory that holds (or will hold) the file.
        num_bytes: Expected file size in bytes. ``None`` disables the check.

    Returns:
        The local path of the file. This is returned for a freshly
        downloaded file and for one that was already present (the previous
        version returned ``None`` in the latter case).

    Raises:
        AttributeError: Re-raised, after printing a hint, if the download
            raises it.
        Exception: If the size on disk differs from ``num_bytes``.
    """
    local_filename = os.path.join(prefix, filename)
    if not os.path.exists(local_filename):
        try:
            # print() with a single argument behaves the same on Python 2 and 3.
            print("Downloading file {}...".format(url + filename))
            with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
                local_filename, _ = urlretrieve(url + filename, local_filename, reporthook=reporthook(t))
        except AttributeError:
            print("An error occurred when downloading the file! Please get the dataset using a browser.")
            raise
    # The file is on disk now (downloaded or cached); verify its size.
    file_stats = os.stat(local_filename)
    if num_bytes is None or file_stats.st_size == num_bytes:
        print("File {} successfully loaded".format(filename))
    else:
        raise Exception("Unexpected dataset size. Please get the dataset using a browser.")

    return local_filename

Source File: file_utils.py From Benchmarks with MIT License

6 votes

def urlretrieve(url, filename, reporthook=None, data=None):
        """Stream ``url`` into the local file ``filename`` in 8 KiB chunks.

        Arguments:
            url: URL passed to ``urlopen``.
            filename: Path of the local file, opened in ``'wb'`` mode.
            reporthook: Optional callable ``hook(count, block_size, total_size)``
                invoked after every chunk and once more at end of stream.
                ``total_size`` is ``-1`` when the response carries no
                ``Content-Length`` header.
            data: ``data`` argument passed to ``urlopen``.
        """
        def chunk_read(response, chunk_size=8192, reporthook=None):
            # A missing Content-Length header must not abort the download.
            content_length = response.info().get('Content-Length')
            total_size = -1
            if content_length is not None:
                total_size = int(content_length.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if not chunk:
                    # Final call reports the full size as the block size;
                    # only made when a hook was actually supplied.
                    if reporthook:
                        reporthook(count, total_size, total_size)
                    break
                if reporthook:
                    reporthook(count, chunk_size, total_size)
                yield chunk

        response = urlopen(url, data)
        try:
            with open(filename, 'wb') as fd:
                for chunk in chunk_read(response, reporthook=reporthook):
                    fd.write(chunk)
        finally:
            # Release the connection even if reading or writing fails.
            response.close()

Source File: data_utils.py From KerasNeuralFingerprint with MIT License

6 votes

def urlretrieve(url, filename, reporthook=None, data=None):
        """Stream ``url`` into the local file ``filename`` in 8 KiB chunks.

        Arguments:
            url: URL passed to ``urlopen``.
            filename: Path of the local file, opened in ``'wb'`` mode.
            reporthook: Optional callable ``hook(count, block_size, total_size)``
                invoked after every non-empty chunk. ``total_size`` is ``-1``
                when the response carries no ``Content-Length`` header.
            data: ``data`` argument passed to ``urlopen``.
        """
        def chunk_read(response, chunk_size=8192, reporthook=None):
            # A missing Content-Length header must not abort the download.
            content_length = response.info().get('Content-Length')
            total_size = -1
            if content_length is not None:
                total_size = int(content_length.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                count += 1
                if reporthook:
                    reporthook(count, chunk_size, total_size)
                yield chunk

        response = urlopen(url, data)
        try:
            with open(filename, 'wb') as fd:
                for chunk in chunk_read(response, reporthook=reporthook):
                    fd.write(chunk)
        finally:
            # Release the connection even if reading or writing fails.
            response.close()

Source File: downloader.py From auptimizer with GNU General Public License v3.0

6 votes

def download_data_url(url, download_dir):
	"""Download ``url`` into ``download_dir`` and unpack it if it is an archive.

	The local file name is the last ``/``-separated component of ``url``.
	Nothing happens when that file already exists in ``download_dir``.
	After a download, ``.zip``, ``.tar.gz`` and ``.tgz`` files are extracted
	into ``download_dir``; other files are left as downloaded.

	:param url: URL of the file to fetch.
	:param download_dir: target directory, created if necessary.
	:return: None
	"""
	filename = url.split('/')[-1]
	file_path = os.path.join(download_dir, filename)

	if not os.path.exists(file_path):
		os.makedirs(download_dir, exist_ok=True)

		print('Download %s to %s' % (url, file_path))
		file_path, _ = request.urlretrieve(
			url=url,
			filename=file_path,
			reporthook=report_download_progress)

		print('\nExtracting files')
		# NOTE(review): extractall() writes wherever the archive's member
		# names point; only use this with archives from trusted sources.
		# The context managers make sure the archive handle is closed.
		if file_path.endswith('.zip'):
			with zipfile.ZipFile(file=file_path, mode='r') as archive:
				archive.extractall(download_dir)
		elif file_path.endswith(('.tar.gz', '.tgz')):
			with tarfile.open(name=file_path, mode='r:gz') as archive:
				archive.extractall(download_dir)

Source File: squad_preprocess.py From squad-transformer with Apache License 2.0

6 votes

def maybe_download(url, filename, prefix, num_bytes=None):
    """Download ``url + filename`` into ``prefix`` unless it is already there.

    Args:
        url: Base URL; ``filename`` is appended to it verbatim.
        filename: Name of the remote file, also used for the local copy.
        prefix: Local directory that holds (or will hold) the file.
        num_bytes: Expected file size in bytes. ``None`` disables the check.

    Returns:
        The local path of the file. This is returned for a freshly
        downloaded file and for one that was already present (the previous
        version returned ``None`` in the latter case).

    Raises:
        AttributeError: Re-raised, after printing a hint, if the download
            raises it.
        Exception: If the size on disk differs from ``num_bytes``.
    """
    output_path = os.path.join(prefix, filename)
    local_filename = output_path
    if not os.path.exists(output_path):
        try:
            print("Downloading file {} to {}...".format(url + filename, output_path))
            with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
                local_filename, _ = urlretrieve(url + filename, output_path, reporthook=reporthook(t))
        except AttributeError:
            print("An error occurred when downloading the file! Please get the dataset using a browser.")
            raise
    # The file is on disk now (downloaded or cached); verify its size.
    file_stats = os.stat(output_path)
    if num_bytes is None or file_stats.st_size == num_bytes:
        print("File {} successfully downloaded to {}.".format(filename, output_path))
    else:
        raise Exception("Unexpected dataset size. Please get the dataset using a browser.")

    return local_filename

Source File: data_utils.py From Machine-Learning-with-TensorFlow-1.x with MIT License

6 votes

def download_file(file_url, output_file_dir, expected_size, FORCE=False):
    """Fetch ``file_url`` into ``output_file_dir`` and check its size.

    The local name is the last ``/``-separated component of ``file_url``.
    The download is skipped when a file of exactly ``expected_size`` bytes is
    already present and ``FORCE`` is false.

    Returns the local path of the file. Raises ``Exception`` when the
    downloaded file does not have ``expected_size`` bytes.
    """
    target = os.path.join(output_file_dir, file_url.split('/')[-1])
    print('Attempting to download ' + file_url)
    print('File output path: ' + target)
    print('Expected size: ' + str(expected_size))
    if not os.path.isdir(output_file_dir):
        os.makedirs(output_file_dir)

    already_complete = (
        os.path.isfile(target)
        and os.stat(target).st_size == expected_size
        and not FORCE
    )
    if already_complete:
        print('File already downloaded completely!')
        return target

    print(' ')
    filename, _ = urlretrieve(file_url, target, download_hook_function)
    print(' ')
    downloaded_size = os.stat(filename).st_size
    print(downloaded_size)
    if downloaded_size != expected_size:
        raise Exception('Could not download ' + filename)
    print('Found and verified', filename)
    return filename

Source File: data_utils.py From Machine-Learning-with-TensorFlow-1.x with MIT License

6 votes

def download_file(file_url, output_file_dir, expected_size, FORCE=False):
    """Fetch ``file_url`` into ``output_file_dir`` and check its size.

    The local name is the last ``/``-separated component of ``file_url``.
    The download is skipped when a file of exactly ``expected_size`` bytes is
    already present and ``FORCE`` is false.

    Returns the local path of the file. Raises ``Exception`` when the
    downloaded file does not have ``expected_size`` bytes.
    """
    target = os.path.join(output_file_dir, file_url.split('/')[-1])
    print('Attempting to download ' + file_url)
    print('File output path: ' + target)
    print('Expected size: ' + str(expected_size))
    if not os.path.isdir(output_file_dir):
        os.makedirs(output_file_dir)

    already_complete = (
        os.path.isfile(target)
        and os.stat(target).st_size == expected_size
        and not FORCE
    )
    if already_complete:
        print('File already downloaded completely!')
        return target

    print(' ')
    filename, _ = urlretrieve(file_url, target, download_hook_function)
    print(' ')
    if os.stat(filename).st_size != expected_size:
        raise Exception('Could not download ' + filename)
    print('Found and verified', filename)
    return filename

Source File: exchange_utils.py From catalyst with Apache License 2.0

6 votes

def download_exchange_symbols(exchange_name):
    """
    Fetch the symbols file of an exchange and store it locally.

    The remote address is ``SYMBOLS_URL`` formatted with the exchange name;
    the local destination comes from ``get_exchange_symbols_filename``.

    Parameters
    ----------
    exchange_name: str

    Returns
    -------
    The value returned by ``request.urlretrieve``, passed through unchanged.

    """
    return request.urlretrieve(
        filename=get_exchange_symbols_filename(exchange_name),
        url=SYMBOLS_URL.format(exchange=exchange_name),
    )

Source File: downloads.py From mead-baseline with Apache License 2.0

6 votes

def web_downloader(url, path_to_save=None):
    """Download ``url`` to ``path_to_save`` while driving a progress bar.

    When no destination is given, a path under ``/tmp`` that contains the
    current process id is used. Returns the path reported by ``urlretrieve``.
    Any exception raised during the download is replaced by ``RuntimeError``.
    """
    # One-slot holder so the nested hook can rebind the bar without `nonlocal`
    # (keeps the function usable on Python 2.7).
    bar_slot = [None]

    def _report_hook(count, block_size, total_size):
        if bar_slot[0] is None:
            if total_size != -1:
                # Number of blocks needed, rounded up.
                length = int((total_size + block_size - 1) / float(block_size))
            else:
                length = 1
            bar_slot[0] = create_progress_bar(length)
        bar_slot[0].update()

    if not path_to_save:
        path_to_save = "/tmp/data.dload-{}".format(os.getpid())
    try:
        path_to_save, _ = urlretrieve(url, path_to_save, reporthook=_report_hook)
        bar_slot[0].done()
    except Exception:  # deliberately broad: many distinct failures are possible here
        raise RuntimeError("failed to download data from [url]: {} [to]: {}".format(url, path_to_save))
    return path_to_save

Source File: vectorizers.py From mead-baseline with Apache License 2.0

6 votes

def load_bert_vocab(vocab_file):
    """Return the token -> index mapping read from ``vocab_file``.

    The result is cached in the module-level ``BERT_VOCAB``; once that is set,
    later calls return it without looking at ``vocab_file``. When
    ``validate_url`` accepts ``vocab_file`` it is downloaded first and the
    local copy is read. Each line is one token; the index is the line number
    (starting at 0).
    """
    global BERT_VOCAB
    if BERT_VOCAB is not None:
        return BERT_VOCAB

    if validate_url(vocab_file):
        print(f'Downloading {vocab_file}')
        vocab_file, _ = urlretrieve(vocab_file)

    token_to_index = collections.OrderedDict()
    with open(vocab_file, "r") as rf:
        for position, raw_line in enumerate(rf):
            token = convert_to_unicode(raw_line)
            if not token:
                break
            token_to_index[token.strip()] = position
    BERT_VOCAB = token_to_index
    return token_to_index

Source File: download.py From chainer with MIT License

6 votes

def download(url, dst_file_path):
    """Download ``url`` to ``dst_file_path`` with a console progress bar."""
    # Mutable holder: the bar is created lazily on the first hook call,
    # because the total size is only known then.
    state = {'bar': None}

    def reporthook(count, block_size, total_size):
        if state['bar'] is None:
            widgets = [
                progressbar.Percentage(),
                ' ',
                progressbar.Bar(),
                ' ',
                progressbar.FileTransferSpeed(),
                ' | ',
                progressbar.ETA(),
            ]
            created = progressbar.ProgressBar(maxval=total_size, widgets=widgets)
            created.start()
            state['bar'] = created
        # Clamp so the last (partial) block never overshoots the total.
        transferred = min(count * block_size, total_size)
        state['bar'].update(transferred)

    request.urlretrieve(url, dst_file_path, reporthook=reporthook)

Source File: utils.py From ctc_tensorflow_example with MIT License

6 votes

def maybe_download(filename, expected_bytes, force=False):
    """Fetch ``filename`` from the module-level ``url`` if needed, then check its size.

    The download runs when ``force`` is true or the file does not exist yet.
    Returns the file name; raises ``Exception`` when the size on disk is not
    ``expected_bytes``.
    """
    needs_fetch = force or not os.path.exists(filename)
    if needs_fetch:
        print('Attempting to download:', filename)
        filename, _ = urlretrieve(url + filename, filename,
                                  reporthook=download_progress_hook)
        print('\nDownload Complete!')

    if os.stat(filename).st_size != expected_bytes:
        raise Exception(
            'Failed to verify ' + filename +
            '. Can you get to it with a browser?')
    print('Found and verified', filename)
    return filename

Source File: util.py From azure-cli-extensions with MIT License

6 votes

def retrieve_file_from_url(url):
    """
    Download the file behind an URL and return where it was stored.

    Args:
        url: The URL to retrieve the file from.

    Returns:
        The local path reported by ``urlretrieve`` for the downloaded file.

    Raises:
        CLIError: If the download fails, or if the downloaded content starts
            with three digits (treated as an HTTP error code).
    """
    try:
        local_path, _ = urlretrieve(url)
        # Under Python 2.x an HTTP error shows up as file content that begins
        # with the status code instead of as an exception.
        with open(local_path, 'r') as handle:
            body = handle.read()
        if body[:3].isdigit():
            raise CLIError(ALIAS_FILE_URL_ERROR.format(url, body.strip()))
    except CLIError:
        # Already in its final form; let it through untouched.
        raise
    except Exception as exception:
        # Python 3.x reports failures as exceptions; wrap them.
        raise CLIError(ALIAS_FILE_URL_ERROR.format(url, exception))

    return local_path

Source File: 1_prepare_pickle.py From Neural-Network-Programming-with-TensorFlow with MIT License

6 votes

def maybe_download(filename, expected_bytes, force=False):
  """Fetch ``filename`` into ``data_root`` if needed, then check its size.

  The download runs when ``force`` is true or the destination is missing.
  Returns the destination path; raises ``Exception`` when the size on disk
  is not ``expected_bytes``.
  """
  target_path = os.path.join(data_root, filename)
  must_fetch = force or not os.path.exists(target_path)
  if must_fetch:
    print('Attempting to download:', filename)
    filename, _ = urlretrieve(url + filename, target_path, reporthook=download_progress_hook)
    print('\nDownload Complete!')

  if os.stat(target_path).st_size != expected_bytes:
    raise Exception(
      'Failed to verify ' + target_path + '. Can you get to it with a browser?')
  print('Found and verified', target_path)
  return target_path


#num_classes = 10

Source File: data_utils.py From deepQuest with BSD 3-Clause "New" or "Revised" License

5 votes

def urlretrieve(url, filename, reporthook=None, data=None):
        """Replacement for `urlretrieve` for Python 2.

        Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
        `urllib` module, known to have issues with proxy management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: optional hook function called after each block read
                and once more at end of stream.
                The hook will be passed three arguments;
                a count of blocks transferred so far,
                a block size in bytes, and the total size of the file
                (`-1` when the response has no `Content-Length` header).
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            content_length = response.info().get('Content-Length')
            total_size = -1
            if content_length is not None:
                total_size = int(content_length.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if not chunk:
                    # Final call reports the full size as the block size;
                    # only made when a hook was actually supplied.
                    if reporthook:
                        reporthook(count, total_size, total_size)
                    break
                if reporthook:
                    reporthook(count, chunk_size, total_size)
                yield chunk

        response = urlopen(url, data)
        try:
            with open(filename, 'wb') as fd:
                for chunk in chunk_read(response, reporthook=reporthook):
                    fd.write(chunk)
        finally:
            # Release the connection even if reading or writing fails.
            response.close()

Source File: data.py From tensorboardX with MIT License

5 votes

def download_mnist_data():
    """Download the four MNIST archives and pickle them as ``mnist.pkl``.

    The archives named by the module-level ``train_images``, ``train_labels``,
    ``test_images`` and ``test_labels`` are fetched from ``parent`` into the
    current directory and converted with ``load_mnist``. Training and test
    parts are concatenated along axis 0 and dumped as a dict with the keys
    ``'data'`` and ``'target'``.
    """
    for archive in (train_images, train_labels, test_images, test_labels):
        print('Downloading {:s}...'.format(archive))
        request.urlretrieve('{:s}/{:s}'.format(parent, archive), archive)
        print('Done')

    print('Converting training data...')
    data_train, target_train = load_mnist(train_images, train_labels,
                                          num_train)
    print('Done')
    print('Converting test data...')
    data_test, target_test = load_mnist(test_images, test_labels, num_test)
    all_data = np.append(data_train, data_test, axis=0)
    all_targets = np.append(target_train, target_test, axis=0)
    mnist = {'data': all_data, 'target': all_targets}
    print('Done')
    print('Save output...')
    with open('mnist.pkl', 'wb') as output:
        # -1 selects the highest pickle protocol available.
        six.moves.cPickle.dump(mnist, output, -1)
    print('Done')
    print('Convert completed')

Source File: data_utils.py From DeepLearning_Wavelet-LSTM with MIT License

5 votes

def urlretrieve(url, filename, reporthook=None, data=None):
        """Python 2 stand-in for `urlretrieve`.

        The Python 2 `urlretrieve` is built on `FancyURLopener` from the
        legacy `urllib` module, which is known to have issues with proxy
        management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: optional hook called after every read, including
                the final empty one. It receives three arguments:
                the number of reads so far, the block size in bytes,
                and the total size of the file (`-1` when unknown).
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            length_header = response.info().get('Content-Length')
            total_size = -1 if length_header is None else int(length_header.strip())
            reads = 0
            while True:
                block = response.read(chunk_size)
                reads += 1
                if reporthook is not None:
                    reporthook(reads, chunk_size, total_size)
                if not block:
                    return
                yield block

        with closing(urlopen(url, data)) as response:
            with open(filename, 'wb') as fd:
                for block in chunk_read(response, reporthook=reporthook):
                    fd.write(block)

Source File: data_utils.py From DeepLearning_Wavelet-LSTM with MIT License

5 votes

def urlretrieve(url, filename, reporthook=None, data=None):
        """Python 2 stand-in for `urlretrieve`.

        The Python 2 `urlretrieve` is built on `FancyURLopener` from the
        legacy `urllib` module, which is known to have issues with proxy
        management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: optional hook called after every read, including
                the final empty one. It receives three arguments:
                the number of reads so far, the block size in bytes,
                and the total size of the file (`-1` when unknown).
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            length_header = response.info().get('Content-Length')
            total_size = -1 if length_header is None else int(length_header.strip())
            reads = 0
            while True:
                block = response.read(chunk_size)
                reads += 1
                if reporthook is not None:
                    reporthook(reads, chunk_size, total_size)
                if not block:
                    return
                yield block

        with closing(urlopen(url, data)) as response:
            with open(filename, 'wb') as fd:
                for block in chunk_read(response, reporthook=reporthook):
                    fd.write(block)

Source File: load_data.py From NumNum with MIT License

5 votes

def maybe_download(filename, force=False):
  """Fetch ``filename`` from the module-level ``url`` unless it already exists.

  The download runs when ``force`` is true or the file is missing. The file
  is stat'ed afterwards (which fails if it is absent) but its size is not
  compared against anything. Returns the file name.
  """
  already_there = os.path.exists(filename)
  if force or not already_there:
    print('Attempting to download:', filename)
    filename, _ = urlretrieve(url + filename, filename)
    print('Download Complete!')
  os.stat(filename)
  return filename

Source File: downloader.py From chakin with MIT License

5 votes

def download(number=-1, name="", save_dir='./'):
    """Download pre-trained word vector
    :param number: integer row index into the dataset table, default ``-1``
        (meaning "select by ``name`` instead")
    :param name: str, value of the ``Name`` column to select, default ``""``
    :param save_dir: str, default './'
    :return: file path for downloaded file
    :raises ValueError: if neither ``number`` nor ``name`` is given, or if
        the selection yields no URL
    """
    # Fail early with a clear message instead of an UnboundLocalError on `row`.
    if not number > -1 and not name:
        raise ValueError('Please specify either number or name of the word vector.')

    df = load_datasets()

    if number > -1:
        row = df.iloc[[number]]
    else:
        row = df.loc[df["Name"] == name]

    url = ''.join(row.URL)
    if not url:
        # Stop here; continuing would try to download from an empty URL.
        print('The word vector you specified was not found. Please specify correct name.')
        raise ValueError('The word vector you specified was not found. Please specify correct name.')

    widgets = ['Test: ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets)

    def dlProgress(count, blockSize, totalSize):
        # The total is only known on the first callback, so start lazily.
        if pbar.max_value is None:
            pbar.max_value = totalSize
            pbar.start()

        pbar.update(min(count * blockSize, totalSize))

    file_name = url.split('/')[-1]
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, file_name)
    path, _ = urlretrieve(url, save_path, reporthook=dlProgress)
    pbar.finish()
    return path

Source File: 5_word2vec.py From udacity-deep-learning with GNU General Public License v3.0

5 votes

def maybe_download(filename, expected_bytes):
    """Fetch ``filename`` from the module-level ``url`` if missing, then check its size.

    Returns the file name. When the size on disk is not ``expected_bytes``
    the actual size is printed and ``Exception`` is raised.
    """
    if not os.path.exists(filename):
        filename, _ = urlretrieve(url + filename, filename)

    actual_bytes = os.stat(filename).st_size
    if actual_bytes != expected_bytes:
        print(actual_bytes)
        raise Exception(
            'Failed to verify ' + filename + '. Can you get to it with a browser?')
    print('Found and verified %s' % filename)
    return filename

Source File: 1_notmnist.py From udacity-deep-learning with GNU General Public License v3.0

5 votes

def maybe_download(filename, expected_bytes, force=False):
    """Fetch ``filename`` from the module-level ``url`` if needed, then check its size.

    The download runs when ``force`` is true or the file does not exist yet.
    Returns the file name; raises ``Exception`` when the size on disk is not
    ``expected_bytes``.
    """
    needs_fetch = force or not os.path.exists(filename)
    if needs_fetch:
        print('Attempting to download:', filename)
        filename, _ = urlretrieve(url + filename, filename, reporthook=download_progress_hook)
        print('\nDownload Complete!')

    if os.stat(filename).st_size != expected_bytes:
        raise Exception(
            'Failed to verify ' + filename + '. Can you get to it with a browser?')
    print('Found and verified', filename)
    return filename

Source File: 6_lstm.py From udacity-deep-learning with GNU General Public License v3.0

5 votes

def maybe_download(filename, expected_bytes):
    """Fetch ``filename`` from the module-level ``url`` if missing, then check its size.

    Returns the file name. When the size on disk is not ``expected_bytes``
    the actual size is printed and ``Exception`` is raised.
    """
    if not os.path.exists(filename):
        filename, _ = urlretrieve(url + filename, filename)

    size_on_disk = os.stat(filename).st_size
    if size_on_disk != expected_bytes:
        print(size_on_disk)
        raise Exception(
            'Failed to verify ' + filename + '. Can you get to it with a browser?')
    print('Found and verified %s' % filename)
    return filename

Source File: utils.py From pretorched-x with MIT License

5 votes

def download_url(url, destination=None, progress_bar=True):
    """Download a URL to a local file.

    Parameters
    ----------
    url : str
        The URL to download.
    destination : str, None
        The destination of the file. If None is given, the location is
        chosen by ``urlretrieve``.
    progress_bar : bool
        Whether to show a command-line progress bar while downloading.

    Returns
    -------
    filename : str
        The location of the downloaded file.

    Notes
    -----
    Progress bar use/example adapted from tqdm documentation: https://github.com/tqdm/tqdm
    """

    def my_hook(t):
        # Remember the previous block count so each call reports a delta.
        last_b = [0]

        def inner(b=1, bsize=1, tsize=None):
            if tsize is not None:
                t.total = tsize
            if b > 0:
                t.update((b - last_b[0]) * bsize)
            last_b[0] = b

        return inner

    if progress_bar:
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            filename, _ = urlretrieve(url, filename=destination, reporthook=my_hook(t))
    else:
        filename, _ = urlretrieve(url, filename=destination)

    # Hand the location back to the caller, as the docstring promises.
    return filename

Source File: attribute_loader.py From verb-attributes with MIT License

5 votes

def load_word_vectors(root, wv_type, dim):
    """Load word vectors from a path, trying .pt, .txt, and .zip extensions.

    The base name is ``<root>/<wv_type>.<dim>``; an integer ``dim`` is turned
    into ``'<dim>d'``. A ``.pt`` file is loaded with ``torch.load`` and
    returned. Otherwise a ``.txt`` file is read, or, when ``wv_type`` is a
    known key of ``URL``, the archive is downloaded and extracted into
    ``root`` and the lookup is retried.

    Raises ``RuntimeError`` when nothing can be loaded or the extracted
    archive lacks the requested dimension.
    """
    if isinstance(dim, int):
        dim = str(dim) + 'd'
    fname = os.path.join(root, wv_type + '.' + dim)
    if os.path.isfile(fname + '.pt'):
        fname_pt = fname + '.pt'
        print('loading word vectors from', fname_pt)
        return torch.load(fname_pt)
    if os.path.isfile(fname + '.txt'):
        fname_txt = fname + '.txt'
        # Read all lines and close the handle right away.
        with open(fname_txt, 'rb') as txt_file:
            cm = list(txt_file)
        # NOTE(review): in the visible code `cm` is not used after this point
        # and this branch returns None; the parsing step appears to be
        # missing from this excerpt. Confirm against the full source.
    elif os.path.basename(wv_type) in URL:
        # NOTE(review): membership is tested with the basename but the lookup
        # uses the full `wv_type`; verify this is intended.
        url = URL[wv_type]
        print('downloading word vectors from {}'.format(url))
        filename = os.path.basename(fname)
        if not os.path.exists(root):
            os.makedirs(root)
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
            fname, _ = urlretrieve(url, fname, reporthook=reporthook(t))
            with zipfile.ZipFile(fname, "r") as zf:
                print('extracting word vectors into {}'.format(root))
                zf.extractall(root)
        if not os.path.isfile(fname + '.txt'):
            raise RuntimeError('no word vectors of requested dimension found')
        return load_word_vectors(root, wv_type, dim)
    else:
        raise RuntimeError('unable to load word vectors')
#######

Source File: data_utils.py From DeepLearning_Wavelet-LSTM with MIT License

5 votes

def urlretrieve(url, filename, reporthook=None, data=None):
        """Python 2 stand-in for `urlretrieve`.

        The Python 2 `urlretrieve` is built on `FancyURLopener` from the
        legacy `urllib` module, which is known to have issues with proxy
        management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: optional hook called after every read, including
                the final empty one. It receives three arguments:
                the number of reads so far, the block size in bytes,
                and the total size of the file (`-1` when unknown).
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            length_header = response.info().get('Content-Length')
            total_size = -1 if length_header is None else int(length_header.strip())
            reads = 0
            while True:
                block = response.read(chunk_size)
                reads += 1
                if reporthook is not None:
                    reporthook(reads, chunk_size, total_size)
                if not block:
                    return
                yield block

        with closing(urlopen(url, data)) as response:
            with open(filename, 'wb') as fd:
                for block in chunk_read(response, reporthook=reporthook):
                    fd.write(block)

Source File: flask_cloudy.py From flask-cloudy with MIT License

5 votes

def _download_from_url(self, url):
        """
        Fetch ``url`` into a uniquely named file under ``/tmp``.

        The file name is a random UUID hex string plus the extension derived
        from the URL.

        :param url: address of the resource to download
        :return: path of the downloaded temporary file
        """
        extension = get_file_extension(url)
        if "?" in url:
            # Ignore the query string when working out the extension.
            without_query = url.partition("?")[0]
            # NOTE(review): this passes the (root, ext) tuple returned by
            # os.path.splitext to get_file_extension - confirm that helper
            # accepts a tuple.
            extension = get_file_extension(os.path.splitext(without_query))
        unique_stem = uuid.uuid4().hex
        filepath = "/tmp/%s.%s" % (unique_stem, extension)
        request.urlretrieve(url, filepath)
        return filepath

Source File: char_language_model.py From tf-tutorial with MIT License

5 votes

def maybe_download(filename='text8.zip', expected_bytes=31344016,
                   default_rul='http://mattmahoney.net/dc/'):
  """Fetch ``filename`` from ``default_rul`` if missing, then check its size.

  Returns the file name. When the size on disk is not ``expected_bytes``
  the actual size is printed and ``Exception`` is raised.
  """
  if not os.path.exists(filename):
    filename, _ = urlretrieve(default_rul + filename, filename)

  size_on_disk = os.stat(filename).st_size
  if size_on_disk != expected_bytes:
    print(size_on_disk)
    raise Exception(
      'Failed to verify ' + filename + '. Can you get to it with a browser?')
  print('Found and verified %s' % filename)
  return filename

Source File: preprocess.py From bidaf-keras with GNU General Public License v3.0

5 votes

def maybe_download(base_url, filename, destination_dir, show_progress=True):
    """Download ``base_url + filename`` into ``destination_dir`` if missing.

    Nothing is done when the destination file already exists. With
    ``show_progress`` a tqdm-based progress bar is shown while downloading.
    On ``KeyboardInterrupt`` a partially written destination file is removed
    before the interrupt is re-raised. The function returns ``None``.
    """
    class DownloadProgressBar(tqdm):
        def update_to(self, b=1, bsize=1, tsize=None):
            """
            b: int, optional
                Number of blocks just transferred [default: 1].
            bsize: int, optional
                Size of each block (in tqdm units) [default: 1].
            tsize: int, optional
                Total size (in tqdm units). If [default: None] remains unchanged.
            """
            if tsize is not None:
                self.total = tsize
            # tqdm.update() expects an increment, so subtract what is already counted.
            self.update(b * bsize - self.n)

    local_filename = None
    target_path = os.path.join(destination_dir, filename)
    if os.path.exists(target_path):
        return

    try:
        if not show_progress:
            # Simple download with no progress bar
            local_filename, _ = urlretrieve(base_url + filename, filename=target_path)
        else:
            print("Downloading file {}...".format(base_url + filename))
            # Download with a progress bar
            with DownloadProgressBar(unit='B', unit_scale=True,
                                     miniters=1, desc=filename) as t:
                local_filename, _ = urlretrieve(base_url + filename,
                                                filename=target_path,
                                                reporthook=t.update_to)

        print("File {} successfully loaded".format(filename))
    except AttributeError:
        print("An error occurred when downloading the file! Please get the dataset using a browser.")
        raise
    except KeyboardInterrupt:
        # Do not leave a truncated file behind.
        if os.path.exists(target_path):
            os.remove(target_path)
        raise

Source File: data_utils.py From dspp-keras with GNU Affero General Public License v3.0

5 votes

def urlretrieve(url, filename, reporthook=None, data=None):
        """Replacement for `urlretrieve` for Python 2.

        Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
        `urllib` module, known to have issues with proxy management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: optional hook function called after each block read
                and once more at end of stream.
                The hook will be passed three arguments;
                a count of blocks transferred so far,
                a block size in bytes, and the total size of the file
                (`-1` when the response has no `Content-Length` header).
            data: `data` argument passed to `urlopen`.
        """
        def chunk_read(response, chunk_size=8192, reporthook=None):
            # A missing Content-Length header must not abort the download.
            content_length = response.info().get('Content-Length')
            total_size = -1
            if content_length is not None:
                total_size = int(content_length.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if not chunk:
                    # Final call reports the full size as the block size;
                    # only made when a hook was actually supplied.
                    if reporthook:
                        reporthook(count, total_size, total_size)
                    break
                if reporthook:
                    reporthook(count, chunk_size, total_size)
                yield chunk

        response = urlopen(url, data)
        try:
            with open(filename, 'wb') as fd:
                for chunk in chunk_read(response, reporthook=reporthook):
                    fd.write(chunk)
        finally:
            # Release the connection even if reading or writing fails.
            response.close()

Python six.moves.urllib.request.urlretrieve() Examples