Python six.moves.urllib.request.urlretrieve() Examples

The following are 30 code examples of six.moves.urllib.request.urlretrieve(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module six.moves.urllib.request , or try the search function .
Example #1
Source File: squad_preprocess.py    From cs224n-win18-squad with Apache License 2.0 6 votes vote down vote up
def maybe_download(url, filename, prefix, num_bytes=None):
    """Takes an URL, a filename, and the expected bytes, download
    the contents and returns the filename.
    num_bytes=None disables the file size check."""
    local_filename = None
    if not os.path.exists(os.path.join(prefix, filename)):
        try:
            print "Downloading file {}...".format(url + filename)
            with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
                local_filename, _ = urlretrieve(url + filename, os.path.join(prefix, filename), reporthook=reporthook(t))
        except AttributeError as e:
            print "An error occurred when downloading the file! Please get the dataset using a browser."
            raise e
    # We have a downloaded file
    # Check the stats and make sure they are ok
    file_stats = os.stat(os.path.join(prefix, filename))
    if num_bytes is None or file_stats.st_size == num_bytes:
        print "File {} successfully loaded".format(filename)
    else:
        raise Exception("Unexpected dataset size. Please get the dataset using a browser.")

    return local_filename 
Example #2
Source File: file_utils.py    From Benchmarks with MIT License 6 votes vote down vote up
def urlretrieve(url, filename, reporthook=None, data=None):
        def chunk_read(response, chunk_size=8192, reporthook=None):
            total_size = response.info().get('Content-Length').strip()
            total_size = int(total_size)
            count = 0
            while 1:
                chunk = response.read(chunk_size)
                count += 1
                if not chunk:
                    reporthook(count, total_size, total_size)
                    break
                if reporthook:
                    reporthook(count, chunk_size, total_size)
                yield chunk

        response = urlopen(url, data)
        with open(filename, 'wb') as fd:
            for chunk in chunk_read(response, reporthook=reporthook):
                fd.write(chunk) 
Example #3
Source File: data_utils.py    From KerasNeuralFingerprint with MIT License 6 votes vote down vote up
def urlretrieve(url, filename, reporthook=None, data=None):
        def chunk_read(response, chunk_size=8192, reporthook=None):
            total_size = response.info().get('Content-Length').strip()
            total_size = int(total_size)
            count = 0
            while 1:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                count += 1
                if reporthook:
                    reporthook(count, chunk_size, total_size)
                yield chunk

        response = urlopen(url, data)
        with open(filename, 'wb') as fd:
            for chunk in chunk_read(response, reporthook=reporthook):
                fd.write(chunk) 
Example #4
Source File: downloader.py    From auptimizer with GNU General Public License v3.0 6 votes vote down vote up
def download_data_url(url, download_dir):
	filename = url.split('/')[-1]
	file_path = os.path.join(download_dir, filename)
	
	if not os.path.exists(file_path):
		os.makedirs(download_dir, exist_ok=True)
		
		print('Download %s to %s' % (url, file_path))
		file_path, _ = request.urlretrieve(
			url=url,
			filename=file_path,
			reporthook=report_download_progress)
		
		print('\nExtracting files')
		if file_path.endswith('.zip'):
			zipfile.ZipFile(file=file_path, mode='r').extractall(download_dir)
		elif file_path.endswith(('.tar.gz', '.tgz')):
			tarfile.open(name=file_path, mode='r:gz').extractall(download_dir) 
Example #5
Source File: squad_preprocess.py    From squad-transformer with Apache License 2.0 6 votes vote down vote up
def maybe_download(url, filename, prefix, num_bytes=None):
    """Takes an URL, a filename, and the expected bytes, download
    the contents and returns the filename.
    num_bytes=None disables the file size check."""
    local_filename = None
    output_path = os.path.join(prefix, filename)
    if not os.path.exists(output_path):
        try:
            print("Downloading file {} to {}...".format(url + filename, output_path))
            with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
                local_filename, _ = urlretrieve(url + filename, output_path, reporthook=reporthook(t))
        except AttributeError as e:
            print("An error occurred when downloading the file! Please get the dataset using a browser.")
            raise e
    # We have a downloaded file
    # Check the stats and make sure they are ok
    file_stats = os.stat(os.path.join(prefix, filename))
    if num_bytes is None or file_stats.st_size == num_bytes:
        print("File {} successfully downloaded to {}.".format(filename, output_path))
    else:
        raise Exception("Unexpected dataset size. Please get the dataset using a browser.")

    return local_filename 
Example #6
Source File: data_utils.py    From Machine-Learning-with-TensorFlow-1.x with MIT License 6 votes vote down vote up
def download_file(file_url, output_file_dir, expected_size, FORCE=False):
    name = file_url.split('/')[-1]
    file_output_path = os.path.join(output_file_dir, name)
    print('Attempting to download ' + file_url)
    print('File output path: ' + file_output_path)
    print('Expected size: ' + str(expected_size))
    if not os.path.isdir(output_file_dir):
        os.makedirs(output_file_dir)

    if os.path.isfile(file_output_path) and os.stat(file_output_path).st_size == expected_size and not FORCE:
        print('File already downloaded completely!')
        return file_output_path
    else:
        print(' ')
        filename, _ = urlretrieve(file_url, file_output_path, download_hook_function)
        print(' ')
        statinfo = os.stat(filename)
        print(statinfo.st_size)
        if statinfo.st_size == expected_size:
            print('Found and verified', filename)
        else:
            raise Exception('Could not download ' + filename)
        return filename 
Example #7
Source File: data_utils.py    From Machine-Learning-with-TensorFlow-1.x with MIT License 6 votes vote down vote up
def download_file(file_url, output_file_dir, expected_size, FORCE=False):
    name = file_url.split('/')[-1]
    file_output_path = os.path.join(output_file_dir, name)
    print('Attempting to download ' + file_url)
    print('File output path: ' + file_output_path)
    print('Expected size: ' + str(expected_size))
    if not os.path.isdir(output_file_dir):
        os.makedirs(output_file_dir)

    if os.path.isfile(file_output_path) and os.stat(file_output_path).st_size == expected_size and not FORCE:
        print('File already downloaded completely!')
        return file_output_path
    else:
        print(' ')
        filename, _ = urlretrieve(file_url, file_output_path, download_hook_function)
        print(' ')
        statinfo = os.stat(filename)
        if statinfo.st_size == expected_size:
            print('Found and verified', filename)
        else:
            raise Exception('Could not download ' + filename)
        return filename 
Example #8
Source File: exchange_utils.py    From catalyst with Apache License 2.0 6 votes vote down vote up
def download_exchange_symbols(exchange_name):
    """
    Downloads the exchange's symbols.json from the repository.

    Parameters
    ----------
    exchange_name: str
    environ:

    Returns
    -------
    str

    """
    filename = get_exchange_symbols_filename(exchange_name)
    url = SYMBOLS_URL.format(exchange=exchange_name)
    response = request.urlretrieve(url=url, filename=filename)
    return response 
Example #9
Source File: downloads.py    From mead-baseline with Apache License 2.0 6 votes vote down vote up
def web_downloader(url, path_to_save=None):
    # Use a class to simulate the nonlocal keyword in 2.7
    class Context: pg = None

    def _report_hook(count, block_size, total_size):
        if Context.pg is None:
            length = int((total_size + block_size - 1) / float(block_size)) if total_size != -1 else 1
            Context.pg = create_progress_bar(length)
        Context.pg.update()

    if not path_to_save:
        path_to_save = "/tmp/data.dload-{}".format(os.getpid())
    try:
        path_to_save, _ = urlretrieve(url, path_to_save, reporthook=_report_hook)
        Context.pg.done()
    except Exception as e:  # this is too broad but there are too many exceptions to handle separately
        raise RuntimeError("failed to download data from [url]: {} [to]: {}".format(url, path_to_save))
    return path_to_save 
Example #10
Source File: vectorizers.py    From mead-baseline with Apache License 2.0 6 votes vote down vote up
def load_bert_vocab(vocab_file):
    global BERT_VOCAB
    if BERT_VOCAB is not None:
        return BERT_VOCAB

    if validate_url(vocab_file):
        print(f'Downloading {vocab_file}')
        vocab_file, _ = urlretrieve(vocab_file)

    vocab = collections.OrderedDict()
    index = 0
    with open(vocab_file, "r") as rf:
        for line in rf:
            token = convert_to_unicode(line)
            if not token:
                break
            token = token.strip()
            vocab[token] = index
            index += 1
    BERT_VOCAB = vocab
    return vocab 
Example #11
Source File: download.py    From chainer with MIT License 6 votes vote down vote up
def download(url, dst_file_path):
    # Download a file, showing progress
    bar_wrap = [None]

    def reporthook(count, block_size, total_size):
        bar = bar_wrap[0]
        if bar is None:
            bar = progressbar.ProgressBar(
                maxval=total_size,
                widgets=[
                    progressbar.Percentage(),
                    ' ',
                    progressbar.Bar(),
                    ' ',
                    progressbar.FileTransferSpeed(),
                    ' | ',
                    progressbar.ETA(),
                ])
            bar.start()
            bar_wrap[0] = bar
        bar.update(min(count * block_size, total_size))

    request.urlretrieve(url, dst_file_path, reporthook=reporthook) 
Example #12
Source File: utils.py    From ctc_tensorflow_example with MIT License 6 votes vote down vote up
def maybe_download(filename, expected_bytes, force=False):
    """Download a file if not present, and make sure it's the right size."""
    if force or not os.path.exists(filename):
        print('Attempting to download:', filename)
        filename, _ = urlretrieve(url + filename, filename,
                                  reporthook=download_progress_hook)
        print('\nDownload Complete!')
    statinfo = os.stat(filename)

    if statinfo.st_size == expected_bytes:
        print('Found and verified', filename)
    else:
        raise Exception(
                        'Failed to verify ' + filename + \
                        '. Can you get to it with a browser?')
    return filename 
Example #13
Source File: util.py    From azure-cli-extensions with MIT License 6 votes vote down vote up
def retrieve_file_from_url(url):
    """
    Retrieve a file from an URL

    Args:
        url: The URL to retrieve the file from.

    Returns:
        The absolute path of the downloaded file.
    """
    try:
        alias_source, _ = urlretrieve(url)
        # Check for HTTPError in Python 2.x
        with open(alias_source, 'r') as f:
            content = f.read()
            if content[:3].isdigit():
                raise CLIError(ALIAS_FILE_URL_ERROR.format(url, content.strip()))
    except Exception as exception:
        if isinstance(exception, CLIError):
            raise

        # Python 3.x
        raise CLIError(ALIAS_FILE_URL_ERROR.format(url, exception))

    return alias_source 
Example #14
Source File: 1_prepare_pickle.py    From Neural-Network-Programming-with-TensorFlow with MIT License 6 votes vote down vote up
def maybe_download(filename, expected_bytes, force=False):
  """Download a file if not present, and make sure it's the right size."""
  dest_filename = os.path.join(data_root, filename)
  if force or not os.path.exists(dest_filename):
    print('Attempting to download:', filename) 
    filename, _ = urlretrieve(url + filename, dest_filename, reporthook=download_progress_hook)
    print('\nDownload Complete!')
  statinfo = os.stat(dest_filename)
  if statinfo.st_size == expected_bytes:
    print('Found and verified', dest_filename)
  else:
    raise Exception(
      'Failed to verify ' + dest_filename + '. Can you get to it with a browser?')
  return dest_filename


#num_classes = 10 
Example #15
Source File: data_utils.py    From deepQuest with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def urlretrieve(url, filename, reporthook=None, data=None):
        """Replacement for `urlretrive` for Python 2.

        Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
        `urllib` module, known to have issues with proxy management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: a hook function that will be called once
                on establishment of the network connection and once
                after each block read thereafter.
                The hook will be passed three arguments;
                a count of blocks transferred so far,
                a block size in bytes, and the total size of the file.
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            content_type = response.info().get('Content-Length')
            total_size = -1
            if content_type is not None:
                total_size = int(content_type.strip())
            count = 0
            while 1:
                chunk = response.read(chunk_size)
                count += 1
                if not chunk:
                    reporthook(count, total_size, total_size)
                    break
                if reporthook:
                    reporthook(count, chunk_size, total_size)
                yield chunk

        response = urlopen(url, data)
        with open(filename, 'wb') as fd:
            for chunk in chunk_read(response, reporthook=reporthook):
                fd.write(chunk) 
Example #16
Source File: data.py    From tensorboardX with MIT License 5 votes vote down vote up
def download_mnist_data():
    print('Downloading {:s}...'.format(train_images))
    request.urlretrieve('{:s}/{:s}'.format(parent, train_images), train_images)
    print('Done')
    print('Downloading {:s}...'.format(train_labels))
    request.urlretrieve('{:s}/{:s}'.format(parent, train_labels), train_labels)
    print('Done')
    print('Downloading {:s}...'.format(test_images))
    request.urlretrieve('{:s}/{:s}'.format(parent, test_images), test_images)
    print('Done')
    print('Downloading {:s}...'.format(test_labels))
    request.urlretrieve('{:s}/{:s}'.format(parent, test_labels), test_labels)
    print('Done')

    print('Converting training data...')
    data_train, target_train = load_mnist(train_images, train_labels,
                                          num_train)
    print('Done')
    print('Converting test data...')
    data_test, target_test = load_mnist(test_images, test_labels, num_test)
    mnist = {'data': np.append(data_train, data_test, axis=0),
             'target': np.append(target_train, target_test, axis=0)}
    print('Done')
    print('Save output...')
    with open('mnist.pkl', 'wb') as output:
        six.moves.cPickle.dump(mnist, output, -1)
    print('Done')
    print('Convert completed') 
Example #17
Source File: data_utils.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def urlretrieve(url, filename, reporthook=None, data=None):
        """Replacement for `urlretrive` for Python 2.

        Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
        `urllib` module, known to have issues with proxy management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: a hook function that will be called once
                on establishment of the network connection and once
                after each block read thereafter.
                The hook will be passed three arguments;
                a count of blocks transferred so far,
                a block size in bytes, and the total size of the file.
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            content_type = response.info().get('Content-Length')
            total_size = -1
            if content_type is not None:
                total_size = int(content_type.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if reporthook is not None:
                    reporthook(count, chunk_size, total_size)
                if chunk:
                    yield chunk
                else:
                    break

        with closing(urlopen(url, data)) as response, open(filename, 'wb') as fd:
                for chunk in chunk_read(response, reporthook=reporthook):
                    fd.write(chunk) 
Example #18
Source File: data_utils.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def urlretrieve(url, filename, reporthook=None, data=None):
        """Replacement for `urlretrive` for Python 2.

        Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
        `urllib` module, known to have issues with proxy management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: a hook function that will be called once
                on establishment of the network connection and once
                after each block read thereafter.
                The hook will be passed three arguments;
                a count of blocks transferred so far,
                a block size in bytes, and the total size of the file.
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            content_type = response.info().get('Content-Length')
            total_size = -1
            if content_type is not None:
                total_size = int(content_type.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if reporthook is not None:
                    reporthook(count, chunk_size, total_size)
                if chunk:
                    yield chunk
                else:
                    break

        with closing(urlopen(url, data)) as response, open(filename, 'wb') as fd:
                for chunk in chunk_read(response, reporthook=reporthook):
                    fd.write(chunk) 
Example #19
Source File: load_data.py    From NumNum with MIT License 5 votes vote down vote up
def maybe_download(filename, force=False):
  """Download a file if not present, and make sure it's the right size."""
  if force or not os.path.exists(filename):
    print('Attempting to download:', filename) 
    filename, _ = urlretrieve(url + filename, filename)
    print('Download Complete!')
  statinfo = os.stat(filename)
  return filename 
Example #20
Source File: downloader.py    From chakin with MIT License 5 votes vote down vote up
def download(number=-1, name="", save_dir='./'):
    """Download pre-trained word vector
    :param number: integer, default ``None``
    :param save_dir: str, default './'
    :return: file path for downloaded file
    """
    df = load_datasets()

    if number > -1:
        row = df.iloc[[number]]
    elif name:
        row = df.loc[df["Name"] == name]

    url = ''.join(row.URL)
    if not url:
        print('The word vector you specified was not found. Please specify correct name.')

    widgets = ['Test: ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets)

    def dlProgress(count, blockSize, totalSize):
        if pbar.max_value is None:
            pbar.max_value = totalSize
            pbar.start()

        pbar.update(min(count * blockSize, totalSize))

    file_name = url.split('/')[-1]
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, file_name)
    path, _ = urlretrieve(url, save_path, reporthook=dlProgress)
    pbar.finish()
    return path 
Example #21
Source File: 5_word2vec.py    From udacity-deep-learning with GNU General Public License v3.0 5 votes vote down vote up
def maybe_download(filename, expected_bytes):
    """Download a file if not present, and make sure it's the right size."""
    if not os.path.exists(filename):
        filename, _ = urlretrieve(url + filename, filename)
    statinfo = os.stat(filename)
    if statinfo.st_size == expected_bytes:
        print('Found and verified %s' % filename)
    else:
        print(statinfo.st_size)
        raise Exception(
            'Failed to verify ' + filename + '. Can you get to it with a browser?')
    return filename 
Example #22
Source File: 1_notmnist.py    From udacity-deep-learning with GNU General Public License v3.0 5 votes vote down vote up
def maybe_download(filename, expected_bytes, force=False):
    """Download a file if not present, and make sure it's the right size."""
    if force or not os.path.exists(filename):
        print('Attempting to download:', filename)
        filename, _ = urlretrieve(url + filename, filename, reporthook=download_progress_hook)
        print('\nDownload Complete!')
    statinfo = os.stat(filename)
    if statinfo.st_size == expected_bytes:
        print('Found and verified', filename)
    else:
        raise Exception(
            'Failed to verify ' + filename + '. Can you get to it with a browser?')
    return filename 
Example #23
Source File: 6_lstm.py    From udacity-deep-learning with GNU General Public License v3.0 5 votes vote down vote up
def maybe_download(filename, expected_bytes):
    """Download a file if not present, and make sure it's the right size."""
    if not os.path.exists(filename):
        filename, _ = urlretrieve(url + filename, filename)
    statinfo = os.stat(filename)
    if statinfo.st_size == expected_bytes:
        print('Found and verified %s' % filename)
    else:
        print(statinfo.st_size)
        raise Exception(
            'Failed to verify ' + filename + '. Can you get to it with a browser?')
    return filename 
Example #24
Source File: utils.py    From pretorched-x with MIT License 5 votes vote down vote up
def download_url(url, destination=None, progress_bar=True):
    """Download a URL to a local file.

    Parameters
    ----------
    url : str
        The URL to download.
    destination : str, None
        The destination of the file. If None is given the file is saved to a temporary directory.
    progress_bar : bool
        Whether to show a command-line progress bar while downloading.

    Returns
    -------
    filename : str
        The location of the downloaded file.

    Notes
    -----
    Progress bar use/example adapted from tqdm documentation: https://github.com/tqdm/tqdm
    """

    def my_hook(t):
        last_b = [0]

        def inner(b=1, bsize=1, tsize=None):
            if tsize is not None:
                t.total = tsize
            if b > 0:
                t.update((b - last_b[0]) * bsize)
            last_b[0] = b

        return inner

    if progress_bar:
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            filename, _ = urlretrieve(url, filename=destination, reporthook=my_hook(t))
    else:
        filename, _ = urlretrieve(url, filename=destination) 
Example #25
Source File: attribute_loader.py    From verb-attributes with MIT License 5 votes vote down vote up
def load_word_vectors(root, wv_type, dim):
    """Load word vectors from a path, trying .pt, .txt, and .zip extensions."""
    if isinstance(dim, int):
        dim = str(dim) + 'd'
    fname = os.path.join(root, wv_type + '.' + dim)
    if os.path.isfile(fname + '.pt'):
        fname_pt = fname + '.pt'
        print('loading word vectors from', fname_pt)
        return torch.load(fname_pt)
    if os.path.isfile(fname + '.txt'):
        fname_txt = fname + '.txt'
        cm = open(fname_txt, 'rb')
        cm = [line for line in cm]
    elif os.path.basename(wv_type) in URL:
        url = URL[wv_type]
        print('downloading word vectors from {}'.format(url))
        filename = os.path.basename(fname)
        if not os.path.exists(root):
            os.makedirs(root)
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
            fname, _ = urlretrieve(url, fname, reporthook=reporthook(t))
            with zipfile.ZipFile(fname, "r") as zf:
                print('extracting word vectors into {}'.format(root))
                zf.extractall(root)
        if not os.path.isfile(fname + '.txt'):
            raise RuntimeError('no word vectors of requested dimension found')
        return load_word_vectors(root, wv_type, dim)
    else:
        raise RuntimeError('unable to load word vectors')
####### 
Example #26
Source File: data_utils.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def urlretrieve(url, filename, reporthook=None, data=None):
        """Replacement for `urlretrive` for Python 2.

        Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
        `urllib` module, known to have issues with proxy management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: a hook function that will be called once
                on establishment of the network connection and once
                after each block read thereafter.
                The hook will be passed three arguments;
                a count of blocks transferred so far,
                a block size in bytes, and the total size of the file.
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            content_type = response.info().get('Content-Length')
            total_size = -1
            if content_type is not None:
                total_size = int(content_type.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if reporthook is not None:
                    reporthook(count, chunk_size, total_size)
                if chunk:
                    yield chunk
                else:
                    break

        with closing(urlopen(url, data)) as response, open(filename, 'wb') as fd:
                for chunk in chunk_read(response, reporthook=reporthook):
                    fd.write(chunk) 
Example #27
Source File: flask_cloudy.py    From flask-cloudy with MIT License 5 votes vote down vote up
def _download_from_url(self, url):
        """
        Download a url and return the tmp path
        :param url:
        :return:
        """
        ext = get_file_extension(url)
        if "?" in url:
            ext = get_file_extension(os.path.splitext(url.split("?")[0]))
        filepath = "/tmp/%s.%s" % (uuid.uuid4().hex, ext)
        request.urlretrieve(url, filepath)
        return filepath 
Example #28
Source File: char_language_model.py    From tf-tutorial with MIT License 5 votes vote down vote up
def maybe_download(filename='text8.zip', expected_bytes=31344016,
                   default_rul='http://mattmahoney.net/dc/'):
  """Download a file if not present, and make sure it's the right size."""
  if not os.path.exists(filename):
    filename, _ = urlretrieve(default_rul + filename, filename)
  statinfo = os.stat(filename)
  if statinfo.st_size == expected_bytes:
    print('Found and verified %s' % filename)
  else:
    print(statinfo.st_size)
    raise Exception(
      'Failed to verify ' + filename + '. Can you get to it with a browser?')
  return filename 
Example #29
Source File: preprocess.py    From bidaf-keras with GNU General Public License v3.0 5 votes vote down vote up
def maybe_download(base_url, filename, destination_dir, show_progress=True):
    class DownloadProgressBar(tqdm):
        def update_to(self, b=1, bsize=1, tsize=None):
            """
            b: int, optional
                Number of blocks just transferred [default: 1].
            bsize: int, optional
                Size of each block (in tqdm units) [default: 1].
            tsize: int, optional
                Total size (in tqdm units). If [default: None] remains unchanged.
            """
            if tsize is not None:
                self.total = tsize
            self.update(b * bsize - self.n)

    local_filename = None
    if not os.path.exists(os.path.join(destination_dir, filename)):
        try:
            if show_progress:
                print("Downloading file {}...".format(base_url + filename))
                # Download with a progress bar
                with DownloadProgressBar(unit='B', unit_scale=True,
                                         miniters=1, desc=filename) as t:
                    local_filename, _ = urlretrieve(base_url + filename,
                                                    filename=os.path.join(destination_dir, filename),
                                                    reporthook=t.update_to)
            else:
                # Simple download with no progress bar
                local_filename, _ = urlretrieve(base_url + filename, filename=os.path.join(destination_dir, filename))

            print("File {} successfully loaded".format(filename))
        except AttributeError as e:
            print("An error occurred when downloading the file! Please get the dataset using a browser.")
            raise e
        except KeyboardInterrupt as k:
            if os.path.exists(os.path.join(destination_dir, filename)):
                os.remove(os.path.join(destination_dir, filename))
            raise k 
Example #30
Source File: data_utils.py    From dspp-keras with GNU Affero General Public License v3.0 5 votes vote down vote up
def urlretrieve(url, filename, reporthook=None, data=None):
        """Replacement for `urlretrive` for Python 2.

        Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
        `urllib` module, known to have issues with proxy management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: a hook function that will be called once
                on establishment of the network connection and once
                after each block read thereafter.
                The hook will be passed three arguments;
                a count of blocks transferred so far,
                a block size in bytes, and the total size of the file.
            data: `data` argument passed to `urlopen`.
        """
        def chunk_read(response, chunk_size=8192, reporthook=None):
            total_size = response.info().get('Content-Length').strip()
            total_size = int(total_size)
            count = 0
            while 1:
                chunk = response.read(chunk_size)
                count += 1
                if not chunk:
                    reporthook(count, total_size, total_size)
                    break
                if reporthook:
                    reporthook(count, chunk_size, total_size)
                yield chunk

        response = urlopen(url, data)
        with open(filename, 'wb') as fd:
            for chunk in chunk_read(response, reporthook=reporthook):
                fd.write(chunk)