Python urllib.request.urlretrieve() Examples
The following are 30 code examples of urllib.request.urlretrieve(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib.request, or try the search function.
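Before diving into the examples, here is a minimal sketch of the call itself. urlretrieve(url, filename=None, reporthook=None, data=None) downloads url to a local file and returns a (filename, headers) tuple; the optional reporthook is invoked with (block_number, block_size, total_size) as data arrives. Note that the Python documentation flags urlretrieve() as a legacy interface that may be deprecated in the future. The URL and output path below are hypothetical placeholders, not taken from any of the projects listed here.

import os
from urllib.request import urlretrieve

def show_progress(block_num, block_size, total_size):
    # total_size can be -1 if the server does not send Content-Length
    if total_size > 0:
        done = min(block_num * block_size, total_size)
        print('\rDownloaded %d/%d bytes' % (done, total_size), end='')

# Hypothetical URL and destination path, for illustration only
path, headers = urlretrieve('https://example.com/archive.tar.gz',
                            'archive.tar.gz', reporthook=show_progress)
print('\nSaved to %s (%d bytes)' % (path, os.path.getsize(path)))

Most of the examples below follow this same pattern: call urlretrieve(), optionally with a reporthook for progress reporting, then post-process the downloaded file.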
Example #1
Source File: download.py From Voice_Converter_CycleGAN with MIT License | 8 votes |
def maybe_download(filename, url, destination_dir, expected_bytes=None, force=False):
    filepath = os.path.join(destination_dir, filename)
    if force or not os.path.exists(filepath):
        if not os.path.exists(destination_dir):
            os.makedirs(destination_dir)
        print('Attempting to download: ' + filename)
        filepath, _ = urlretrieve(url, filepath, reporthook=progress_bar)
        print('Download complete!')
    statinfo = os.stat(filepath)
    if expected_bytes is not None:
        if statinfo.st_size == expected_bytes:
            print('Found and verified: ' + filename)
        else:
            raise Exception('Failed to verify: ' + filename + '. Can you get to it with a browser?')
    else:
        print('Found: ' + filename)
        print('The size of the file: ' + str(statinfo.st_size))
    return filepath
Example #2
Source File: create_datasets.py From realmix with Apache License 2.0 | 6 votes |
def _load_cifar100():
    def unflatten(images):
        return np.transpose(images.reshape((images.shape[0], 3, 32, 32)),
                            [0, 2, 3, 1])

    with tempfile.NamedTemporaryFile() as f:
        request.urlretrieve(URLS['cifar100'], f.name)
        tar = tarfile.open(fileobj=f)
        data_dict = scipy.io.loadmat(tar.extractfile('cifar-100-matlab/train.mat'))
        train_set = {'images': data_dict['data'],
                     'labels': data_dict['fine_labels'].flatten()}
        data_dict = scipy.io.loadmat(tar.extractfile('cifar-100-matlab/test.mat'))
        test_set = {'images': data_dict['data'],
                    'labels': data_dict['fine_labels'].flatten()}
    train_set['images'] = _encode_png(unflatten(train_set['images']))
    test_set['images'] = _encode_png(unflatten(test_set['images']))
    return dict(train=train_set, test=test_set)

# Load a custom dataset from a local directory.
Example #3
Source File: spider_bai_si_bu_de_jie.py From spider_python with Apache License 2.0 | 6 votes |
def run(self):
    while True:
        if self.joke_queue.empty() and self.page_queue.empty():
            print(self.name + ' finished all tasks~')
            break
        # 2. Get data from the joke_queue
        joke_info = self.joke_queue.get(timeout=40)
        username, content, img, alt, pubtime = joke_info
        # 3. Acquire the lock
        self.gLock.acquire()
        # 4. Write the data to the csv file
        self.writer.writerow((username, content, img, alt, pubtime))
        # 5. Download the image to local disk
        # file_name = alt + fileutils.get_file_suffix(img)
        # request.urlretrieve(img, './imgs/%s' % file_name)
        # 6. Release the lock
        self.gLock.release()
        print('Wrote one record successfully')
Example #4
Source File: douban.py From spider_python with Apache License 2.0 | 6 votes |
def _regonize_captcha(self, image_url):
    """
    Manually recognize the captcha [urllib + PIL]
    :param image_url:
    :return:
    """
    print('Captcha URL: %s, downloading the image' % image_url)
    # Download the image to local disk
    request.urlretrieve(image_url, 'captcha.png')
    print('Download finished, displaying the image')
    # Show it on the console and enter the captcha manually
    # Open the image
    image = Image.open('captcha.png')
    # Display it
    image.show()
    # Prompt for the captcha
    captcha = input('Please enter the captcha: ')
    return captcha
Example #5
Source File: util.py From SPN.pytorch with MIT License | 6 votes |
def download_url(url, destination=None, progress_bar=True):
    def my_hook(t):
        last_b = [0]

        def inner(b=1, bsize=1, tsize=None):
            if tsize is not None:
                t.total = tsize
            if b > 0:
                t.update((b - last_b[0]) * bsize)
            last_b[0] = b
        return inner

    if progress_bar:
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
            filename, _ = urlretrieve(url, filename=destination, reporthook=my_hook(t))
    else:
        filename, _ = urlretrieve(url, filename=destination)
Example #6
Source File: download.py From Singing_Voice_Separation_RNN with MIT License | 6 votes |
def maybe_download(filename, url, destination_dir, expected_bytes=None, force=False):
    filepath = os.path.join(destination_dir, filename)
    if force or not os.path.exists(filepath):
        if not os.path.exists(destination_dir):
            os.makedirs(destination_dir)
        print('Attempting to download: ' + filename)
        filepath, _ = urlretrieve(url, filepath, reporthook=progress_bar)
        print('Download complete!')
    statinfo = os.stat(filepath)
    if expected_bytes is not None:
        if statinfo.st_size == expected_bytes:
            print('Found and verified: ' + filename)
        else:
            raise Exception('Failed to verify: ' + filename + '. Can you get to it with a browser?')
    else:
        print('Found: ' + filename)
        print('The size of the file: ' + str(statinfo.st_size))
    return filepath
Example #7
Source File: github.py From fusesoc with BSD 2-Clause "Simplified" License | 6 votes |
def _checkout(self, local_dir):
    user = self.config.get("user")
    repo = self.config.get("repo")
    version = self.config.get("version", "master")

    # TODO : Sanitize URL
    url = URL.format(user=user, repo=repo, version=version)
    logger.info("Downloading {}/{} from github".format(user, repo))
    try:
        (filename, headers) = urllib.urlretrieve(url)
    except URLError as e:
        raise RuntimeError("Failed to download '{}'. '{}'".format(url, e.reason))
    t = tarfile.open(filename)
    (cache_root, core) = os.path.split(local_dir)

    # Ugly hack to get the first part of the directory name of the extracted files
    tmp = t.getnames()[0]
    t.extractall(cache_root)
    os.rename(os.path.join(cache_root, tmp), os.path.join(cache_root, core))
Example #8
Source File: fid_score.py From AutoGAN with MIT License | 6 votes |
def check_or_download_inception(inception_path):
    """ Checks if the path to the inception file is valid, or downloads
        the file if it is not present. """
    INCEPTION_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
    if inception_path is None:
        inception_path = '/tmp'
    inception_path = pathlib.Path(inception_path)
    model_file = inception_path / 'classify_image_graph_def.pb'
    if not model_file.exists():
        print("Downloading Inception model")
        from urllib import request
        import tarfile
        fn, _ = request.urlretrieve(INCEPTION_URL)
        with tarfile.open(fn, mode='r') as f:
            f.extract('classify_image_graph_def.pb', str(model_file.parent))
    return str(model_file)
Example #9
Source File: create_datasets.py From realmix with Apache License 2.0 | 6 votes |
def _load_cifar10():
    def unflatten(images):
        return np.transpose(images.reshape((images.shape[0], 3, 32, 32)),
                            [0, 2, 3, 1])

    with tempfile.NamedTemporaryFile() as f:
        request.urlretrieve(URLS['cifar10'], f.name)
        tar = tarfile.open(fileobj=f)
        train_data_batches, train_data_labels = [], []
        for batch in range(1, 6):
            data_dict = scipy.io.loadmat(tar.extractfile(
                'cifar-10-batches-mat/data_batch_{}.mat'.format(batch)))
            train_data_batches.append(data_dict['data'])
            train_data_labels.append(data_dict['labels'].flatten())
        train_set = {'images': np.concatenate(train_data_batches, axis=0),
                     'labels': np.concatenate(train_data_labels, axis=0)}
        data_dict = scipy.io.loadmat(tar.extractfile(
            'cifar-10-batches-mat/test_batch.mat'))
        test_set = {'images': data_dict['data'],
                    'labels': data_dict['labels'].flatten()}
    train_set['images'] = _encode_png(unflatten(train_set['images']))
    test_set['images'] = _encode_png(unflatten(test_set['images']))
    return dict(train=train_set, test=test_set)
Example #10
Source File: cambridgema.py From cornerwise with MIT License | 6 votes |
def import_shapers(logger):
    (_, zip_path) = tempfile.mkstemp()
    (_, http_message) = request.urlretrieve(url, zip_path)
    zip_file = ZipFile(zip_path)
    ex_dir = tempfile.mkdtemp()
    zip_file.extractall(ex_dir)
    shapefiles = glob.glob1(ex_dir, "*.shp")

    lm = LayerMapping(Parcel, "/data/shapefiles/M274TaxPar.shp",
                      {"shape_leng": "SHAPE_Leng",
                       "shape_area": "SHAPE_Area",
                       "map_par_id": "MAP_PAR_ID",
                       "loc_id": "LOC_ID",
                       "poly_type": "POLY_TYPE",
                       "map_no": "MAP_NO",
                       "source": "SOURCE",
                       "plan_id": "PLAN_ID",
                       "last_edit": "LAST_EDIT",
                       "town_id": "TOWN_ID",
                       "shape": "POLYGON"})
Example #11
Source File: commands.py From udata with GNU Affero General Public License v3.0 | 6 votes |
def load_logos(filename):
    '''
    Load logos from a geologos archive from <filename>

    <filename> can be either a local path or a remote URL.
    '''
    if filename.startswith('http'):
        log.info('Downloading GeoLogos bundle: %s', filename)
        filename, _ = urlretrieve(filename, tmp.path('geologos.tar.xz'))

    log.info('Extracting GeoLogos bundle')
    with contextlib.closing(lzma.LZMAFile(filename)) as xz:
        with tarfile.open(fileobj=xz, encoding='utf8') as tar:
            tar.extractall(tmp.root, members=tar.getmembers())

    log.info('Moving to the final location and cleaning up')
    if os.path.exists(logos.root):
        shutil.rmtree(logos.root)
    shutil.move(tmp.path('logos'), logos.root)
    log.info('Done')
Example #12
Source File: antipackage.py From antipackage with MIT License | 6 votes |
def _install_module(self, fullname):
    top, username, repo, modname = self._parse_fullname(fullname)
    url = 'https://raw.githubusercontent.com/%s/%s/master/%s' % (username, repo, modname + '.py')
    print('Downloading: ', url)
    try:
        tmp_file, resp = urlretrieve(url)
        with open(tmp_file, 'r') as f:
            new_content = f.read()
        if new_content == 'Not Found':
            raise InstallError('remote file does not exist')
    except IOError:
        raise InstallError('error downloading file')
    new = tmp_file
    old = self._install_path(fullname)
    updated = self._update_if_changed(old, new)
    if updated == 'updated':
        print('Updating module: ', fullname)
    elif updated == 'installed':
        print('Installing module: ', fullname)
    elif updated == 'noaction':
        print('Using existing version: ', fullname)
Example #13
Source File: sfd_detector.py From VTuber_Unity with MIT License | 6 votes |
def __init__(self, device, path_to_detector=None, verbose=False):
    super(SFDDetector, self).__init__(device, verbose)
    base_path = "face_alignment/ckpts"

    # Initialise the face detector
    if path_to_detector is None:
        path_to_detector = os.path.join(base_path, "s3fd_convert.pth")

        if not os.path.isfile(path_to_detector):
            print("Downloading the face detection CNN. Please wait...")

            path_to_temp_detector = os.path.join(base_path, "s3fd_convert.pth.download")

            if os.path.isfile(path_to_temp_detector):
                os.remove(os.path.join(path_to_temp_detector))

            request_file.urlretrieve(
                "https://www.adrianbulat.com/downloads/python-fan/s3fd_convert.pth",
                os.path.join(path_to_temp_detector))

            os.rename(os.path.join(path_to_temp_detector), os.path.join(path_to_detector))

    self.face_detector = s3fd()
    self.face_detector.load_state_dict(torch.load(path_to_detector))
    self.face_detector.to(device)
    self.face_detector.eval()
Example #14
Source File: multi_input.py From aboleth with Apache License 2.0 | 6 votes |
def fetch_data():
    """Download the data."""
    train_file = tempfile.NamedTemporaryFile()
    test_file = tempfile.NamedTemporaryFile()
    req.urlretrieve("http://mlr.cs.umass.edu/ml/machine-learning-databases"
                    "/adult/adult.data", train_file.name)
    req.urlretrieve("http://mlr.cs.umass.edu/ml/machine-learning-databases/"
                    "adult/adult.test", test_file.name)

    df_train = pd.read_csv(train_file, names=COLUMNS, skipinitialspace=True)
    df_test = pd.read_csv(test_file, names=COLUMNS, skipinitialspace=True,
                          skiprows=1)

    df_train[LABEL_COLUMN] = (df_train["income_bracket"]
                              .apply(lambda x: ">50K" in x)).astype(int)
    df_test[LABEL_COLUMN] = (df_test["income_bracket"]
                             .apply(lambda x: ">50K" in x)).astype(int)

    return df_train, df_test
Example #15
Source File: mnist_softmax_cntk.py From ai-gym with MIT License | 6 votes |
def load_or_download_mnist_files(filename, num_samples, local_data_dir):
    if local_data_dir:
        local_path = os.path.join(local_data_dir, filename)
    else:
        local_path = os.path.join(os.getcwd(), filename)

    if os.path.exists(local_path):
        gzfname = local_path
    else:
        local_data_dir = os.path.dirname(local_path)
        if not os.path.exists(local_data_dir):
            os.makedirs(local_data_dir)
        filename = "http://yann.lecun.com/exdb/mnist/" + filename
        print("Downloading from " + filename, end=" ")
        gzfname, h = urlretrieve(filename, local_path)
        print("[Done]")
    return gzfname
Example #16
Source File: cifar_prepare.py From ngraph-python with Apache License 2.0 | 6 votes |
def loadData(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        print('Extracting files...')
        with tarfile.open(fname) as tar:
            tar.extractall()
        print('Done.')
        print('Preparing train set...')
        trn = np.empty((0, numFeature + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print('Done.')
        print('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
Example #17
Source File: mnist_training.py From ngraph-python with Apache License 2.0 | 6 votes |
def loadData(src, cimg):
    gzfname, h = urlretrieve(src, './delete.me')
    try:
        with gzip.open(gzfname) as gz:
            n = struct.unpack('I', gz.read(4))
            if n[0] != 0x3080000:
                raise Exception('Invalid file: unexpected magic number.')
            n = struct.unpack('>I', gz.read(4))[0]
            if n != cimg:
                raise Exception('Invalid file: expected {0} entries.'.format(cimg))
            crow = struct.unpack('>I', gz.read(4))[0]
            ccol = struct.unpack('>I', gz.read(4))[0]
            if crow != 28 or ccol != 28:
                raise Exception('Invalid file: expected 28 rows/cols per image.')
            res = np.fromstring(gz.read(cimg * crow * ccol), dtype=np.uint8)
    finally:
        os.remove(gzfname)
    return res.reshape((cimg, crow * ccol))
Example #18
Source File: auto_send_emoji.py From spider_python with Apache License 2.0 | 5 votes |
def download_emojis(self, target_emoji):
    """
    Download an emoji image
    :param target_emoji:
    :return:
    """
    # Local save path
    local_img = './imgs/%s' % target_emoji.get('name')
    request.urlretrieve(target_emoji.get('url'), local_img)
    print('Emoji saved locally at: %s' % local_img)
    return local_img
Example #19
Source File: download_binaries.py From Montreal-Forced-Aligner with MIT License | 5 votes |
def download(args):
    base_dir = os.path.dirname(os.path.abspath(__file__))
    temp_dir = args.temp_directory
    if not args.temp_directory:
        temp_dir = base_dir
    os.makedirs(base_dir, exist_ok=True)
    if sys.platform == 'darwin':
        plat = 'macosx'
    elif sys.platform == 'win32':
        plat = 'win64'
    else:
        plat = 'linux'
    print('Downloading precompiled binaries for {}...'.format(plat))
    download_link = 'http://mlmlab.org/mfa/precompiled_binaries/mfa_thirdparty_{}.zip'.format(plat)
    path = os.path.join(temp_dir, '{}.zip'.format(plat))
    if args.redownload or not os.path.exists(path):
        with tqdm(unit='B', unit_scale=True, miniters=1) as t:
            filename, headers = urlretrieve(download_link, path, reporthook=tqdm_hook(t), data=None)
    shutil.unpack_archive(path, base_dir)
    if not args.keep:
        os.remove(path)
    if plat != 'win':
        import stat
        bin_dir = os.path.join(base_dir, 'bin')
        for f in os.listdir(bin_dir):
            if '.' in f:
                continue
            os.chmod(os.path.join(bin_dir, f), stat.S_IEXEC | stat.S_IWUSR | stat.S_IRUSR)
    return True
Example #20
Source File: GetLSPData.py From deeppose with GNU General Public License v3.0 | 5 votes |
def maybe_download():
    if not os.path.exists(FLAGS.data_dir):
        os.mkdir(FLAGS.data_dir)
    file_path = os.path.join(FLAGS.data_dir, FLAGS.comp_filename)
    if not os.path.exists(file_path):
        print('Downloading ', file_path, '.')
        file_path, _ = url_request.urlretrieve(FLAGS.download_url + FLAGS.comp_filename,
                                               file_path, reporthook=download_progress)
        stat_info = os.stat(file_path)
        print('Successfully downloaded', stat_info.st_size, 'bytes.')
    else:
        print(file_path, 'already exists.')
    return file_path
Example #21
Source File: wget.py From crunchy-xml-decoder with GNU General Public License v2.0 | 5 votes |
def download(url, out=None, bar=bar_adaptive):
    """High level function, which downloads URL into tmp file in current
    directory and then renames it to filename autodetected from either URL
    or HTTP headers.

    :param bar: function to track download progress (visualize etc.)
    :param out: output filename or directory
    :return: filename where URL is downloaded to
    """
    names = dict()
    names["out"] = out or ''
    names["url"] = filename_from_url(url)
    # get filename for temp file in current directory
    prefix = (names["url"] or names["out"] or ".") + "."
    (fd, tmpfile) = tempfile.mkstemp(".tmp", prefix=prefix, dir=".")
    os.close(fd)
    os.unlink(tmpfile)

    # set progress monitoring callback
    def callback_charged(blocks, block_size, total_size):
        # 'closure' to set bar drawing function in callback
        callback_progress(blocks, block_size, total_size, bar_function=bar)
    if bar:
        callback = callback_charged
    else:
        callback = None

    (tmpfile, headers) = urllib.urlretrieve(url, tmpfile, callback)
    names["header"] = filename_from_headers(headers)
    if os.path.isdir(names["out"]):
        filename = names["header"] or names["url"]
        filename = names["out"] + "/" + filename
    else:
        filename = names["out"] or names["header"] or names["url"]

    # add numeric ' (x)' suffix if filename already exists
    if os.path.exists(filename):
        filename = filename_fix_existing(filename)
    shutil.move(tmpfile, filename)

    #print headers
    return filename
Example #22
Source File: wget.py From crunchy-xml-decoder with GNU General Public License v2.0 | 5 votes |
def callback_progress(blocks, block_size, total_size, bar_function):
    """callback function for urlretrieve that is called when the connection
    is created and then once for each block

    draws adaptive progress bar in terminal/console

    use sys.stdout.write() instead of "print," because it allows one more
    symbol at the line end without linefeed on Windows

    :param blocks: number of blocks transferred so far
    :param block_size: in bytes
    :param total_size: in bytes, can be -1 if server doesn't return it
    :param bar_function: another callback function to visualize progress
    """
    global __current_size
    width = min(100, get_console_width())

    if sys.version_info[:3] == (3, 3, 0):  # regression workaround
        if blocks == 0:  # first call
            __current_size = 0
        else:
            __current_size += block_size
        current_size = __current_size
    else:
        current_size = min(blocks * block_size, total_size)
    progress = bar_function(current_size, total_size, width)
    if progress:
        sys.stdout.write("\r" + progress)
Example #23
Source File: util.py From nfl-elo-game with MIT License | 5 votes |
def read_games(file):
    """ Initializes game objects from csv """
    games = [item for item in csv.DictReader(open(file))]

    # Uncommenting these three lines will grab the latest game results for 2019,
    # update team ratings accordingly, and make forecasts for upcoming games
    #file_2019 = file.replace(".", "_2019.")
    #urlretrieve("https://projects.fivethirtyeight.com/nfl-api/2019/nfl_games_2019.csv", file_2019)
    #games += [item for item in csv.DictReader(open(file_2019))]

    for game in games:
        game['season'], game['neutral'], game['playoff'] = int(game['season']), int(game['neutral']), int(game['playoff'])
        game['score1'], game['score2'] = int(game['score1']) if game['score1'] != '' else None, int(game['score2']) if game['score2'] != '' else None
        game['elo_prob1'], game['result1'] = float(game['elo_prob1']) if game['elo_prob1'] != '' else None, float(game['result1']) if game['result1'] != '' else None

    return games
Example #24
Source File: cifar_data_processing.py From batch-shipyard with MIT License | 5 votes |
def download_data(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    return fname
Example #25
Source File: ELMoWordEmbeddings.py From elmo-bilstm-cnn-crf with Apache License 2.0 | 5 votes |
def download(self, url, savePath, silent=False):
    filename = os.path.basename(urlparse.urlparse(url).path) or 'downloaded.file'

    def get_size():
        meta = urllib2.urlopen(url).info()
        meta_func = meta.getheaders if hasattr(meta, 'getheaders') else meta.get_all
        meta_length = meta_func('Content-Length')
        try:
            return int(meta_length[0])
        except:
            return 0

    def kb_to_mb(kb):
        return kb / 1024.0 / 1024.0

    def callback(blocks, block_size, total_size):
        current = blocks * block_size
        percent = 100.0 * current / total_size
        line = '[{0}{1}]'.format('=' * int(percent / 2), ' ' * (50 - int(percent / 2)))
        status = '\r{0:3.0f}%{1} {2:3.1f}/{3:3.1f} MB'
        sys.stdout.write(status.format(percent, line, kb_to_mb(current), kb_to_mb(total_size)))

    logging.info('Downloading: {0} ({1:3.1f} MB)'.format(url, kb_to_mb(get_size())))

    try:
        (savePath, headers) = urlretrieve(url, savePath, None if silent else callback)
    except:
        os.remove(savePath)
        raise Exception("Can't download {0}".format(savePath))
    else:
        print()
        logging.info('Downloaded to: {0}'.format(savePath))

    return savePath
Example #26
Source File: pyinstastories.py From PyInstaStories with MIT License | 5 votes |
def download_file(url, path, attempt=0):
    try:
        urllib.urlretrieve(url, path)
        urllib.urlcleanup()
    except Exception as e:
        if attempt != 3:
            attempt += 1
            print("[E] ({:d}) Download failed: {:s}.".format(attempt, str(e)))
            print("[W] Trying again in 5 seconds.")
            time.sleep(5)
            download_file(url, path, attempt)
        else:
            print("[E] Retry failed three times, skipping file.")
            print('-' * 70)
Example #27
Source File: appveyor-bootstrap.py From python-remote-pdb with BSD 2-Clause "Simplified" License | 5 votes |
def download_file(url, path):
    print("Downloading: {} (into {})".format(url, path))
    progress = [0, 0]

    def report(count, size, total):
        progress[0] = count * size
        if progress[0] - progress[1] > 1000000:
            progress[1] = progress[0]
            print("Downloaded {:,}/{:,} ...".format(progress[1], total))

    dest, _ = urlretrieve(url, path, reporthook=report)
    return dest
Example #28
Source File: _php_builtins.py From pigaios with GNU General Public License v3.0 | 5 votes |
def get_php_references():
    download = urlretrieve(PHP_MANUAL_URL)
    tar = tarfile.open(download[0])
    tar.extractall()
    tar.close()
    for file in glob.glob("%s%s" % (PHP_MANUAL_DIR, PHP_REFERENCE_GLOB)):
        yield file
    os.remove(download[0])
Example #29
Source File: twitter-export-image-fill.py From twitter-export-image-fill with The Unlicense | 5 votes |
def download_image(url, local_filename):
    if not download_images:
        return True

    try:
        urlretrieve(url, local_filename)
        return True
    except:
        return False

# Download a given video via youtube-dl
Example #30
Source File: downloader.py From scudcloud with MIT License | 5 votes |
def run(self):
    try:
        file_name, headers = request.urlretrieve(self.icon, self.path)
        self.wrapper.icon = file_name
    except:
        pass