Python urllib.urlretrieve() Examples
The following are 30 code examples of urllib.urlretrieve(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib, or try the search function.
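Note that urllib.urlretrieve() is the Python 2 spelling of this function; in Python 3 it lives at urllib.request.urlretrieve(), and several of the examples below (those using print statements) are Python 2 code. A minimal, self-contained sketch of the basic call, using a placeholder URL rather than any of the projects listed here, looks roughly like this:

    import os

    try:
        from urllib import urlretrieve          # Python 2
    except ImportError:
        from urllib.request import urlretrieve  # Python 3

    # urlretrieve(url, filename=None, reporthook=None, data=None) downloads url
    # and returns (local_path, headers); without a filename it writes to a
    # temporary file.
    url = "http://example.com/data.zip"  # placeholder URL for illustration
    path, headers = urlretrieve(url, "data.zip")
    print("saved %s (%d bytes)" % (path, os.path.getsize(path)))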
Example #1
Source File: get_data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 10 votes |
def get_cifar10(data_dir):
    if not os.path.isdir(data_dir):
        os.system("mkdir " + data_dir)
    cwd = os.path.abspath(os.getcwd())
    os.chdir(data_dir)
    if (not os.path.exists('train.rec')) or \
       (not os.path.exists('test.rec')):
        import urllib, zipfile, glob
        dirname = os.getcwd()
        zippath = os.path.join(dirname, "cifar10.zip")
        urllib.urlretrieve("http://data.mxnet.io/mxnet/data/cifar10.zip", zippath)
        zf = zipfile.ZipFile(zippath, "r")
        zf.extractall()
        zf.close()
        os.remove(zippath)
        for f in glob.glob(os.path.join(dirname, "cifar", "*")):
            name = f.split(os.path.sep)[-1]
            os.rename(f, os.path.join(dirname, name))
        os.rmdir(os.path.join(dirname, "cifar"))
    os.chdir(cwd)

# data
Example #2
Source File: WOS_input.py From HDLTex with MIT License | 6 votes |
def download_and_extract():
    """
    Download and extract the WOS datasets
    :return: None
    """
    dest_directory = DATA_DIR
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    path = os.path.abspath(dest_directory)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            sys.stdout.write('\rDownloading %s %.2f%%' % (filename,
                float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.urlretrieve(DATA_URL, filepath, reporthook=_progress)
        print('Downloaded', filename)
        tarfile.open(filepath, 'r').extractall(dest_directory)
    return path
Example #3
Source File: sfd_detector.py From VTuber_Unity with MIT License | 6 votes |
def __init__(self, device, path_to_detector=None, verbose=False):
    super(SFDDetector, self).__init__(device, verbose)
    base_path = "face_alignment/ckpts"

    # Initialise the face detector
    if path_to_detector is None:
        path_to_detector = os.path.join(base_path, "s3fd_convert.pth")

        if not os.path.isfile(path_to_detector):
            print("Downloading the face detection CNN. Please wait...")

            path_to_temp_detector = os.path.join(base_path, "s3fd_convert.pth.download")

            if os.path.isfile(path_to_temp_detector):
                os.remove(os.path.join(path_to_temp_detector))

            request_file.urlretrieve(
                "https://www.adrianbulat.com/downloads/python-fan/s3fd_convert.pth",
                os.path.join(path_to_temp_detector))

            os.rename(os.path.join(path_to_temp_detector), os.path.join(path_to_detector))

    self.face_detector = s3fd()
    self.face_detector.load_state_dict(torch.load(path_to_detector))
    self.face_detector.to(device)
    self.face_detector.eval()
Example #4
Source File: mnist.py From pixel_rnn with MIT License | 6 votes |
def load(batch_size, test_batch_size):
    filepath = '/tmp/mnist.pkl.gz'
    url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'

    if not os.path.isfile(filepath):
        print "Couldn't find MNIST dataset in /tmp, downloading..."
        urllib.urlretrieve(url, filepath)

    with gzip.open('/tmp/mnist.pkl.gz', 'rb') as f:
        train_data, dev_data, test_data = pickle.load(f)

    return (
        mnist_generator(train_data, batch_size),
        mnist_generator(dev_data, test_batch_size),
        mnist_generator(test_data, test_batch_size)
    )
Example #5
Source File: dpAutoRig.py From dpAutoRigSystem with GNU General Public License v2.0 | 6 votes |
def downloadUpdate(self, url, ext, *args):
    """ Download file from given url adrees and ask user to choose folder and file name to save
    """
    extFilter = "*."+ext
    downloadFolder = cmds.fileDialog2(fileFilter=extFilter, dialogStyle=2)
    if downloadFolder:
        cmds.progressWindow(title='Download Update', progress=50, status='Downloading...', isInterruptable=False)
        try:
            urllib.urlretrieve(url, downloadFolder[0])
            self.info('i094_downloadUpdate', 'i096_downloaded', downloadFolder[0]+'\n\n'+self.langDic[self.langName]['i018_thanks'], 'center', 205, 270)
            # closes dpUpdateWindow:
            if cmds.window('dpUpdateWindow', query=True, exists=True):
                cmds.deleteUI('dpUpdateWindow', window=True)
        except:
            self.info('i094_downloadUpdate', 'e009_failDownloadUpdate', downloadFolder[0]+'\n\n'+self.langDic[self.langName]['i097_sorry'], 'center', 205, 270)
        cmds.progressWindow(endProgress=True)
Example #6
Source File: mnist.py From improved_wgan_training with MIT License | 6 votes |
def load(batch_size, test_batch_size, n_labelled=None):
    filepath = '/tmp/mnist.pkl.gz'
    url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'

    if not os.path.isfile(filepath):
        print "Couldn't find MNIST dataset in /tmp, downloading..."
        urllib.urlretrieve(url, filepath)

    with gzip.open('/tmp/mnist.pkl.gz', 'rb') as f:
        train_data, dev_data, test_data = pickle.load(f)

    return (
        mnist_generator(train_data, batch_size, n_labelled),
        mnist_generator(dev_data, test_batch_size, n_labelled),
        mnist_generator(test_data, test_batch_size, n_labelled)
    )
Example #7
Source File: coco.py From Deep-Feature-Flow-Segmentation with MIT License | 6 votes |
def download( self, tarDir = None, imgIds = [] ):
    '''
    Download COCO images from mscoco.org server.
    :param tarDir (str): COCO results directory name
           imgIds (list): images to be downloaded
    :return:
    '''
    if tarDir is None:
        print 'Please specify target directory'
        return -1
    if len(imgIds) == 0:
        imgs = self.imgs.values()
    else:
        imgs = self.loadImgs(imgIds)
    N = len(imgs)
    if not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for i, img in enumerate(imgs):
        tic = time.time()
        fname = os.path.join(tarDir, img['file_name'])
        if not os.path.exists(fname):
            urllib.urlretrieve(img['coco_url'], fname)
        print 'downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic)
Example #8
Source File: coco.py From visually-informed-embedding-of-word-VIEW- with BSD 2-Clause "Simplified" License | 6 votes |
def download( self, tarDir = None, imgIds = [] ):
    '''
    Download COCO images from mscoco.org server.
    :param tarDir (str): COCO results directory name
           imgIds (list): images to be downloaded
    :return:
    '''
    if tarDir is None:
        print 'Please specify target directory'
        return -1
    if len(imgIds) == 0:
        imgs = self.imgs.values()
    else:
        imgs = self.loadImgs(imgIds)
    N = len(imgs)
    if not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for i, img in enumerate(imgs):
        tic = time.time()
        fname = os.path.join(tarDir, img['file_name'])
        if not os.path.exists(fname):
            urllib.urlretrieve(img['coco_url'], fname)
        print 'downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic)
Example #9
Source File: bench_engine.py From mobile-ai-bench with Apache License 2.0 | 6 votes |
def get_model_file(file_path, checksum, output_dir, push_list):
    filename = file_path.split('/')[-1]
    if file_path.startswith("http"):
        local_file_path = output_dir + '/' + filename
        if not os.path.exists(local_file_path) \
                or bench_utils.file_checksum(local_file_path) != checksum:
            print("downloading %s..." % filename)
            urllib.urlretrieve(file_path, local_file_path)
        aibench_check(bench_utils.file_checksum(local_file_path) == checksum,
                      "file %s md5 checksum not match" % filename)
    else:
        local_file_path = file_path
        aibench_check(bench_utils.file_checksum(local_file_path) == checksum,
                      "file %s md5 checksum not match" % filename)
    push_list.append(local_file_path)
Example #10
Source File: github.py From fusesoc with BSD 2-Clause "Simplified" License | 6 votes |
def _checkout(self, local_dir):
    user = self.config.get("user")
    repo = self.config.get("repo")
    version = self.config.get("version", "master")

    # TODO : Sanitize URL
    url = URL.format(user=user, repo=repo, version=version)
    logger.info("Downloading {}/{} from github".format(user, repo))
    try:
        (filename, headers) = urllib.urlretrieve(url)
    except URLError as e:
        raise RuntimeError("Failed to download '{}'. '{}'".format(url, e.reason))
    t = tarfile.open(filename)
    (cache_root, core) = os.path.split(local_dir)

    # Ugly hack to get the first part of the directory name of the extracted files
    tmp = t.getnames()[0]
    t.extractall(cache_root)
    os.rename(os.path.join(cache_root, tmp), os.path.join(cache_root, core))
Example #11
Source File: test_urllib.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_short_content_raises_ContentTooShortError(self):
    self.fakehttp('''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

    def _reporthook(par1, par2, par3):
        pass

    try:
        self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
                          'http://example.com', reporthook=_reporthook)
    finally:
        self.unfakehttp()
Example #12
Source File: coco.py From TFFRCNN with MIT License | 6 votes |
def download( self, tarDir = None, imgIds = [] ):
    '''
    Download COCO images from mscoco.org server.
    :param tarDir (str): COCO results directory name
           imgIds (list): images to be downloaded
    :return:
    '''
    if tarDir is None:
        print 'Please specify target directory'
        return -1
    if len(imgIds) == 0:
        imgs = self.imgs.values()
    else:
        imgs = self.loadImgs(imgIds)
    N = len(imgs)
    if not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for i, img in enumerate(imgs):
        tic = time.time()
        fname = os.path.join(tarDir, img['file_name'])
        if not os.path.exists(fname):
            urllib.urlretrieve(img['coco_url'], fname)
        print 'downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic)
Example #13
Source File: adblock_ads.py From info-flow-experiments with GNU General Public License v3.0 | 6 votes |
def _fetch_easylist(self):
    '''
    Downloads the latest version of easylist, and if newer
    replaces any existing one.
    '''
    tmp_easylist = "tmp_"+self.EASYLIST
    cur_version = self._easylist_version()

    # download latest easylist from the Internet
    urllib.urlretrieve(self.EASYLIST_URL, tmp_easylist)
    tmp_version = self._easylist_version(path=tmp_easylist)

    # if necessary update
    if tmp_version > cur_version and cur_version != -1:
        os.remove(self.EASYLIST)
        shutil.move(tmp_easylist, self.EASYLIST)
        print("Updated easylist from {} to {}".format(cur_version, tmp_version))
    elif cur_version == -1:
        shutil.move(tmp_easylist, self.EASYLIST)
        print("New easylist {}".format(tmp_version))
    else:
        os.remove(tmp_easylist)
        print("Easylist already up to date at: {}".format(tmp_version))
Example #14
Source File: test_urllib.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_copy(self):
    # Test that setting the filename argument works.
    second_temp = "%s.2" % test_support.TESTFN
    self.registerFileForCleanUp(second_temp)
    result = urllib.urlretrieve(self.constructLocalFileUrl(
        test_support.TESTFN), second_temp)
    self.assertEqual(second_temp, result[0])
    self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                 "made")
    FILE = file(second_temp, 'rb')
    try:
        text = FILE.read()
        FILE.close()
    finally:
        try:
            FILE.close()
        except:
            pass
    self.assertEqual(self.text, text)
Example #15
Source File: imageDownloader.py From Some-Examples-of-Simple-Python-Script with GNU Affero General Public License v3.0 | 6 votes |
def downloadImages(url):
    page = BeautifulSoup(urllib2.urlopen(url))
    images = set([img['src'] for img in page.findAll('img')])
    print '[i] Downloading {} images...'.format(len(images))
    count = 0
    for image in images:
        name = image.split('/')[-1].replace('%20', ' ')
        try:
            count += 1
            urllib.urlretrieve(image, path_download_images + name)
        except:
            count -= 1
            continue
        print ' {}. Downloaded `{}`'.format(count, name)
    print '[i] Success downloaded {} images to path `{}`'.format(count, path_download_images)
Example #16
Source File: cifar_prepare.py From ngraph-python with Apache License 2.0 | 6 votes |
def loadData(src):
    print('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        print('Extracting files...')
        with tarfile.open(fname) as tar:
            tar.extractall()
        print('Done.')
        print('Preparing train set...')
        trn = np.empty((0, numFeature + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print('Done.')
        print('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
Example #17
Source File: get_data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def get_mnist(data_dir):
    if not os.path.isdir(data_dir):
        os.system("mkdir " + data_dir)
    os.chdir(data_dir)
    if (not os.path.exists('train-images-idx3-ubyte')) or \
       (not os.path.exists('train-labels-idx1-ubyte')) or \
       (not os.path.exists('t10k-images-idx3-ubyte')) or \
       (not os.path.exists('t10k-labels-idx1-ubyte')):
        import urllib, zipfile
        zippath = os.path.join(os.getcwd(), "mnist.zip")
        urllib.urlretrieve("http://data.mxnet.io/mxnet/data/mnist.zip", zippath)
        zf = zipfile.ZipFile(zippath, "r")
        zf.extractall()
        zf.close()
        os.remove(zippath)
    os.chdir("..")
Example #18
Source File: mnist_training.py From ngraph-python with Apache License 2.0 | 6 votes |
def loadData(src, cimg):
    gzfname, h = urlretrieve(src, './delete.me')
    try:
        with gzip.open(gzfname) as gz:
            n = struct.unpack('I', gz.read(4))
            if n[0] != 0x3080000:
                raise Exception('Invalid file: unexpected magic number.')
            n = struct.unpack('>I', gz.read(4))[0]
            if n != cimg:
                raise Exception('Invalid file: expected {0} entries.'.format(cimg))
            crow = struct.unpack('>I', gz.read(4))[0]
            ccol = struct.unpack('>I', gz.read(4))[0]
            if crow != 28 or ccol != 28:
                raise Exception('Invalid file: expected 28 rows/cols per image.')
            res = np.fromstring(gz.read(cimg * crow * ccol), dtype=np.uint8)
    finally:
        os.remove(gzfname)
    return res.reshape((cimg, crow * ccol))
Example #19
Source File: webutils.py From program.plexus with GNU General Public License v2.0 | 5 votes |
def Downloader(self, url, dest, description, heading):
    dp = xbmcgui.DialogProgress()
    dp.create(heading, description, '')
    dp.update(0)
    urllib.urlretrieve(url, dest, lambda nb, bs, fs, url=url: self._pbhook(nb, bs, fs, dp))
Example #20
Source File: utils.py From tensor2tensor with Apache License 2.0 | 5 votes |
def download(url, download_dir):
    outname = os.path.join(download_dir, os.path.basename(url))
    if tf.gfile.Exists(outname):
        print('Found %s, skipping download' % outname)
        return outname
    inprogress = outname + '.incomplete'
    print('Downloading %s' % url)
    inprogress, _ = urllib.urlretrieve(url, inprogress)
    tf.gfile.Rename(inprogress, outname)
    return outname
Example #21
Source File: t-less_download.py From patch_linemod with BSD 2-Clause "Simplified" License | 5 votes |
def callback_progress(blocks, block_size, total_size, bar_function):
    """callback function for urlretrieve that is called when connection is
    created and when once for each block

    draws adaptive progress bar in terminal/console

    use sys.stdout.write() instead of "print,", because it allows one more
    symbol at the line end without linefeed on Windows

    :param blocks: number of blocks transferred so far
    :param block_size: in bytes
    :param total_size: in bytes, can be -1 if server doesn't return it
    :param bar_function: another callback function to visualize progress
    """
    global __current_size
    width = min(100, get_console_width())

    if sys.version_info[:3] == (3, 3, 0):  # regression workaround
        if blocks == 0:  # first call
            __current_size = 0
        else:
            __current_size += block_size
        current_size = __current_size
    else:
        current_size = min(blocks*block_size, total_size)
    progress = bar_function(current_size, total_size, width)
    if progress:
        sys.stdout.write("\r" + progress)
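For reference, the reporthook protocol that the function above implements is small: urlretrieve calls the hook once when the connection is opened and then once per block read, passing the number of blocks transferred so far, the block size in bytes, and the total size in bytes (or -1 if the server did not report it). A minimal self-contained sketch, with a placeholder URL and without the project's bar_function helper, might look like this:

    import sys

    try:
        from urllib import urlretrieve          # Python 2
    except ImportError:
        from urllib.request import urlretrieve  # Python 3

    def report(blocks, block_size, total_size):
        # Called by urlretrieve once per block; total_size may be -1.
        done = blocks * block_size
        if total_size > 0:
            sys.stdout.write("\rdownloading %.1f%%" % min(100.0, done * 100.0 / total_size))
        else:
            sys.stdout.write("\rdownloaded %d bytes" % done)
        sys.stdout.flush()

    # placeholder URL and filename for illustration
    urlretrieve("http://example.com/archive.tar.gz", "archive.tar.gz", report)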
Example #22
Source File: test_var_nn_embedded_vec_classifier.py From PyShortTextCategorization with MIT License | 5 votes |
def setUp(self):
    print("Downloading word-embedding model....")
    link = "https://shorttext-data-northernvirginia.s3.amazonaws.com/trainingdata/test_w2v_model.bin"
    filename = "test_w2v_model.bin"
    if not os.path.isfile("test_w2v_model.bin"):
        if sys.version_info[0] == 2:
            urllib.urlretrieve(link, filename)
        else:
            urllib.request.urlretrieve(link, filename)
    self.w2v_model = shorttext.utils.load_word2vec_model(filename, binary=True)  # load word2vec model
    self.trainclass_dict = shorttext.data.subjectkeywords()  # load training data
Example #23
Source File: test_urllib.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_short_content_raises_ContentTooShortError_without_reporthook(self):
    self.fakehttp('''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
    try:
        self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
                          'http://example.com/')
    finally:
        self.unfakehttp()
Example #24
Source File: test_urllib.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_reporthook_5_bytes(self):
    # Test on 5 byte file. Should call reporthook only 2 times (once when
    # the "network connection" is established and once when the block is
    # read). Since the block size is 8192 bytes, only one block read is
    # required to read the entire file.
    report = []
    def hooktester(count, block_size, total_size, _report=report):
        _report.append((count, block_size, total_size))
    srcFileName = self.createNewTempFile("x" * 5)
    urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                       test_support.TESTFN, hooktester)
    self.assertEqual(len(report), 2)
    self.assertEqual(report[0][1], 8192)
    self.assertEqual(report[0][2], 5)
Example #25
Source File: test_urllib.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_reporthook_0_bytes(self):
    # Test on zero length file. Should call reporthook only 1 time.
    report = []
    def hooktester(count, block_size, total_size, _report=report):
        _report.append((count, block_size, total_size))
    srcFileName = self.createNewTempFile()
    urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                       test_support.TESTFN, hooktester)
    self.assertEqual(len(report), 1)
    self.assertEqual(report[0][2], 0)
Example #26
Source File: test_urllib.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_reporthook(self):
    # Make sure that the reporthook works.
    def hooktester(count, block_size, total_size, count_holder=[0]):
        self.assertIsInstance(count, int)
        self.assertIsInstance(block_size, int)
        self.assertIsInstance(total_size, int)
        self.assertEqual(count, count_holder[0])
        count_holder[0] = count_holder[0] + 1
    second_temp = "%s.2" % test_support.TESTFN
    self.registerFileForCleanUp(second_temp)
    urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN),
                       second_temp, hooktester)
Example #27
Source File: test_urllib.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_basic(self):
    # Make sure that a local file just gets its own location returned and
    # a headers value is returned.
    result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
    self.assertEqual(result[0], test_support.TESTFN)
    self.assertIsInstance(result[1], mimetools.Message,
                          "did not get a mimetools.Message instance as "
                          "second returned value")
Example #28
Source File: test_urllibnet.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_data_header(self):
    logo = test_support.TEST_HTTP_URL
    file_location, fileheaders = self.urlretrieve(logo)
    os.unlink(file_location)
    datevalue = fileheaders.getheader('Date')
    dateformat = '%a, %d %b %Y %H:%M:%S GMT'
    try:
        time.strptime(datevalue, dateformat)
    except ValueError:
        self.fail('Date value not in %r format', dateformat)
Example #29
Source File: test_urllibnet.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_header(self):
    # Make sure header returned as 2nd value from urlretrieve is good.
    file_location, header = self.urlretrieve(test_support.TEST_HTTP_URL)
    os.unlink(file_location)
    self.assertIsInstance(header, mimetools.Message,
                          "header is not an instance of mimetools.Message")
Example #30
Source File: test_urllibnet.py From ironpython2 with Apache License 2.0 | 5 votes |
def test_specified_path(self):
    # Make sure that specifying the location of the file to write to works.
    file_location, info = self.urlretrieve(test_support.TEST_HTTP_URL,
                                           test_support.TESTFN)
    self.assertEqual(file_location, test_support.TESTFN)
    self.assertTrue(os.path.exists(file_location))
    FILE = file(file_location)
    try:
        self.assertTrue(FILE.read(), "reading from temporary file failed")
    finally:
        FILE.close()
        os.unlink(file_location)