Python urllib.request() Examples

The following are 30 code examples of urllib.request(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib, or try the search function.
Example #1
Source File: example_tools.py    From buzzard with Apache License 2.0 6 votes vote down vote up
def get_url(url, retries=1):
    """Download `url` and return the response body.

    The request carries a browser-like ``User-Agent`` and uses the URL itself
    as ``Referer``.

    :param url: Address to fetch.
    :param retries: Number of attempts before giving up. Default is 1.
    :returns: The raw response body as returned by ``read()``.
    :raises IOError: If every attempt failed; the last error is chained.
    """
    req_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13',
        'Referer': url
    }

    request = urllib.request.Request(url, headers=req_headers)
    opener = urllib.request.build_opener()
    delay = 1
    last_error = None
    for attempt in range(retries):
        try:
            # Close the response deterministically instead of leaking it.
            with opener.open(request) as response:
                return response.read()
        except Exception as exc:
            # Not a bare `except:` so KeyboardInterrupt/SystemExit propagate.
            last_error = exc
            # Back off only when another attempt will actually follow.
            if attempt + 1 < retries:
                time.sleep(delay)
                delay *= 2
    raise IOError("Unable to download `%s`."%url) from last_error
Example #2
Source File: example_tools.py    From buzzard with Apache License 2.0 6 votes vote down vote up
def _url_status(url):
    """Return the HTTP status obtained by sending a HEAD request for `url`.

    Sending the request is attempted up to six times; after each failure the
    error is printed and the function sleeps, doubling the pause every time.

    :param url: Address to probe.
    :returns: The integer response status, or the last exception object when
        all six attempts failed.
    """
    parse_obj = urllib.parse.urlparse(url)
    # NOTE(review): a plain HTTPConnection is used whatever the URL scheme is.

    timer = 1
    last_error = None
    for i in range(6):
        connection = None
        try:
            connection = http.client.HTTPConnection(parse_obj.netloc)
            connection.request('HEAD', parse_obj.path)
            break
        except Exception as e:
            # `e` is unbound as soon as this block ends, so keep a reference
            # for the `else` branch below.
            last_error = e
            if connection is not None:
                connection.close()
            print(url, e, 'sleep', timer)
            time.sleep(timer)
            timer *= 2
    else:
        return last_error

    try:
        response = connection.getresponse()
    finally:
        # Release the socket even when reading the response fails.
        connection.close()
    return response.status
Example #3
Source File: webresourcecache.py    From codimension with GNU General Public License v3.0 6 votes vote down vote up
def getResource(self, url, uuid):
        """Return the cached file name for `url`, or trigger its retrieval.

        Returns None when there is no cache directory, when a retrieval for
        the same target file is already registered, or right after a new
        retrieval has been started. The stored file name is returned only
        when `url` is already present in the url-to-file-name map.
        """
        if self.__cacheDir is None:
            return None

        knownFiles = self.__urlToFileName
        if url in knownFiles:
            return knownFiles[url]

        # The target file name is the cache dir followed by the md5 of the url
        digest = hashlib.md5(url.encode('utf-8')).hexdigest()
        targetName = self.__cacheDir + digest
        if targetName not in self.__threads:
            retriever = ResourceRetriever()
            self.__threads[targetName] = retriever
            self.__connectThread(retriever)
            retriever.get(url, targetName, uuid)
        # Either a request is already registered or one has just been started
        return None
Example #4
Source File: cutout.py    From cutout with MIT License 6 votes vote down vote up
def get_response(url):
	"""Fetch `url` and return the response with its body attached as `.data`.

	The request is sent with a Firefox User-Agent. When the response declares
	a gzip or deflate Content-Encoding the body is passed through `ungzip` /
	`undeflate` before being stored. Fetch errors are not caught here.
	"""
	request_headers = {
		'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
	}
	req = urllib.request.Request(url=url, headers=request_headers)
	response = urllib.request.urlopen(req)
	payload = response.read()
	encoding = response.info().get('Content-Encoding')
	if encoding == 'gzip':
		payload = ungzip(payload)
	elif encoding == 'deflate':
		payload = undeflate(payload)
	response.data = payload
	return response



## Fetch the web page HTML
Example #5
Source File: stash.py    From geofront with GNU Affero General Public License v3.0 6 votes vote down vote up
def request(self, method: str, url: str, body=None, headers=None,
                **client_options):
        """Sign a request with a freshly created client and send it.

        `client_options` are forwarded to `self.create_client()`. The client
        signs `url`, `method`, `body` and `headers`, and the signed values
        are what is actually sent. On an HTTP error the request and response
        details are logged and the error is re-raised.

        :return: the object returned by `urllib.request.urlopen()`
        """
        signer = self.create_client(**client_options)
        url, headers, body = signer.sign(url, method, body, headers)
        signed_request = urllib.request.Request(
            url, body, headers, method=method
        )
        try:
            response = urllib.request.urlopen(signed_request)
        except urllib.error.HTTPError as e:
            log_name = __name__ + '.StashTeam.request'
            logging.getLogger(log_name).exception(
                '[%s %s] %s\nrequest headers: %r\nrequest body: %r\n'
                'client_options: %r\nresponse status: %r\n'
                'response headers: %r\nresponse body: %r',
                method, url, e, headers, body, client_options,
                e.code, dict(e.headers), e.read()
            )
            raise
        return response
Example #6
Source File: utils.py    From pySmartDL with The Unlicense 6 votes vote down vote up
def get_filesize(url, timeout=15):
    '''
    Fetches file's size of a file over HTTP.

    The size is read from the ``Content-Length`` response header. If the
    request fails, or the header is missing or not a number, 0 is returned.

    :param url: Url address.
    :type url: string
    :param timeout: Timeout in seconds. Default is 15.
    :type timeout: int
    :returns: Size in bytes.
    :rtype: int
    '''
    try:
        # The context manager closes the response once the header is read.
        with urllib.request.urlopen(url, timeout=timeout) as urlObj:
            # A missing header yields None -> TypeError; garbage -> ValueError.
            return int(urlObj.headers["Content-Length"])
    except (IndexError, KeyError, TypeError, ValueError,
            urllib.error.HTTPError, urllib.error.URLError):
        return 0
Example #7
Source File: main.py    From arches with GNU Affero General Public License v3.0 6 votes vote down vote up
def feature_popup_content(request):
    """Fetch the page named by the POSTed ``url`` and relay its body.

    The URL is fetched only when its host is listed in
    ``settings.ALLOWED_POPUP_HOSTS``. A missing URL, a host that is not
    allowed, a non-HTTP(S) scheme or any failure while fetching results in
    ``HttpResponseNotFound``.

    :param request: request object exposing the submitted form as ``POST``.
    :returns: ``HttpResponse`` with the fetched body, or
        ``HttpResponseNotFound``.
    """
    url = request.POST.get("url", None)
    if url is None:
        return HttpResponseNotFound()

    try:
        parsed = urlparse(url)
        # The URL comes from the client: never let urllib open file:// or
        # other non-web schemes on its behalf.
        if parsed.scheme not in ("http", "https"):
            return HttpResponseNotFound()
        host = "{uri.hostname}".format(uri=parsed)
        if host not in settings.ALLOWED_POPUP_HOSTS:
            return HttpResponseNotFound()
        with urllib.request.urlopen(url) as f:
            return HttpResponse(f.read())
    except Exception:
        # Any fetch failure is reported as "not found"; unlike a bare
        # `except:`, this does not swallow KeyboardInterrupt/SystemExit.
        return HttpResponseNotFound()
Example #8
Source File: stash.py    From geofront with GNU Affero General Public License v3.0 6 votes vote down vote up
def register(self, identity: Identity, public_key: PKey) -> None:
        """Send `public_key` to the register URL on behalf of `identity`.

        Nothing is sent unless the team is an instance of the identity's
        team type and the identity's identifier starts with the team's
        server URL. An HTTP 409 response is turned into
        `DuplicatePublicKeyError` carrying the first reported error message;
        any other HTTP error propagates unchanged.
        """
        team = self.team
        if not isinstance(team, identity.team_type):
            return
        if not cast(str, identity.identifier).startswith(team.server_url):
            return
        payload = json.dumps({'text': format_openssh_pubkey(public_key)})
        try:
            self.request(
                identity,
                'POST',
                self.REGISTER_URL.format(self.team),
                payload,
                headers={'Content-Type': 'application/json'},
            )
        except urllib.error.HTTPError as e:
            if e.code != 409:
                raise
            errors = json.loads(e.read().decode('utf-8'))['errors']
            raise DuplicatePublicKeyError(errors[0]['message'])
Example #9
Source File: webclient.py    From ibllib with MIT License 6 votes vote down vote up
def patch(self, rest_query, data=None):
        """
        Issue a PATCH request against the Alyx server.
        The expected dictionary contents are described at:
        https://alyx.internationalbrainlab.org/docs

        :param rest_query: (required) endpoint, given as a full or relative URL
        :type rest_query: str
        :param data: payload; a dictionary is serialized to a json string first
        :type data: None, dict or str

        :return: response object
        """
        payload = json.dumps(data) if isinstance(data, dict) else data
        return self._generic_request(requests.patch, rest_query, data=payload)
Example #10
Source File: webclient.py    From ibllib with MIT License 6 votes vote down vote up
def get(self, rest_query):
        """
        Issue a GET request against the Alyx server.
        The dictionary contents and the list of endpoints are described at:
        https://alyx.internationalbrainlab.org/docs

        A dictionary reply whose keys are exactly count/next/previous/results
        is unwrapped: the 'results' entry is returned when it already holds
        'count' items, otherwise the reply is wrapped in a _PaginatedResponse.

        :param rest_query: example: '/sessions?user=Hamish'.
        :type rest_query: str

        :return: (dict/list) json interpreted dictionary from response
        """
        rep = self._generic_request(requests.get, rest_query)
        paginated_keys = ['count', 'next', 'previous', 'results']
        if not isinstance(rep, dict) or list(rep.keys()) != paginated_keys:
            return rep
        if len(rep['results']) < rep['count']:
            return _PaginatedResponse(self, rep)
        return rep['results']
Example #11
Source File: stash.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def authenticate(self,
                     state,
                     requested_redirect_url: str,
                     wsgi_environ: Mapping[str, object]) -> Identity:
        """Finish the OAuth exchange and build the resulting identity.

        `state` must unpack into the request token and its secret. The
        ``oauth_token`` query argument of the incoming WSGI request has to
        match that token. The token pair is then exchanged for an access
        token, which is used to fetch the user name from the user URL.

        :param state: pair of (oauth token, oauth token secret)
        :param requested_redirect_url: not used by this implementation
        :param wsgi_environ: WSGI environment of the callback request
        :return: identity holding the user profile URL and the access token
            pair
        :raise AuthenticationError: when `state` is malformed or the token in
            the request does not match it
        """
        logger = logging.getLogger(__name__ + '.StashTeam.authenticate')
        logger.debug('state = %r', state)
        try:
            oauth_token, oauth_token_secret = state
        except (TypeError, ValueError):
            # TypeError covers a state that cannot be unpacked at all
            # (e.g. None), ValueError one with the wrong number of items.
            raise AuthenticationError()
        req = Request(wsgi_environ, populate_request=False, shallow=True)
        args = cast(ImmutableMultiDict, req.args)
        logger.debug('req.args = %r', args)
        if args.get('oauth_token') != oauth_token:
            raise AuthenticationError()
        response = self.request(
            'POST', self.ACCESS_TOKEN_URL.format(self),
            resource_owner_key=oauth_token,
            resource_owner_secret=oauth_token_secret
        )
        try:
            access_token = url_decode_stream(response)
        finally:
            # Close the response even when decoding fails.
            response.close()
        logger.debug('access_token = %r', access_token)
        response = self.request(
            'GET', self.USER_URL.format(self),
            resource_owner_key=access_token['oauth_token'],
            resource_owner_secret=access_token['oauth_token_secret']
        )
        try:
            whoami = response.read().decode('utf-8')
        finally:
            # The second response used to be left open.
            response.close()
        return Identity(
            type(self),
            self.USER_PROFILE_URL.format(self, whoami),
            (access_token['oauth_token'], access_token['oauth_token_secret'])
        )
Example #12
Source File: webresourcecache.py    From codimension with GNU General Public License v3.0 5 votes vote down vote up
def run(self):
        """Run the retriever

        Downloads the stored url, saves the body to the stored file name and
        emits sigRetrieveOK. Any failure is logged and reported through
        sigRetrieveError instead of being raised.
        """
        try:
            # The context manager closes the connection once the body is read
            with urllib.request.urlopen(self.__url, timeout=TIMEOUT) as req:
                content = req.read()
            saveBinaryToFile(self.__fName, content)
            self.sigRetrieveOK.emit(self.__url, self.__uuid, self.__fName)
        except Exception as exc:
            logging.error('Cannot retrieve %s: %s', self.__url, str(exc))
            self.sigRetrieveError.emit(self.__url, self.__fName)
Example #13
Source File: stash.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def request(self, identity, *args, **kwargs):
        """Forward a request to the team, authorized as `identity`.

        The identity's access token pair is passed along as the
        `resource_owner_key` / `resource_owner_secret` keyword arguments;
        every other argument is handed to `self.team.request()` untouched.
        """
        owner_key, owner_secret = identity.access_token
        send = self.team.request
        return send(
            *args,
            resource_owner_key=owner_key,
            resource_owner_secret=owner_secret,
            **kwargs
        )
Example #14
Source File: stash.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def deregister(self, identity: Identity, public_key: PKey) -> None:
        """Delete the first listed key that equals `public_key`.

        The keys come from `self.request_list(identity)`; each entry's
        'text' is parsed and compared with `public_key`. For the first match
        a DELETE request is sent and a 204 response code is asserted. Nothing
        happens when no listed key matches.
        """
        matching = (
            entry for entry in self.request_list(identity)
            if parse_openssh_pubkey(entry['text']) == public_key
        )
        # Only the first matching entry is removed.
        for entry in matching:
            response = self.request(
                identity,
                'DELETE',
                self.DEREGISTER_URL.format(self.team, entry['id'])
            )
            assert response.code == 204
            break
Example #15
Source File: learn.py    From learn2018-autodown with MIT License 5 votes vote down vote up
def download(uri, name):
    """Download `url + uri` into a file named after `name`.

    The target file name is `escape(name)`. The download is skipped when
    that file already exists with a non-zero size, or when the file name
    contains 'Connection__close'. If the download fails, a message is printed
    and the partially written file is removed.

    :param uri: Path appended to the module-level `url`.
    :param name: Name from which the local file name is derived.
    """
    filename = escape(name)
    already_downloaded = os.path.exists(filename) and os.path.getsize(filename)
    if already_downloaded or 'Connection__close' in filename:
        return
    try:
        with TqdmUpTo(ascii=True, dynamic_ncols=True, unit='B', unit_scale=True, miniters=1, desc=filename) as t:
            urllib.request.urlretrieve(url+uri, filename=filename, reporthook=t.update_to, data=None)
    except Exception:
        # Best effort: report the failure and carry on without the file.
        print('Could not download file %s ... removing broken file' % filename)
        if os.path.exists(filename):
            os.remove(filename)
    except BaseException:
        # Ctrl-C / interpreter exit: still drop the partial file, but do not
        # swallow the interruption as the former bare `except:` did.
        if os.path.exists(filename):
            os.remove(filename)
        raise
Example #16
Source File: webclient.py    From ibllib with MIT License 5 votes vote down vote up
def delete(self, rest_query):
        """
        Issue a DELETE request against the Alyx server by handing
        `requests.delete` and the query to `_generic_request`.

        :param rest_query: the endpoint, for example:
         '/weighings/c617562d-c107-432e-a8ee-682c17f9e698'
         'https://test.alyx.internationalbrainlab.org/weighings/c617562d-c107-432e-a8ee-682c17f9e698'.
        :type rest_query: str

        :return: whatever `_generic_request` returns for the call
        """
        http_verb = requests.delete
        return self._generic_request(http_verb, rest_query)
Example #17
Source File: webclient.py    From ibllib with MIT License 5 votes vote down vote up
def post(self, rest_query, data=None):
        """
        Issue a POST request against the Alyx server.
        The expected dictionary contents are described at:
        https://alyx.internationalbrainlab.org/docs

        :param rest_query: (required) endpoint, given as a full or relative URL
        :type rest_query: str
        :param data: payload, passed on unchanged
        :type data: None, dict or str

        :return: response object
        """
        http_verb = requests.post
        return self._generic_request(http_verb, rest_query, data=data)
Example #18
Source File: learn.py    From learn2018-autodown with MIT License 5 votes vote down vote up
def open_page(uri, values=None):
    """Open `uri` with the module-level opener and return the response.

    A `uri` that does not start with 'http' is appended to the module-level
    `url`. When `values` is non-empty it is url-encoded and sent as the
    request body.

    :param uri: Absolute URL or a path relative to `url`.
    :param values: Optional mapping of form values to send.
    :returns: The opened response, or None when the request failed (the
        failure is printed).
    """
    post_data = urllib.parse.urlencode(values).encode() if values else None
    target = uri if uri.startswith('http') else url + uri
    request = urllib.request.Request(target, post_data, headers)
    try:
        return opener.open(request)
    except urllib.error.URLError as e:
        # Only HTTPError carries `.code`; a plain URLError (DNS failure,
        # refused connection, ...) has just `.reason`.
        code = getattr(e, 'code', None)
        if code is None:
            print(uri, ':', e.reason)
        else:
            print(uri, code, ':', e.reason)
    return None
Example #19
Source File: webresourcecache.py    From codimension with GNU General Public License v3.0 5 votes vote down vote up
def get(self, url, fName, uuid):
        """Store the request parameters and call start()"""
        self.__url, self.__uuid, self.__fName = url, uuid, fName
        self.start()
Example #20
Source File: dataset.py    From vergeml with MIT License 5 votes vote down vote up
def download_files(self, urls, env, headers=None, dir=None):
        """Download every entry of `urls` into a fresh temporary directory.

        :param urls: iterable of URLs; an entry may also be a
            ``(url, file_name)`` tuple, otherwise the file name is the last
            '/'-separated component of the URL.
        :param env: object whose ``get('cache-dir')`` supplies the parent
            directory when `dir` is not given.
        :param headers: optional value assigned to the opener's
            ``addheaders`` for the duration of the downloads.
        :param dir: parent directory for the ``tmp_<uuid4>`` download
            directory.
        :return: path of the directory holding the downloaded files.
        :raise VergeMLError: when a download fails.
        """
        if dir is None:
            dir = env.get('cache-dir')

        dest_directory = os.path.join(dir, "tmp_" + str(uuid.uuid4()))
        # exist_ok avoids the check-then-create race.
        os.makedirs(dest_directory, exist_ok=True)

        if headers:
            # The opener does not depend on the URL: install it once.
            opener = urllib.request.build_opener()
            opener.addheaders = headers
            urllib.request.install_opener(opener)

        try:
            for data_url in urls:
                if isinstance(data_url, tuple):
                    data_url, download_file = data_url
                else:
                    download_file = data_url.split('/')[-1]

                download_path = os.path.join(dest_directory, download_file)

                try:
                    urllib.request.urlretrieve(data_url, filename=download_path,
                                               reporthook=self._report_hook(download_file), data=None)
                except Exception as e:
                    # Chain the cause so the original traceback is kept.
                    raise VergeMLError("Could not download {}: {}".format(data_url, e)) from e
        finally:
            if headers:
                # Put a default opener back so the custom headers do not leak
                # into later urllib calls.
                urllib.request.install_opener(urllib.request.build_opener())

        return dest_directory
Example #21
Source File: stash.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def request_authentication(
        self, redirect_url: str
    ) -> AuthenticationContinuation:
        """Obtain a request token and build the authorization continuation.

        A POST to the request-token URL yields the token pair. The returned
        continuation combines the authorize URL, carrying the token and
        `redirect_url` as the ``oauth_callback`` query argument, with the
        (token, token secret) pair as its state.
        """
        response = self.request('POST', self.REQUEST_TOKEN_URL.format(self))
        request_token = url_decode_stream(response)
        response.close()
        authorize_base = self.AUTHORIZE_URL.format(self)
        query = url_encode({
            'oauth_token': request_token['oauth_token'],
            'oauth_callback': redirect_url
        })
        state = (
            request_token['oauth_token'],
            request_token['oauth_token_secret'],
        )
        return AuthenticationContinuation(authorize_base + '?' + query, state)
Example #22
Source File: github.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def deregister(self, identity: Identity, public_key: PKey) -> None:
        """Send a DELETE request for the first listed key equal to
        `public_key`; do nothing when no listed key matches.
        """
        matching = (
            key for pkey, key in self._list_keys(identity)
            if pkey == public_key
        )
        # Only the first match is deleted.
        for key in matching:
            request(identity, self.deregister_url.format(**key), 'DELETE')
            break
Example #23
Source File: github.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def register(self, identity: Identity, public_key: PKey) -> None:
        """POST `public_key` to the key list URL, titled by its fingerprint.

        An HTTP 422 response with an 'application/json' body is inspected:
        if one of its errors concerns the 'key' field with the message
        'key is already in use' (compared stripped and lower-cased),
        `DuplicatePublicKeyError` is raised. Every other HTTP error is
        re-raised unchanged.
        """
        logger = self.logger.getChild('register')
        payload = json.dumps({
            'title': get_key_fingerprint(public_key),
            'key': format_openssh_pubkey(public_key)
        })
        try:
            request(identity, self.list_url, 'POST', data=payload.encode())
        except urllib.error.HTTPError as e:
            if e.code != 422:
                raise
            mimetype, options = parse_options_header(
                e.headers.get('Content-Type')
            )
            if mimetype != 'application/json':
                raise
            content_body = e.read().decode(options.get('charset', 'utf-8'))
            logger.debug('response body:\n%s', content_body)
            for error in json.loads(content_body).get('errors', []):
                if not isinstance(error, dict) or error.get('field') != 'key':
                    continue
                message = error.get('message', '').strip().lower()
                if message == 'key is already in use':
                    raise DuplicatePublicKeyError(message)
            # No duplicate-key error was reported: surface the HTTP error.
            raise
Example #24
Source File: github.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def list_groups(self, identity: Identity) -> GroupSet:
        """Return the slugs of the listed teams that belong to this org.

        An empty set is returned when the identity's team type is not a
        subclass of this type, when the request raises IOError, or when the
        response is a mapping containing 'error'.
        """
        nothing = frozenset()
        if not issubclass(identity.team_type, type(self)):
            return nothing
        try:
            response = request(identity, self.teams_list_url)
        except IOError:
            return nothing
        failed = (isinstance(response, collections.abc.Mapping) and
                  'error' in response)
        if failed:
            return nothing
        return frozenset(
            team['slug']
            for team in response
            if team['organization']['login'] == self.org_login
        )
Example #25
Source File: github.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def authorize(self, identity: Identity) -> bool:
        """Tell whether one of the identity's listed orgs is this org.

        False is returned when the identity's team type is not a subclass of
        this type, when the request raises IOError, or when the response is
        a mapping containing 'error'.
        """
        if not issubclass(identity.team_type, type(self)):
            return False
        try:
            response = request(identity, self.orgs_list_url)
        except IOError:
            return False
        failed = (isinstance(response, collections.abc.Mapping) and
                  'error' in response)
        if failed:
            return False
        for org in response:
            if org['login'] == self.org_login:
                return True
        return False
Example #26
Source File: github.py    From geofront with GNU Affero General Public License v3.0 5 votes vote down vote up
def determine_identity(self, access_token: str) -> Identity:
        """Build the identity from the 'login' field of the user URL reply,
        requested with `access_token`.
        """
        login = request(access_token, self.user_url)['login']
        return Identity(type(self), login, access_token)
Example #27
Source File: networking.py    From gradio-UI with Apache License 2.0 5 votes vote down vote up
def url_request(url, timeout=10):
    """Open `url` with a JSON content-type header and return the response.

    :param url: Address to open.
    :param timeout: Timeout in seconds passed to ``urlopen``. Default is 10.
    :returns: The response object returned by ``urllib.request.urlopen``.
    :raises RuntimeError: On any failure; the message is the text of the
        original error, which is chained as ``__cause__``.
    """
    try:
        req = urllib.request.Request(
            url=url, headers={"content-type": "application/json"}
        )
        return urllib.request.urlopen(req, timeout=timeout)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise RuntimeError(str(e)) from e
Example #28
Source File: networking.py    From gradio-UI with Apache License 2.0 5 votes vote down vote up
def url_request(url, timeout=10):
    """Open `url` with a JSON content-type header and return the response.

    :param url: Address to open.
    :param timeout: Timeout in seconds passed to ``urlopen``. Default is 10.
    :returns: The response object returned by ``urllib.request.urlopen``.
    :raises RuntimeError: On any failure; the message is the text of the
        original error, which is chained as ``__cause__``.
    """
    try:
        req = urllib.request.Request(
            url=url, headers={"content-type": "application/json"}
        )
        return urllib.request.urlopen(req, timeout=timeout)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise RuntimeError(str(e)) from e
Example #29
Source File: utils.py    From pySmartDL with The Unlicense 5 votes vote down vote up
def is_HTTPRange_supported(url, timeout=15):
    '''
    Checks if a server allows `Byte serving <https://en.wikipedia.org/wiki/Byte_serving>`_,
    using the Range HTTP request header and the Accept-Ranges and Content-Range HTTP response headers.

    The full size reported by ``get_filesize`` is compared with the
    ``Content-Length`` of a request for bytes 0-3; the result is True when
    the two differ. False is returned when the full size is unknown or when
    the ranged response has no ``Content-Length``.

    :param url: Url address.
    :type url: string
    :param timeout: Timeout in seconds. Default is 15.
    :type timeout: int
    :rtype: bool
    '''
    url = url.replace(' ', '%20')

    # Use the caller's timeout for the size probe as well.
    fullsize = get_filesize(url, timeout=timeout)
    if not fullsize:
        return False

    req = urllib.request.Request(url, headers={'Range': 'bytes=0-3'})
    # Only the headers are needed; the context manager closes the response.
    with urllib.request.urlopen(req, timeout=timeout) as urlObj:
        content_length = urlObj.headers.get("Content-Length")

    if content_length is None:
        return False

    return int(content_length) != fullsize
Example #30
Source File: update_3rdparty.py    From qutebrowser with GNU General Public License v3.0 5 votes vote down vote up
def test_dicts():
    """Test available dictionaries.

    Sends a HEAD request for every available language's remote file and
    prints 'OK' for a 200 response, otherwise 'ERROR: <status>'.
    """
    configdata.init()
    for lang in dictcli.available_languages():
        print('Testing dictionary {}... '.format(lang.code), end='')
        lang_url = urllib.parse.urljoin(dictcli.API_URL, lang.remote_filename)
        request = urllib.request.Request(lang_url, method='HEAD')
        try:
            # Close each response instead of leaving it open.
            with urllib.request.urlopen(request) as response:
                status = response.status
        except urllib.error.HTTPError as e:
            # urlopen raises for 4xx/5xx responses, so those statuses never
            # reach the comparison below; report them the same way.
            status = e.code
        if status == 200:
            print('OK')
        else:
            print('ERROR: {}'.format(status))