Python urllib.request.build_opener() Examples
The following are 30 code examples of urllib.request.build_opener(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib.request, or try the search function.
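All of the examples below follow the same basic pattern: construct an opener from one or more handler objects with build_opener(), then call its open() method. Here is a minimal sketch of that pattern (the URL is a placeholder):

import urllib.request

# An empty ProxyHandler({}) disables any system proxy; see Examples 1, 25 and 28.
opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
req = urllib.request.Request('https://example.com',
                             headers={'User-Agent': 'Mozilla/5.0'})
with opener.open(req, timeout=10) as response:
    body = response.read()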
Example #1
Source File: CME1.py From web-scraping with Apache License 2.0 | 7 votes |
# assumed imports: import urllib.request as u; from bs4 import BeautifulSoup as bs
def scrape(category_name, commodity_name):
    # i use proxy handler cuz my uni network runs on its proxy
    # and i cannot authenticate python through the proxy
    # so i use empty proxy to bypass the authentication
    proxy_handler = u.ProxyHandler({})
    opener = u.build_opener(proxy_handler)

    # cme officially forbids scraping
    # so a header must be used for disguise as an internet browser
    # the developers say no to scraping, it appears to be so
    # but actually they turn a blind eye to us, thx

    # i need different types of commodity
    # so i need to format the website for each commodity
    req = u.Request('http://www.cmegroup.com/trading/metals/%s/%s.html' % (
        category_name, commodity_name), headers={'User-Agent': 'Mozilla/5.0'})
    response = opener.open(req)
    result = response.read()
    soup = bs(result, 'html.parser')
    return soup
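A hypothetical call, for illustration only (the category and commodity slugs below are guesses at the CME URL scheme, not values taken from the source):

# Hypothetical usage; 'precious' and 'gold' are assumed URL slugs.
soup = scrape('precious', 'gold')
print(soup.title)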
Example #2
Source File: githubpy.py From osint-scraper with MIT License | 7 votes |
def get_access_token(self, code, state=None):
    '''
    In callback url: http://host/callback?code=123&state=xyz
    use code and state to get an access token.
    '''
    kw = dict(client_id=self._client_id, client_secret=self._client_secret, code=code)
    if self._redirect_uri:
        kw['redirect_uri'] = self._redirect_uri
    if state:
        kw['state'] = state
    opener = build_opener(HTTPSHandler)
    request = Request('https://github.com/login/oauth/access_token', data=_encode_params(kw))
    request.get_method = _METHOD_MAP['POST']
    request.add_header('Accept', 'application/json')
    try:
        response = opener.open(request, timeout=TIMEOUT)
        r = _parse_json(response.read())
        if 'error' in r:
            raise ApiAuthError(str(r.error))
        return str(r.access_token)
    except HTTPError as e:
        raise ApiAuthError('HTTPError when get access token')
Example #3
Source File: common.py From acmpv with Do What The F*ck You Want To Public License | 6 votes |
def get_response(url, faker=False):
    logging.debug('get_response: %s' % url)
    # install cookies
    if cookies:
        opener = request.build_opener(request.HTTPCookieProcessor(cookies))
        request.install_opener(opener)
    if faker:
        response = request.urlopen(request.Request(url, headers=fake_headers), None)
    else:
        response = request.urlopen(url)

    data = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        data = ungzip(data)
    elif response.info().get('Content-Encoding') == 'deflate':
        data = undeflate(data)
    response.data = data
    return response

# DEPRECATED in favor of get_content()
Example #4
Source File: exposition.py From client_python with Apache License 2.0 | 6 votes |
def default_handler(url, method, timeout, headers, data):
    """Default handler that implements HTTP/HTTPS connections.

    Used by the push_to_gateway functions. Can be re-used by other handlers."""

    def handle():
        request = Request(url, data=data)
        request.get_method = lambda: method
        for k, v in headers:
            request.add_header(k, v)
        resp = build_opener(HTTPHandler).open(request, timeout=timeout)
        if resp.code >= 400:
            raise IOError("error talking to pushgateway: {0} {1}".format(
                resp.code, resp.msg))

    return handle
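The factory returns a zero-argument closure that performs the upload when invoked. A hedged usage sketch, assuming the prometheus_client package is installed; the gateway address and job name are placeholders:

# Sketch assuming prometheus_client; gateway address and job name are placeholders.
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

registry = CollectorRegistry()
g = Gauge('job_last_success_unixtime', 'Last successful run', registry=registry)
g.set_to_current_time()

# push_to_gateway() accepts a handler factory with the signature shown above.
push_to_gateway('localhost:9091', job='batch_demo', registry=registry,
                handler=default_handler)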
Example #5
Source File: httpclient.py From opsbro with MIT License | 6 votes |
def post(uri, params={}, headers={}):
    data = None  # always none in GET
    if params:
        # TODO: sure it's json and not urlencode?
        # data = urlencode(params)
        data = unicode_to_bytes(jsoner.dumps(params))

    url_opener = build_opener(HTTPHandler)

    req = Request(uri, data)
    req.get_method = lambda: 'POST'
    for (k, v) in headers.items():
        req.add_header(k, v)
    request = url_opener.open(req)
    response = request.read()
    # code = request.code
    return response
Example #6
Source File: httpclient.py From opsbro with MIT License | 6 votes |
def put(uri, data=None, params={}, headers=None):
    # data = None # always none in GET
    if headers is None:
        headers = {}
    if params:
        # TODO: sure it's json and not urlencode?
        # data = urlencode(params)
        uri = "%s?%s" % (uri, urlencode(params))
        headers['Content-Type'] = 'your/contenttype'

    url_opener = build_opener(HTTPHandler)

    req = Request(uri, data)
    req.get_method = lambda: 'PUT'
    for (k, v) in headers.items():
        req.add_header(k, v)
    request = url_opener.open(req)
    response = request.read()
    # code = request.code
    return response
Example #7
Source File: website_helper.py From aws-media-insights-engine with Apache License 2.0 | 6 votes |
def send_response(event, context, response_status, response_data):
    """
    Send a resource manipulation status response to CloudFormation
    """
    response_body = json.dumps({
        "Status": response_status,
        "Reason": "See the details in CloudWatch Log Stream: " + context.log_stream_name,
        "PhysicalResourceId": context.log_stream_name,
        "StackId": event['StackId'],
        "RequestId": event['RequestId'],
        "LogicalResourceId": event['LogicalResourceId'],
        "Data": response_data
    })

    LOGGER.info('ResponseURL: {s}'.format(s=event['ResponseURL']))
    LOGGER.info('ResponseBody: {s}'.format(s=response_body))

    opener = build_opener(HTTPHandler)
    request = Request(event['ResponseURL'], data=response_body.encode('utf-8'))
    request.add_header('Content-Type', '')
    request.add_header('Content-Length', len(response_body))
    request.get_method = lambda: 'PUT'
    response = opener.open(request)
    # fix: call getcode(); the original logged the bound method itself
    LOGGER.info("Status code: {s}".format(s=response.getcode()))
    LOGGER.info("Status message: {s}".format(s=response.msg))
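Patching request.get_method with a lambda, as above, is the classic way to force a verb other than GET or POST. On Python 3.3+ the Request constructor accepts a method argument directly, so an equivalent sketch is:

# Equivalent on Python 3.3+; the URL and body are placeholders.
from urllib.request import HTTPHandler, Request, build_opener

request = Request('https://example.com/resource', data=b'{}', method='PUT')
response = build_opener(HTTPHandler).open(request)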
Example #8
Source File: httpclient.py From opsbro with MIT License | 6 votes |
def delete(uri, params={}, headers={}):
    data = None  # always none in GET
    if params:
        uri = "%s?%s" % (uri, urlencode(params))

    url_opener = build_opener(HTTPHandler)

    req = Request(uri, data)
    req.get_method = lambda: 'DELETE'
    for (k, v) in headers.items():
        req.add_header(k, v)
    request = url_opener.open(req)
    response = request.read()
    # code = request.code
    return response
Example #9
Source File: py3.py From php-load-test with Do What The F*ck You Want To Public License | 6 votes |
def check_php_multipartform_dos(url, post_body, headers, ip):
    try:
        proxy_handler = urllib2.ProxyHandler({"http": ip})
        null_proxy_handler = urllib2.ProxyHandler({})
        opener = urllib2.build_opener(proxy_handler)
        urllib2.install_opener(opener)
        req = urllib2.Request(url)
        for key in headers.keys():
            req.add_header(key, headers[key])
        starttime = datetime.datetime.now()
        fd = urllib2.urlopen(req, post_body)
        html = fd.read()
        endtime = datetime.datetime.now()
        usetime = (endtime - starttime).seconds
        result = None  # fix: ensure result is bound even for fast responses
        if usetime > 5:
            result = url + " is vulnerable"
        elif usetime > 3:
            result = "need to check normal respond time"
        return [result, usetime]
    except KeyboardInterrupt:
        exit()

# end
Example #10
Source File: app.py From aws-media-insights-engine with Apache License 2.0 | 6 votes |
def send_response(event, context, response_status, response_data):
    '''Send a resource manipulation status response to CloudFormation'''
    response_body = json.dumps({
        "Status": response_status,
        "Reason": "See the details in CloudWatch Log Stream: " + context.log_stream_name,
        "PhysicalResourceId": context.log_stream_name,
        "StackId": event['StackId'],
        "RequestId": event['RequestId'],
        "LogicalResourceId": event['LogicalResourceId'],
        "Data": response_data
    })
    logger.info('ResponseURL: %s', event['ResponseURL'])
    logger.info('ResponseBody: %s', response_body)
    opener = build_opener(HTTPHandler)
    request = Request(event['ResponseURL'], data=response_body.encode('utf-8'))
    request.add_header('Content-Type', '')
    request.add_header('Content-Length', len(response_body))
    request.get_method = lambda: 'PUT'
    response = opener.open(request)
    logger.info("Status code: %s", response.getcode())
    logger.info("Status message: %s", response.msg)
Example #11
Source File: scholar.py From dblp with MIT License | 6 votes |
def __init__(self):
    self.articles = []
    self.query = None
    self.cjar = MozillaCookieJar()

    # If we have a cookie file, load it:
    if ScholarConf.COOKIE_JAR_FILE and \
       os.path.exists(ScholarConf.COOKIE_JAR_FILE):
        try:
            self.cjar.load(ScholarConf.COOKIE_JAR_FILE, ignore_discard=True)
            ScholarUtils.log('info', 'loaded cookies file')
        except Exception as msg:
            ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
            self.cjar = MozillaCookieJar()  # Just to be safe

    self.opener = build_opener(HTTPCookieProcessor(self.cjar))
    self.settings = None  # Last settings object, if any
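The same load-or-start-fresh idea, condensed into a standalone sketch (the cookie file name is a placeholder):

# Sketch: persist cookies across runs with MozillaCookieJar.
from http.cookiejar import MozillaCookieJar
from urllib.request import HTTPCookieProcessor, build_opener

cjar = MozillaCookieJar('cookies.txt')
try:
    cjar.load(ignore_discard=True)   # reuse cookies from a previous run
except OSError:
    pass                             # no cookie file yet; start fresh

opener = build_opener(HTTPCookieProcessor(cjar))
opener.open('https://example.com')
cjar.save(ignore_discard=True)       # write cookies back for the next run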
Example #12
Source File: ghost.py From GhostPotato with MIT License | 6 votes |
def do_socks(self, line):
    headers = ["Protocol", "Target", "Username", "AdminStatus", "Port"]
    url = "http://localhost:9090/ntlmrelayx/api/v1.0/relays"
    try:
        proxy_handler = ProxyHandler({})
        opener = build_opener(proxy_handler)
        response = Request(url)
        r = opener.open(response)
        result = r.read()
        items = json.loads(result)
    except Exception as e:
        logging.error("ERROR: %s" % str(e))
    else:
        if len(items) > 0:
            self.printTable(items, header=headers)
        else:
            logging.info('No Relays Available!')
Example #13
Source File: common.py From acmpv with Do What The F*ck You Want To Public License | 6 votes |
def get_response(url, faker=False):
    logging.debug('get_response: %s' % url)
    # install cookies
    if cookies:
        opener = request.build_opener(request.HTTPCookieProcessor(cookies))
        request.install_opener(opener)
    if faker:
        response = request.urlopen(request.Request(url, headers=fake_headers), None)
    else:
        response = request.urlopen(url)

    data = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        data = ungzip(data)
    elif response.info().get('Content-Encoding') == 'deflate':
        data = undeflate(data)
    response.data = data
    return response

# DEPRECATED in favor of get_content()
Example #14
Source File: unixclient.py From opsbro with MIT License | 5 votes |
def get_local(u, local_socket, params={}, method='GET', timeout=10):
    UnixHTTPConnection.socket_timeout = timeout
    data = None
    special_headers = []
    if method == 'GET' and params:
        u = "%s?%s" % (u, urlencode(params))
    if method == 'POST' and params:
        data = string_encode(urlencode(params))
    if method == 'PUT' and params:
        special_headers.append(('Content-Type', 'your/contenttype'))
        data = string_encode(params)

    # not the same way to connect
    # * windows: TCP
    # * unix   : unix socket
    if os.name == 'nt':
        url_opener = build_opener(HTTPHandler)
        uri = 'http://127.0.0.1:6770%s' % u
    else:  # unix
        url_opener = build_opener(UnixSocketHandler())
        uri = 'unix:/%s%s' % (local_socket, u)

    logger.debug("Connecting to local http/unix socket at: %s with method %s" % (uri, method))

    req = Request(uri, data)
    req.get_method = lambda: method
    for (k, v) in special_headers:
        req.add_header(k, v)
    request = url_opener.open(req)
    response = request.read()
    code = request.code
    return (code, response)
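Note that build_opener() accepts handler classes as well as handler instances; classes are instantiated internally, which is why HTTPHandler and UnixSocketHandler() can be passed interchangeably above:

# build_opener() takes BaseHandler subclasses or instances.
from urllib.request import HTTPHandler, build_opener

opener_a = build_opener(HTTPHandler)     # class: instantiated by build_opener
opener_b = build_opener(HTTPHandler())   # instance: used as-is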
Example #15
Source File: payloadtools.py From wfuzz with GNU General Public License v2.0 | 5 votes |
def _do_search(self, offset=0, limit=50):
    # some code taken from http://www.securitybydefault.com/2014/07/search2auditpy-deja-que-bing-haga-el.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+SecurityByDefault+%28Security+By+Default%29
    # api doc http://go.microsoft.com/fwlink/?LinkID=248077
    user_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; FDM; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 1.1.4322)'
    creds = (':%s' % self._key).encode('base64')[:-1]
    auth = 'Basic %s' % creds

    result = None
    try:
        urlstr = 'https://api.datamarket.azure.com/Data.ashx/Bing/Search/Composite?Sources=%27web%27&Query=%27' + self._dork + '%27&$format=json'
        if limit != 50:
            urlstr += "&$top=%d" % limit
        if offset != 0:
            urlstr += "&$skip=%d" % offset

        request = Request(urlstr)
        request.add_header('Authorization', auth)
        request.add_header('User-Agent', user_agent)
        requestor = build_opener()
        result = requestor.open(request)
    except Exception as e:
        raise FuzzExceptResourceParseError("Error when retrieving Bing API results: %s." % str(e))

    results = json.loads(result.read())

    # WebTotal is not reliable, it is usually much bigger than the actual results, therefore
    # if your offset increases over the real number of results, you get a dict
    # without values and counters to ''. It gets updated when you are close to that limit though.
    if results['d']['results'][0]["WebTotal"]:
        res_total = int(results['d']['results'][0]["WebTotal"])
        res_list = results['d']['results'][0]['Web']
        return res_total, len(res_list), res_list
    else:
        return 0, 0, 0
Example #16
Source File: __init__.py From article-date-extractor with MIT License | 5 votes |
def extractArticlePublishedDate(articleLink, html=None):
    print("Extracting date from " + articleLink)

    articleDate = None

    try:
        articleDate = _extractFromURL(articleLink)

        if html is None:
            # note: 'urllib' here is presumably this project's compat alias
            # exposing Request/build_opener (e.g. urllib2 / urllib.request)
            request = urllib.Request(articleLink)
            # Using a browser user agent decreases the chance of sites blocking this request - just a suggestion
            # request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36')
            html = urllib.build_opener().open(request).read()

        parsedHTML = BeautifulSoup(html, "lxml")

        possibleDate = _extractFromLDJson(parsedHTML)
        if possibleDate is None:
            possibleDate = _extractFromMeta(parsedHTML)
        if possibleDate is None:
            possibleDate = _extractFromHTMLTag(parsedHTML)

        articleDate = possibleDate

    except Exception as e:
        print("Exception in extractArticlePublishedDate for " + articleLink)
        print(e.args)

    return articleDate
Example #17
Source File: ad_manager.py From googleads-python-lib with Apache License 2.0 | 5 votes |
def __init__(self, ad_manager_client, version=sorted(_SERVICE_MAP.keys())[-1],
             server=None):
    """Initializes a DataDownloader.

    Args:
        ad_manager_client: The AdManagerClient whose attributes will be used to
            authorize your report download and PQL query requests.
        [optional]
        version: A string identifying the Ad Manager version to connect to.
            This defaults to what is currently the latest version. This will
            be updated in future releases to point to what is then the latest
            version.
        server: A string identifying the webserver hosting the Ad Manager API.
    """
    if not server:
        server = DEFAULT_ENDPOINT

    if server[-1] == '/':
        server = server[:-1]

    self._ad_manager_client = ad_manager_client
    self._version = version
    self._server = server
    self._report_service = None
    self._pql_service = None
    self.proxy_config = self._ad_manager_client.proxy_config
    handlers = self.proxy_config.GetHandlers()
    self.url_opener = build_opener(*handlers)

    if self._ad_manager_client.custom_http_headers:
        self.url_opener.addheaders.extend(
            self._ad_manager_client.custom_http_headers.items())
Example #18
Source File: util.py From chicago-justice with GNU General Public License v3.0 | 5 votes |
def load_html(url, with_cookies=False, headers={}):
    """Attempts to load an HTML page, returning a BeautifulSoup instance.

    Raises any networking or parsing exceptions"""
    # note: 'urlopen' here names the urllib request module, not the urlopen() function
    if with_cookies:
        cj = CookieJar()
        opener = urlopen.build_opener(urlopen.HTTPCookieProcessor(cj))
    else:
        opener = urlopen.build_opener()
    request = urlopen.Request(url, headers=headers)
    response = opener.open(request)
    html = response.read().decode('utf-8', errors='replace')
    soup = BeautifulSoup(html, 'html.parser')
    return soup
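A hypothetical call (the URL and headers are placeholders):

# Hypothetical usage; URL and headers are placeholders.
soup = load_html('https://example.com/news', with_cookies=True,
                 headers={'User-Agent': 'Mozilla/5.0'})
print(soup.title.get_text())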
Example #19
Source File: common.py From googleads-python-lib with Apache License 2.0 | 5 votes |
def BuildOpener(self):
    """Builds an OpenerDirector instance using the ProxyConfig settings.

    This will return a urllib2.request.OpenerDirector instance.

    Returns:
        An OpenerDirector instance instantiated with settings defined in the
        ProxyConfig instance.
    """
    return build_opener(*self.GetHandlers())
Example #20
Source File: adwords.py From googleads-python-lib with Apache License 2.0 | 5 votes |
def __init__(self, adwords_client, version=sorted(_SERVICE_MAP.keys())[-1],
             server=None):
    """Initializes a ReportDownloader.

    Args:
        adwords_client: The AdwordsClient whose attributes will be used to
            authorize your report download requests.
        [optional]
        version: A string identifying the AdWords version to connect to. This
            defaults to what is currently the latest version. This will be
            updated in future releases to point to what is then the latest
            version.
        server: A string identifying the webserver hosting the AdWords API.
    """
    if not server:
        server = _DEFAULT_ENDPOINT
    server = server.rstrip('/')

    self._adwords_client = adwords_client
    self._namespace = self._NAMESPACE_FORMAT % version
    self._end_point = self._END_POINT_FORMAT % (server, version)
    self._header_handler = _AdWordsHeaderHandler(
        adwords_client, version, self._adwords_client.enable_compression,
        self._adwords_client.custom_http_headers)
    self.proxy_config = self._adwords_client.proxy_config
    handlers = self.proxy_config.GetHandlers()
    self.url_opener = build_opener(*handlers)
    if self._adwords_client.custom_http_headers:
        self.url_opener.addheaders.extend(
            adwords_client.custom_http_headers.items())

    schema_url = self._SCHEMA_FORMAT % (server, version)
    service_class = googleads.common.GetSchemaHelperForLibrary()
    self.schema_helper = service_class(
        schema_url, self._adwords_client.timeout, self.proxy_config,
        self._namespace, self._adwords_client.cache)
Example #21
Source File: adwords.py From googleads-python-lib with Apache License 2.0 | 5 votes |
def __init__(self, request_builder, upload_url, current_content_length=0,
             is_last=False):
    """Initializes the IncrementalUpload.

    Args:
        request_builder: an AbstractUploadRequestBuilder instance.
        upload_url: a string url provided by the BatchJobService.
        current_content_length: an integer identifying the current content
            length of data uploaded to the Batch Job.
        is_last: a boolean indicating whether this is the final increment.

    Raises:
        GoogleAdsValueError: if the content length is lower than 0.
    """
    self._request_builder = request_builder
    if current_content_length < 0:
        raise googleads.errors.GoogleAdsValueError(
            'Current content length %s is < 0.' % current_content_length)
    self._current_content_length = current_content_length
    self._is_last = is_last
    self._url_opener = build_opener(
        *self._request_builder.client.proxy_config.GetHandlers())
    if self._request_builder.client.custom_http_headers:
        self._url_opener.addheaders.extend(
            self._request_builder.client.custom_http_headers.items())
    self._upload_url = self._InitializeURL(upload_url, current_content_length)
Example #22
Source File: url.py From beibq with BSD 3-Clause "New" or "Revised" License | 5 votes |
def post_url(url, data):
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
    }
    req = request.Request(url, headers=header)
    # fix: urlencode() returns str; POST data must be bytes on Python 3
    data = parse.urlencode(data).encode('utf-8')
    opener = request.build_opener(request.HTTPCookieProcessor())
    response = opener.open(req, data)
    return response.read()
Example #23
Source File: 刷网页.py From Python-Code with MIT License | 5 votes |
def brash(proxy_dict):
    # print(proxy_dict)
    global count
    global count1
    if count1 < 100:
        try:  # normal operation
            count = count + 1
            print(count, 'times')  # watch that the program is still running; print how many runs so far
            proxy_handler = request.ProxyHandler({'http': proxy_dict})
            opener = request.build_opener(proxy_handler)
            request.install_opener(opener)
            countUrl = len(url)
            for i in range(countUrl):  # iterate over all urls
                req = request.Request(url[i], headers=head, method='POST')
                try:
                    # lock.acquire()
                    response = request.urlopen(req)  # fetch the page
                    html = response.read().decode('utf-8')
                    print(html)
                    # lock.release()
                except urllib.error.URLError as e:
                    print(e.reason)
                    print("EEEEEE")
                    # time.sleep(1)  # pause between requests
        except Exception:  # something went wrong
            print('Retry')
            count1 = count1 + 1
            time.sleep(1)  # pause between requests
    else:
        print('much error')
Example #24
Source File: compat.py From johnnydep with MIT License | 5 votes |
def urlretrieve(url, filename, data=None, auth=None):
    if auth is not None:
        # https://docs.python.org/2.7/howto/urllib2.html#id6
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()

        # Add the username and password.
        # If we knew the realm, we could use it instead of None.
        username, password = auth
        top_level_url = urlparse(url).netloc
        password_mgr.add_password(None, top_level_url, username, password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)

        # create "opener" (OpenerDirector instance)
        opener = urllib2.build_opener(handler)
    else:
        opener = urllib2.build_opener()
    res = opener.open(url, data=data)
    headers = res.info()
    with open(filename, "wb") as fp:
        fp.write(res.read())
    return filename, headers
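A hypothetical call (the URL, file name, and credentials are placeholders):

# Hypothetical usage with HTTP basic auth.
filename, headers = urlretrieve('https://example.com/pkg.tar.gz',
                                'pkg.tar.gz', auth=('user', 'secret'))
print(headers.get('Content-Type'))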
Example #25
Source File: common.py From acmpv with Do What The F*ck You Want To Public License | 5 votes |
def set_http_proxy(proxy):
    if proxy == None:  # Use system default setting
        proxy_support = request.ProxyHandler()
    elif proxy == '':  # Don't use any proxy
        proxy_support = request.ProxyHandler({})
    else:  # Use proxy
        proxy_support = request.ProxyHandler(
            {'http': '%s' % proxy, 'https': '%s' % proxy})
    opener = request.build_opener(proxy_support)
    request.install_opener(opener)
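The three branches map to ProxyHandler's three modes: no argument reads proxy settings from the environment, an empty dict disables proxies entirely, and an explicit dict routes the listed schemes through the given proxy. Hypothetical calls:

# Hypothetical calls; the proxy address is a placeholder.
set_http_proxy(None)              # use system/environment proxy settings
set_http_proxy('')                # go direct, bypassing any proxy
set_http_proxy('127.0.0.1:8080')  # send http/https traffic via this proxy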
Example #26
Source File: default.py From ru with GNU General Public License v2.0 | 5 votes |
def get_html(web_url):
    cookie_jar = cookielib.CookieJar()
    if mode == 'FAVS':
        cookie_jar = auth(cookie_jar)
    if antizapret_enabled:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie_jar),
            az.AntizapretProxyHandler())
    else:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [("User-Agent", USER_AGENT)]
    connection = opener.open(web_url)
    html = connection.read()
    connection.close()
    return html.decode('utf-8')
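Assigning to opener.addheaders, as above, replaces the opener's default header list (which normally carries a Python-urllib User-Agent) rather than appending to it:

from urllib.request import build_opener

opener = build_opener()
print(opener.addheaders)   # default: [('User-agent', 'Python-urllib/3.x')]
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]  # replaced wholesale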
Example #27
Source File: default.py From ru with GNU General Public License v2.0 | 5 votes |
def get_html_with_referer(page_url, referer):
    cookie_jar = cookielib.CookieJar()
    if mode == 'FAVS':
        cookie_jar = auth(cookie_jar)
    if antizapret_enabled:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie_jar),
            az.AntizapretProxyHandler())
    else:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie_jar))
    if referer is not None:
        opener.addheaders = [("Referer", referer)]
    connection = opener.open(page_url)
    html = connection.read()
    connection.close()
    return html.decode('utf-8')
Example #28
Source File: common.py From acmpv with Do What The F*ck You Want To Public License | 5 votes |
def unset_proxy():
    proxy_handler = request.ProxyHandler({})
    opener = request.build_opener(proxy_handler)
    request.install_opener(opener)

# DEPRECATED in favor of set_proxy() and unset_proxy()
Example #29
Source File: default.py From ru with GNU General Public License v2.0 | 5 votes |
def post_request(page_url, req_data=None, headers=None):
    if headers is None:
        headers = {}
    if antizapret_enabled:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(),
            az.AntizapretProxyHandler())
    else:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor())
    opener.addheaders = [("User-Agent", USER_AGENT)]
    conn = urllib_request.Request(
        page_url, urllib_parse.urlencode(req_data).encode('utf-8'), headers)
    connection = opener.open(conn)
    html = connection.read()
    return html
Example #30
Source File: http.py From suds-py3 with GNU Lesser General Public License v3.0 | 5 votes |
def u2opener(self):
    """
    Create a urllib opener.
    @return: An opener.
    @rtype: I{OpenerDirector}
    """
    if self.urlopener is None:
        return u2.build_opener(*self.u2handlers())
    else:
        return self.urlopener