Python six.moves.urllib_parse.urlparse() Examples
The following are 30 code examples of six.moves.urllib_parse.urlparse().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
six.moves.urllib_parse, or try the search function.
Example #1
Source File: general_name.py From teleport with Apache License 2.0 | 6 votes |
def _idna_encode(self, value):
    """Return *value* rebuilt with its hostname IDNA-encoded (punycode)."""
    idna = _lazy_import_idna()
    parsed = urllib_parse.urlparse(value)
    netloc = idna.encode(parsed.hostname).decode("ascii")
    if parsed.port:
        netloc += ":{}".format(parsed.port)
    # Note that building a URL in this fashion means it should be
    # semantically indistinguishable from the original but is not
    # guaranteed to be exactly the same.
    return urllib_parse.urlunparse((
        parsed.scheme,
        netloc,
        parsed.path,
        parsed.params,
        parsed.query,
        parsed.fragment,
    ))
Example #2
Source File: http.py From flex with MIT License | 6 votes |
def data(self):
    """Deserialize the request/response body according to its content type.

    TODO: What is the right way to do this?
    """
    if not self.body:
        return self.body
    if self.body is EMPTY:
        return EMPTY
    if self.content_type and self.content_type.startswith('application/json'):
        try:
            raw = self.body
            # Bytes bodies are decoded before parsing; a bad UTF-8 sequence
            # surfaces as a ValueError just like a JSON syntax error.
            if isinstance(raw, six.binary_type):
                raw = raw.decode('utf-8')
            return json.loads(raw)
        except ValueError as e:
            if isinstance(e, JSONDecodeError):
                # this will only be True for Python3+
                raise e
            raise JSONDecodeError(str(e))
    if self.content_type == 'application/x-www-form-urlencoded':
        return dict(urlparse.parse_qsl(self.body))
    raise NotImplementedError("No parser for content type")
Example #3
Source File: oauth.py From Dailyfresh-B2C with Apache License 2.0 | 6 votes |
def handle_state(self, start_url, target_url):
    """Propagate OAuth `state`/`redirect_state` params onto *target_url*."""
    start_query = parse_qs(urlparse(start_url).query)
    redirect_uri = start_query.get('redirect_uri')
    if getattr(self.backend, 'STATE_PARAMETER', False) \
            and start_query.get('state'):
        target_url = url_add_parameters(
            target_url, {'state': start_query['state']}
        )
    if redirect_uri and getattr(self.backend, 'REDIRECT_STATE', False):
        redirect_query = parse_qs(urlparse(redirect_uri).query)
        redirect_state = redirect_query.get('redirect_state')
        if redirect_state:
            target_url = url_add_parameters(
                target_url, {'redirect_state': redirect_state}
            )
    return target_url
Example #4
Source File: general_name.py From learn_python3_spider with MIT License | 6 votes |
def _idna_encode(self, value): idna = _lazy_import_idna() parsed = urllib_parse.urlparse(value) if parsed.port: netloc = ( idna.encode(parsed.hostname) + ":{}".format(parsed.port).encode("ascii") ).decode("ascii") else: netloc = idna.encode(parsed.hostname).decode("ascii") # Note that building a URL in this fashion means it should be # semantically indistinguishable from the original but is not # guaranteed to be exactly the same. return urllib_parse.urlunparse(( parsed.scheme, netloc, parsed.path, parsed.params, parsed.query, parsed.fragment ))
Example #5
Source File: test_saml.py From Dailyfresh-B2C with Apache License 2.0 | 6 votes |
def do_start(self):
    """Run the SAML login flow end-to-end and return the completed auth."""
    # Modify the start URL to make the SAML request consistent
    # from test to test:
    start_url = self.modify_start_url(self.backend.start().url)
    # If the SAML Identity Provider recognizes the user, we will
    # be redirected back to:
    return_url = self.backend.redirect_uri
    self.install_http_intercepts(start_url, return_url)
    response = requests.get(start_url)
    self.assertTrue(response.url.startswith(return_url))
    self.assertEqual(response.text, 'foobar')
    query_values = {
        key: vals[0]
        for key, vals in parse_qs(urlparse(response.url).query).items()
    }
    self.assertNotIn(' ', query_values['SAMLResponse'])
    self.strategy.set_request_data(query_values, self.backend)
    return self.backend.complete()
Example #6
Source File: test_saml.py From Dailyfresh-B2C with Apache License 2.0 | 6 votes |
def modify_start_url(self, start_url):
    """
    Given a SAML redirect URL, parse it and change the ID to a consistent
    value, so the request is always identical.
    """
    # Parse the SAML Request URL to get the XML being sent to TestShib
    url_parts = urlparse(start_url)
    query = {key: vals[0] for key, vals in parse_qs(url_parts.query).items()}
    xml = OneLogin_Saml2_Utils.decode_base64_and_inflate(query['SAMLRequest'])
    # Replace the randomly generated request ID with a fixed one:
    xml, changed = re.subn(r'ID="[^"]+"', 'ID="TEST_ID"', xml.decode())
    self.assertEqual(changed, 1)
    # Re-encode the modified request and splice it back into the URL:
    query['SAMLRequest'] = OneLogin_Saml2_Utils.deflate_and_base64_encode(xml)
    url_parts = list(url_parts)
    url_parts[4] = urlencode(query)
    return urlunparse(url_parts)
Example #7
Source File: __init__.py From flex with MIT License | 6 votes |
def validate_deferred_references(schema, context, **kwargs):
    """Verify every deferred `$ref` in *context* resolves inside *schema*.

    Collects one error per bad reference via ErrorDict: references with any
    non-fragment URL component are unsupported; fragments that do not
    resolve as JSON pointers are undefined.
    """
    try:
        deferred_references = context['deferred_references']
    except KeyError:
        raise KeyError("`deferred_references` not found in context")
    with ErrorDict() as errors:
        for ref in deferred_references:
            split_ref = urlparse.urlparse(ref)
            # Only local fragment references (#/path/to/def) are supported.
            if any((split_ref.scheme, split_ref.netloc, split_ref.path,
                    split_ref.params, split_ref.query)):
                errors.add_error(
                    ref,
                    MESSAGES['reference']['unsupported'].format(ref),
                )
                continue
            try:
                jsonpointer.resolve_pointer(schema, split_ref.fragment)
            except jsonpointer.JsonPointerException:
                errors.add_error(
                    ref,
                    MESSAGES['reference']['undefined'].format(ref),
                )
Example #8
Source File: example_utils.py From taskflow with Apache License 2.0 | 6 votes |
def _make_conf(backend_uri):
    """Build a taskflow persistence-backend config dict from *backend_uri*.

    Args:
        backend_uri: a URI such as ``file:///tmp/x`` or
            ``zookeeper://host:2181/path`` whose scheme selects the backend.

    Returns:
        A dict always containing ``'connection'``; ``file``/``dir`` backends
        also get ``'path'``, and ``zookeeper`` additionally gets ``'hosts'``.

    Raises:
        ValueError: if the URI has no scheme to identify the backend type.
    """
    parsed_url = urllib_parse.urlparse(backend_uri)
    backend_type = parsed_url.scheme.lower()
    if not backend_type:
        # Bug fix: the original interpolated `backend_type` into the message,
        # which is always '' on this branch — report the offending URI.
        raise ValueError("Unknown backend type for uri: %s" % (backend_uri,))
    if backend_type in ('file', 'dir'):
        conf = {
            'path': parsed_url.path,
            'connection': backend_uri,
        }
    elif backend_type in ('zookeeper',):
        conf = {
            'path': parsed_url.path,
            'hosts': parsed_url.netloc,
            'connection': backend_uri,
        }
    else:
        conf = {
            'connection': backend_uri,
        }
    return conf
Example #9
Source File: oauth2.py From spotipy with MIT License | 6 votes |
def get_auth_response(self):
    """Obtain the authorization code, via a local HTTP server when possible."""
    logger.info('User authentication requires interaction with your '
                'web browser. Once you enter your credentials and '
                'give authorization, you will be redirected to '
                'a url. Paste that url you were directed to to '
                'complete the authorization.')
    redirect_info = urlparse(self.redirect_uri)
    redirect_host, redirect_port = get_host_port(redirect_info.netloc)
    is_local_http = (redirect_info.scheme == "http"
                     and redirect_host in ("127.0.0.1", "localhost"))
    if is_local_http:
        # Only start a local http server if a port is specified
        if redirect_port:
            return self._get_auth_response_local_server(redirect_port)
        logger.warning('Using `%s` as redirect URI without a port. '
                       'Specify a port (e.g. `%s:8080`) to allow '
                       'automatic retrieval of authentication code '
                       'instead of having to copy and paste '
                       'the URL your browser is redirected to.',
                       redirect_host, redirect_host)
    logger.info('Paste that url you were directed to in order to '
                'complete the authorization')
    return self._get_auth_response_interactive()
Example #10
Source File: handlers.py From jupyter_http_over_ws with Apache License 2.0 | 6 votes |
def _attach_auth_cookies(self):
    """Fetch cookies from the auth URL and merge them into this request."""
    target = self.get_argument(_AUTH_URL_QUERY_PARAM, default='')
    if not target:
        raise gen.Return({})
    parsed = urlparse.urlparse(target)
    try:
        # The auth endpoint must live on the same domain as the request.
        _validate_same_domain(self.request, parsed)
        extra_cookies = yield _perform_request_and_extract_cookies(
            parsed, self.ca_certs, self._get_http_client())
    except Exception as e:  # pylint:disable=broad-except
        self._on_unhandled_exception(e)
        raise
    self.request.headers.update(extra_cookies)
Example #11
Source File: general_name.py From teleport with Apache License 2.0 | 6 votes |
def _idna_encode(self, value):
    # Convert the URL's hostname to its IDNA (ASCII) form while keeping
    # every other component of the URL intact.
    idna = _lazy_import_idna()
    parsed = urllib_parse.urlparse(value)
    encoded_host = idna.encode(parsed.hostname)
    if parsed.port:
        encoded_host += ":{}".format(parsed.port).encode("ascii")
    netloc = encoded_host.decode("ascii")
    # The rebuilt URL is semantically equivalent to the input, though the
    # exact byte sequence is not guaranteed to match after urlunparse.
    return urllib_parse.urlunparse(
        (parsed.scheme, netloc, parsed.path,
         parsed.params, parsed.query, parsed.fragment)
    )
Example #12
Source File: general_name.py From teleport with Apache License 2.0 | 6 votes |
def _idna_encode(self, value):
    """Return *value* with its hostname punycode-encoded via the idna lib."""
    parsed = urllib_parse.urlparse(value)
    netloc = idna.encode(parsed.hostname).decode("ascii")
    if parsed.port:
        netloc += ":{0}".format(parsed.port)
    # Rebuilding the URL keeps it semantically equivalent to the input,
    # but the exact text may differ after round-tripping.
    return urllib_parse.urlunparse((
        parsed.scheme,
        netloc,
        parsed.path,
        parsed.params,
        parsed.query,
        parsed.fragment,
    ))
Example #13
Source File: helpers.py From script.module.resolveurl with GNU General Public License v2.0 | 6 votes |
def get_media_url(url, result_blacklist=None, patterns=None, generic_patterns=True):
    """Fetch *url*, scrape its sources, and return a playable media URL
    with the request headers appended."""
    if patterns is None:
        patterns = []
    scheme = urllib_parse.urlparse(url).scheme
    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]
    # smil(not playable) contains potential sources, only blacklist when called from here
    result_blacklist = list(set(result_blacklist + ['.smil']))
    headers = {'User-Agent': common.RAND_UA, 'Referer': url}
    net = common.Net()
    response = net.http_GET(url, headers=headers)
    cookie = response.get_headers(as_dict=True).get('Set-Cookie', None)
    if cookie:
        headers['Cookie'] = cookie
    html = response.content
    sources = scrape_sources(html, result_blacklist, scheme,
                             patterns, generic_patterns)
    return pick_source(sources) + append_headers(headers)
Example #14
Source File: ctl.py From patroni with MIT License | 6 votes |
def parse_dcs(dcs):
    """Parse a DCS specifier like ``etcd://host:2379`` into a config dict.

    A bare ``host[:port]`` (no scheme) is accepted; the scheme is then
    inferred from a well-known default port, falling back to etcd.
    Returns None when *dcs* is None.
    """
    if dcs is None:
        return None
    if '//' not in dcs:
        dcs = '//' + dcs
    parsed = urlparse(dcs)
    scheme = parsed.scheme
    port = int(parsed.port) if parsed.port else None
    if scheme == '':
        matches = [k for k, v in DCS_DEFAULTS.items() if v['port'] == port]
        scheme = (matches or ['etcd'])[0]
    elif scheme not in DCS_DEFAULTS:
        raise PatroniCtlException('Unknown dcs scheme: {}'.format(scheme))
    default = DCS_DEFAULTS[scheme]
    return yaml.safe_load(default['template'].format(
        host=parsed.hostname or 'localhost',
        port=port or default['port']))
Example #15
Source File: handlers.py From jupyter_http_over_ws with Apache License 2.0 | 6 votes |
def _attach_auth_cookies(self):
    """Proxy cookies obtained from the auth URL onto the current request."""
    auth_url = self.get_argument(_AUTH_URL_QUERY_PARAM, default='')
    if not auth_url:
        raise gen.Return()
    parsed = urlparse.urlparse(auth_url)
    try:
        _validate_same_domain(self.request, parsed)
        extra_cookies = yield _perform_request_and_extract_cookies(
            parsed, self.ca_certs, self._get_http_client())
    except Exception:  # pylint:disable=broad-except
        self.log.exception('Uncaught error when proxying request')
        raise
    self.request.headers.update(extra_cookies)
Example #16
Source File: general_name.py From quickstart-git2s3 with Apache License 2.0 | 6 votes |
def _idna_encode(self, value):
    # IDNA-encode the hostname of *value*; all other URL parts pass through.
    parsed = urllib_parse.urlparse(value)
    encoded = idna.encode(parsed.hostname)
    if parsed.port:
        encoded += ":{0}".format(parsed.port).encode("ascii")
    # Semantically indistinguishable from the original URL, though not
    # guaranteed to be byte-identical after urlunparse.
    return urllib_parse.urlunparse(
        (parsed.scheme, encoded.decode("ascii"), parsed.path,
         parsed.params, parsed.query, parsed.fragment)
    )
Example #17
Source File: helpers.py From script.module.urlresolver with GNU General Public License v2.0 | 6 votes |
def get_media_url(url, result_blacklist=None, patterns=None, generic_patterns=True):
    """Resolve *url* to a playable source URL, appending request headers."""
    if patterns is None:
        patterns = []
    if result_blacklist is None:
        result_blacklist = []
    elif isinstance(result_blacklist, str):
        result_blacklist = [result_blacklist]
    # smil(not playable) contains potential sources, only blacklist when called from here
    result_blacklist = list(set(result_blacklist + ['.smil']))
    scheme = urllib_parse.urlparse(url).scheme
    net = common.Net()
    headers = {'User-Agent': common.RAND_UA}
    headers.update({'Referer': url})
    response = net.http_GET(url, headers=headers)
    response_headers = response.get_headers(as_dict=True)
    cookie = response_headers.get('Set-Cookie', None)
    if cookie:
        headers.update({'Cookie': cookie})
    source_list = scrape_sources(response.content, result_blacklist,
                                 scheme, patterns, generic_patterns)
    return pick_source(source_list) + append_headers(headers)
Example #18
Source File: query.py From pagure with GNU General Public License v2.0 | 6 votes |
def save_report(session, repo, name, url, username):
    """ Save the report of issues based on the given URL of the project. """
    url_obj = urlparse(url)
    # Keep the URL minus its query string; the query becomes the report.
    url = url_obj.geturl().replace(url_obj.query, "")
    query = {}
    for key, value in parse_qsl(url_obj.query):
        if key not in query:
            query[key] = value
        elif isinstance(query[key], list):
            query[key].append(value)
        else:
            # Second occurrence of a key: promote the value to a list.
            query[key] = [query[key], value]
    reports = repo.reports
    reports[name] = query
    repo.reports = reports
    session.add(repo)
Example #19
Source File: __init__.py From patroni with MIT License | 5 votes |
def parse_connection_string(value):
    """Split a member connection string stored in DCS into two parts.

    The original Governor stores plain libpq URLs of the form
    ``postgres://{username}:{password}@{connect_address}/postgres``.  Patroni
    extends this with ``?application_name={api_url}`` so each instance's REST
    API endpoint can travel along without introducing new DCS keys — libpq
    simply treats the extra parameter as the application name.

    Returns a ``(conn_url, api_url)`` tuple; ``api_url`` is ``None`` when no
    ``application_name`` parameter is present.
    """
    scheme, netloc, path, params, query, fragment = urlparse(value)
    conn_url = urlunparse((scheme, netloc, path, params, '', fragment))
    app_names = [v for k, v in parse_qsl(query) if k == 'application_name']
    api_url = app_names[0] if app_names else None
    return conn_url, api_url
Example #20
Source File: oauth2.py From spotipy with MIT License | 5 votes |
def get_auth_response(self, state=None):
    """ Gets a new auth **token** with user interaction """
    logger.info('User authentication requires interaction with your '
                'web browser. Once you enter your credentials and '
                'give authorization, you will be redirected to '
                'a url. Paste that url you were directed to to '
                'complete the authorization.')
    redirect_info = urlparse(self.redirect_uri)
    redirect_host, redirect_port = get_host_port(redirect_info.netloc)
    # Implicit Grant tokens are returned in a hash fragment
    # which is only available to the browser. Therefore, interactive
    # URL retrival is required.
    local_with_port = (redirect_info.scheme == "http"
                       and redirect_host in ("127.0.0.1", "localhost")
                       and redirect_port)
    if local_with_port:
        logger.warning('Using a local redirect URI with a '
                       'port, likely expecting automatic '
                       'retrieval. Due to technical limitations, '
                       'the authentication token cannot be '
                       'automatically retrieved and must be '
                       'copied and pasted.')
    self._open_auth_url(state)
    logger.info('Paste that url you were directed to in order to '
                'complete the authorization')
    response = SpotifyImplicitGrant._get_user_input("Enter the URL you "
                                                    "were redirected to: ")
    return self.parse_response_token(response, state)
Example #21
Source File: sanitizer.py From ImageFusion with MIT License | 5 votes |
def allowed_token(self, token, token_type):
    """Filter one parser token's attributes down to the sanitizer whitelist.

    Drops attributes not in ``self.allowed_attributes``, strips URI-valued
    attributes with a non-whitelisted scheme, removes ``url(...)``
    references from certain SVG attributes, prunes non-local ``xlink:href``
    links on selected SVG elements, and sanitizes inline CSS.  Returns the
    (mutated) token.  ``token_type`` is accepted but not used in this body.
    """
    if "data" in token:
        # [::-1] means the FIRST occurrence of a duplicated attribute wins,
        # since later dict() entries overwrite earlier ones.
        attrs = dict([(name, val) for name, val in token["data"][::-1]
                      if name in self.allowed_attributes])
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Strip control/space chars that browsers ignore, so obfuscated
            # schemes like "jav\tascript:" are still caught by the check.
            val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace("\ufffd", "")
            uri = urlparse.urlparse(val_unescaped)
            if uri:
                if uri.scheme not in self.allowed_protocols:
                    del attrs[attr]
                if uri.scheme == 'data':
                    m = content_type_rgx.match(uri.path)
                    # NOTE(review): mirrors upstream html5lib — if `m` is
                    # None the following .group() raises AttributeError, and
                    # a disallowed 'data' scheme double-deletes (KeyError);
                    # confirm against the vendored html5lib before changing.
                    if not m:
                        del attrs[attr]
                    if m.group('content_type') not in self.allowed_content_types:
                        del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # Remove url(...) funciri references that point outside the
                # local document fragment.
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and
                re.search('^\s*[^#\s].*', attrs['xlink:href'])):
            # Non-fragment xlink:href values are dropped on these elements.
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        token["data"] = [[name, val] for name, val in list(attrs.items())]
    return token
Example #22
Source File: webtest.py From cheroot with BSD 3-Clause "New" or "Revised" License | 5 votes |
def strip_netloc(url):
    """Return absolute-URI path from URL.

    Strip the scheme and host from the URL, returning the
    server-absolute portion.

    Useful for wrapping an absolute-URI for which only the
    path is expected (such as in calls to :py:meth:`WebCase.getPage`).

    >>> strip_netloc('https://google.com/foo/bar?bing#baz')
    '/foo/bar?bing'

    >>> strip_netloc('//google.com/foo/bar?bing#baz')
    '/foo/bar?bing'

    >>> strip_netloc('/foo/bar?bing#baz')
    '/foo/bar?bing'
    """
    _, _, path, params, query, _ = urllib_parse.urlparse(url)
    # Blank out scheme, netloc and fragment; keep only the server-relative
    # part of the URL.
    return urllib_parse.urlunparse(('', '', path, params, query, ''))


# Add any exceptions which your web framework handles
# normally (that you don't want server_error to trap).
Example #23
Source File: sanitizer.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def allowed_token(self, token, token_type):
    """Sanitize one parser token by whitelisting its attributes.

    Keeps only attributes in ``self.allowed_attributes``, deletes URI-valued
    attributes whose scheme is not in ``self.allowed_protocols``, scrubs
    ``url(...)`` references from SVG attributes, drops non-local
    ``xlink:href`` on selected SVG elements, and sanitizes inline CSS.
    Returns the (mutated) token.  ``token_type`` is unused in this body.
    """
    if "data" in token:
        # [::-1]: first occurrence of a duplicate attribute wins, because
        # later dict() pairs overwrite earlier ones.
        attrs = dict([(name, val) for name, val in token["data"][::-1]
                      if name in self.allowed_attributes])
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Remove control/space characters browsers ignore so obfuscated
            # schemes (e.g. "jav\tascript:") don't slip past the whitelist.
            val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace("\ufffd", "")
            uri = urlparse.urlparse(val_unescaped)
            if uri:
                if uri.scheme not in self.allowed_protocols:
                    del attrs[attr]
                if uri.scheme == 'data':
                    m = content_type_rgx.match(uri.path)
                    # NOTE(review): as in upstream html5lib — `m` can be
                    # None when .group() is called, and a disallowed 'data'
                    # scheme deletes twice (KeyError); verify against the
                    # vendored html5lib version before relying on this path.
                    if not m:
                        del attrs[attr]
                    if m.group('content_type') not in self.allowed_content_types:
                        del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # Strip url(...) references that point outside the document.
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and
                re.search('^\s*[^#\s].*', attrs['xlink:href'])):
            # Drop xlink:href unless it is a local fragment reference.
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        token["data"] = [[name, val] for name, val in list(attrs.items())]
    return token
Example #24
Source File: sanitizer.py From datafari with Apache License 2.0 | 5 votes |
def allowed_token(self, token, token_type):
    """Restrict a parser token's attributes to the sanitizer whitelist.

    Removes non-whitelisted attributes, URI attributes with disallowed
    schemes, ``url(...)`` references in SVG attributes, and non-local
    ``xlink:href`` links on selected SVG elements; inline CSS is passed
    through ``sanitize_css``.  Returns the (mutated) token.  ``token_type``
    is accepted for interface compatibility but not used here.
    """
    if "data" in token:
        # Reversing with [::-1] makes the FIRST occurrence of a duplicated
        # attribute win (later dict() entries overwrite earlier ones).
        attrs = dict([(name, val) for name, val in token["data"][::-1]
                      if name in self.allowed_attributes])
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Collapse control/space characters that browsers tolerate, so
            # split-up schemes like "jav\tascript:" are still detected.
            val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace("\ufffd", "")
            uri = urlparse.urlparse(val_unescaped)
            if uri:
                if uri.scheme not in self.allowed_protocols:
                    del attrs[attr]
                if uri.scheme == 'data':
                    m = content_type_rgx.match(uri.path)
                    # NOTE(review): same as upstream html5lib — `m` may be
                    # None at the .group() call and a disallowed 'data'
                    # scheme deletes the attr twice (KeyError); confirm
                    # against the vendored html5lib before changing.
                    if not m:
                        del attrs[attr]
                    if m.group('content_type') not in self.allowed_content_types:
                        del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # Scrub url(...) funciri references pointing outside the
                # local fragment.
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and
                re.search('^\s*[^#\s].*', attrs['xlink:href'])):
            # Only fragment-local xlink:href survives on these elements.
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        token["data"] = [[name, val] for name, val in list(attrs.items())]
    return token
Example #25
Source File: videozoo.py From script.module.resolveurl with GNU General Public License v2.0 | 5 votes |
def _redirect_test(self, url):
    """Follow *url* and return the redirect target, or *url* if unchanged."""
    opener = urllib_request.build_opener()
    # NOTE(review): the second assignment replaces the first, so the
    # User-agent header is never actually sent — possibly meant to be one
    # list containing both headers; confirm upstream before changing.
    opener.addheaders = [('User-agent', common.IOS_USER_AGENT)]
    opener.addheaders = [('Referer', urllib_parse.urlparse(url).netloc)]
    try:
        resp = opener.open(url)
        return resp.geturl() if url != resp.geturl() else url
    except urllib_error.HTTPError as e:
        # A 403 that still redirected us somewhere new is treated as success.
        if e.code == 403 and url != e.geturl():
            return e.geturl()
        raise ResolverError('File not found')
Example #26
Source File: simplydebrid.py From script.module.resolveurl with GNU General Public License v2.0 | 5 votes |
def valid_url(self, url, host):
    """Return True when *url* (or *host*) belongs to a supported hoster."""
    if not self.hosts:
        # Lazily populate the hoster list on first use.
        self.hosts = self.get_all_hosters()
    if url:
        try:
            host = urllib_parse.urlparse(url).hostname
        except:  # noqa: E722 — bare except kept from the original
            host = 'unknown'
    if host.startswith('www.'):
        host = host.replace('www.', '')
    return any(host in item for item in self.hosts)
Example #27
Source File: hmf.py From script.module.resolveurl with GNU General Public License v2.0 | 5 votes |
def __top_domain(self, url):
    """Return the registrable (second-level) domain of *url*, lowercased."""
    elements = urllib_parse.urlparse(url)
    # Scheme-less URLs put the host in .path; strip userinfo and port.
    domain = (elements.netloc or elements.path).split('@')[-1].split(':')[0]
    match = re.search(
        r"(?:www\.)?([\w\-]*\.[\w\-]{2,5}(?:\.[\w\-]{2,5})?)$", domain)
    if match:
        domain = match.group(1)
    return domain.lower()
Example #28
Source File: __init__.py From script.module.resolveurl with GNU General Public License v2.0 | 5 votes |
def scrape_supported(html, regex=None, host_only=False):
    """
    returns a list of links scraped from the html that are supported by resolveurl

    args:
        html: the html to be scraped
        regex: an optional argument to override the default regex which is: href *= *["']([^'"]+
        host_only: an optional argument if true to do only host validation vs full url validation (default False)

    Returns:
        a list of links scraped from the html that passed validation
    """
    if regex is None:
        regex = r'''href\s*=\s*['"]([^'"]+)'''
    links = []
    for match in re.finditer(regex, html):
        stream_url = match.group(1)
        host = urllib_parse.urlparse(stream_url).hostname
        if host_only:
            if host is None:
                continue
            if host in host_cache:
                # Cached verdict for this host: reuse it and skip validation.
                if host_cache[host]:
                    links.append(stream_url)
                continue
            # use dummy media_id to allow host validation
            hmf = HostedMediaFile(host=host, media_id='dummy')
        else:
            hmf = HostedMediaFile(url=stream_url)
        is_valid = hmf.valid_url()
        host_cache[host] = is_valid
        if is_valid:
            links.append(stream_url)
    return links
Example #29
Source File: sanitizer.py From ImageFusion with MIT License | 5 votes |
def allowed_token(self, token, token_type):
    """Whitelist-filter the attributes of one parser token.

    Discards attributes outside ``self.allowed_attributes``, URI attributes
    with disallowed schemes, ``url(...)`` references in SVG attributes, and
    non-local ``xlink:href`` links on selected SVG elements; inline CSS is
    run through ``sanitize_css``.  Returns the (mutated) token.
    ``token_type`` is not used in this body.
    """
    if "data" in token:
        # [::-1]: the FIRST occurrence of a duplicated attribute wins, as
        # later dict() pairs overwrite earlier ones.
        attrs = dict([(name, val) for name, val in token["data"][::-1]
                      if name in self.allowed_attributes])
        for attr in self.attr_val_is_uri:
            if attr not in attrs:
                continue
            # Drop control/space characters browsers skip over, so schemes
            # obfuscated like "jav\tascript:" are still recognized.
            val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                   unescape(attrs[attr])).lower()
            # remove replacement characters from unescaped characters
            val_unescaped = val_unescaped.replace("\ufffd", "")
            uri = urlparse.urlparse(val_unescaped)
            if uri:
                if uri.scheme not in self.allowed_protocols:
                    del attrs[attr]
                if uri.scheme == 'data':
                    m = content_type_rgx.match(uri.path)
                    # NOTE(review): matches upstream html5lib; `m` may be
                    # None at the .group() call, and a disallowed 'data'
                    # scheme deletes twice (KeyError) — confirm against the
                    # vendored html5lib version before changing.
                    if not m:
                        del attrs[attr]
                    if m.group('content_type') not in self.allowed_content_types:
                        del attrs[attr]
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                # Remove url(...) references that escape the local fragment.
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))
        if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and
                re.search('^\s*[^#\s].*', attrs['xlink:href'])):
            # Drop xlink:href unless it is a local fragment reference.
            del attrs['xlink:href']
        if 'style' in attrs:
            attrs['style'] = self.sanitize_css(attrs['style'])
        token["data"] = [[name, val] for name, val in list(attrs.items())]
    return token
Example #30
Source File: proxy.py From bazarr with GNU General Public License v3.0 | 5 votes |
def parse_url(cls, url):
    """Build a proxy configuration object of type *cls* from a proxy URL."""
    parts = urlparse(url)
    return cls(
        proxy_type=parts.scheme,
        proxy_address=parts.hostname,
        proxy_port=parts.port,
        proxy_login=parts.username,
        proxy_password=parts.password,
    )