Python urllib.parse.urlsplit() Examples
The following are 30 code examples of urllib.parse.urlsplit().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module urllib.parse, or try the search function.
Example #1
Source File: base.py From bioforum with MIT License | 6 votes |
def translate_url(url, lang_code):
    """
    Return the `lang_code` version of `url`, or `url` itself if there is none.

    The path of `url` (absolute or relative) is resolved to a view and then
    reversed again inside ``override(lang_code)``. The scheme, netloc, query
    and fragment of the input are carried over unchanged.
    """
    parts = urlsplit(url)
    try:
        match = resolve(parts.path)
    except Resolver404:
        # The path does not resolve to a view, so there is nothing to translate.
        return url

    if match.namespace:
        view_name = "%s:%s" % (match.namespace, match.url_name)
    else:
        view_name = match.url_name

    with override(lang_code):
        try:
            translated_path = reverse(view_name, args=match.args, kwargs=match.kwargs)
        except NoReverseMatch:
            # No URL pattern for this view under `lang_code`; keep the original.
            return url
        return urlunsplit(
            (parts.scheme, parts.netloc, translated_path, parts.query, parts.fragment)
        )
Example #2
Source File: actionform.py From gprime with GNU General Public License v2.0 | 6 votes |
def upload(url, filename=None):
    """
    Fetch ``url`` and write the response body to a local file.

    :param url: URL to open with ``urllib.request.urlopen``.
    :param filename: destination path. When falsy, the file is written to
        ``/tmp/<name>``, where ``<name>`` comes from the response's
        ``Content-Disposition`` header or, failing that, from the last path
        segment of the final response URL.
    :return: the path that was written.
    """
    import os
    import shutil
    from urllib.parse import urlsplit
    from urllib.request import Request, urlopen

    def _filename_from_response(response):
        """Pick a file name from Content-Disposition, else from the final URL."""
        # Parse the header *value*; the header container itself has no .split().
        content_disposition = response.info().get('Content-Disposition')
        if content_disposition:
            params = {}
            for piece in content_disposition.split(';'):
                # partition() keeps any '=' that appears inside the value.
                key, _, value = piece.strip().partition('=')
                params[key.strip().lower()] = value.strip()
            # The name is server-controlled: drop any directory components so
            # it cannot escape the target directory.
            fname = os.path.basename(params.get('filename', '').strip("\"'"))
            if fname:
                return fname
        # If no filename was found above, parse it out of the final URL.
        return os.path.basename(urlsplit(response.url)[2])

    r = urlopen(Request(url))
    success = None
    try:
        filename = filename or "/tmp/%s" % _filename_from_response(r)
        with open(filename, 'wb') as f:
            shutil.copyfileobj(r, f)
        success = filename
    finally:
        r.close()
    return success
Example #3
Source File: http_headers_plugin.py From sslyze with GNU Affero General Public License v3.0 | 6 votes |
def _detect_http_redirection(http_response: HTTPResponse, server_host_name: str, server_port: int) -> Optional[str]:
    """Return the path to follow if the response redirects to the same server.

    A 3xx response whose ``Location`` header is relative (no hostname) yields
    the header value as-is. An absolute ``Location`` is followed only when its
    hostname equals ``server_host_name`` and its port (443 when omitted)
    equals ``server_port``; the result is then the path plus any query string.
    Every other case yields ``None``.
    """
    if not 300 <= http_response.status < 400:
        return None

    location_header = _extract_first_header_value(http_response, "Location")
    if not location_header:
        return None

    target = urlsplit(location_header)
    if not target.hostname:
        # Relative URL: follow the redirection as given.
        return location_header

    # An absolute URL without an explicit port is assumed to use 443.
    target_port = target.port if target.port is not None else 443
    if target.hostname != server_host_name or target_port != server_port:
        # Redirection to a different server: do not follow.
        return None

    redirect_path = f"{target.path}"
    if target.query:
        redirect_path += f"?{target.query}"
    return redirect_path
Example #4
Source File: zmirror.py From zmirror with MIT License | 6 votes |
def extract_url_path_and_query(full_url=None, no_query=False):
    """
    Reduce a full URL to its path and, optionally, its query string.

    Example: http://foo.bar.com/aaa/p.html?x=y becomes /aaa/p.html?x=y

    :param full_url: full url; ``request.url`` is used when this is None
    :type full_url: str
    :param no_query: when true, the query string is left out of the result
    :return: str
    """
    parts = urlsplit(request.url if full_url is None else full_url)
    path = parts.path or "/"  # an empty path is reported as the root
    if no_query or not parts.query:
        return path
    return path + '?' + parts.query


# ################# End Client Request Handler #################
# ################# Begin Middle Functions #################
Example #5
Source File: storage_manager.py From OasisPlatform with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _strip_signing_parameters(self, url): """ Duplicated Unsiged URLs from Django-Stroage Method from: https://github.com/jschneier/django-storages/blob/master/storages/backends/s3boto3.py Boto3 does not currently support generating URLs that are unsigned. Instead we take the signed URLs and strip any querystring params related to signing and expiration. Note that this may end up with URLs that are still invalid, especially if params are passed in that only work with signed URLs, e.g. response header params. The code attempts to strip all query parameters that match names of known parameters from v2 and v4 signatures, regardless of the actual signature version used. """ split_url = urlsplit(url) qs = parse_qsl(split_url.query, keep_blank_values=True) blacklist = { 'x-amz-algorithm', 'x-amz-credential', 'x-amz-date', 'x-amz-expires', 'x-amz-signedheaders', 'x-amz-signature', 'x-amz-security-token', 'awsaccesskeyid', 'expires', 'signature', } filtered_qs = ((key, val) for key, val in qs if key.lower() not in blacklist) # Note: Parameters that did not have a value in the original query string will have # an '=' sign appended to it, e.g ?foo&bar becomes ?foo=&bar= joined_qs = ('='.join(keyval) for keyval in filtered_qs) split_url = split_url._replace(query="&".join(joined_qs)) return split_url.geturl()
Example #6
Source File: cache.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def path(url, project, include_file=False, makedirs=False):
    """
    Build the cache location for ``url``.

    The directory is ``Cache.CACHE_DIR/<url hostname>[/<project>]``.

    :param url: URL whose hostname selects the cache sub-directory.
    :param project: optional extra path component appended when truthy.
    :param include_file: when true, return the path of the cache file inside
        the directory (named after the SHA-1 hex digest of ``url``) instead
        of the directory itself.
    :param makedirs: when true, create the directory if it does not exist.
    :raises Exception: if ``Cache.CACHE_DIR`` is not set.
    """
    if not Cache.CACHE_DIR:
        raise Exception('Cache.init() must be called first')

    parts = [Cache.CACHE_DIR]

    o = urlsplit(url)
    parts.append(o.hostname)

    if project:
        parts.append(project)

    directory = os.path.join(*parts)
    if makedirs and not os.path.exists(directory):
        # exist_ok avoids a crash when another process creates the directory
        # between the existence check and this call.
        os.makedirs(directory, exist_ok=True)

    if include_file:
        parts.append(hashlib.sha1(url.encode('utf-8')).hexdigest())
        return os.path.join(*parts)

    return directory
Example #7
Source File: storage.py From bioforum with MIT License | 6 votes |
def stored_name(self, name):
    """
    Return the hashed file name for `name`, preferring the manifest.

    The unquoted, stripped path of `name` is looked up in
    ``self.hashed_files``. On a miss, a ValueError is raised when
    ``self.manifest_strict`` is set; otherwise the name is computed with
    ``self.hashed_name()``. The scheme, netloc, query and fragment of `name`
    are kept around the new path.
    """
    split_name = urlsplit(unquote(name))
    stripped_path = split_name.path.strip()
    cache_name = self.hashed_files.get(self.hash_key(stripped_path))
    if cache_name is None:
        if self.manifest_strict:
            raise ValueError("Missing staticfiles manifest entry for '%s'" % stripped_path)
        cache_name = self.clean_name(self.hashed_name(name))

    scheme, netloc, _, query, fragment = split_name
    # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
    # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
    if '?#' in name and not query:
        cache_name += '?'
    return urlunsplit((scheme, netloc, cache_name, query, fragment))
Example #8
Source File: cache.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def delete(url):
    """
    Remove the cache entry for ``url``.

    When ``Cache.match()`` reports a match, the related project cache is
    cleared (if a project is known) and the cache file is removed if present.
    If the URL carries a query string, the same is then done for the URL
    without its query.
    """
    url = unquote(url)
    matched, project = Cache.match(url)
    if matched:
        cache_file = Cache.path(url, project, include_file=True)

        # Rather then wait for last updated statistics to expire, remove the
        # project cache if applicable.
        if project:
            apiurl, _ = Cache.spliturl(url)
            if project.isdigit():
                # Clear target project cache upon request acceptance.
                request = osc.core.get_request(apiurl, project)
                project = request.actions[0].tgt_project
            Cache.delete_project(apiurl, project)

        if os.path.exists(cache_file):
            if conf.config['debug']:
                print('CACHE_DELETE', url, file=sys.stderr)
            os.remove(cache_file)

    # Also delete version without query. This does not handle other
    # variations using different query strings. Handy for PUT with ?force=1.
    parts = urlsplit(url)
    if parts.query:
        Cache.delete(parts._replace(query='').geturl())
Example #9
Source File: cache.py From openSUSE-release-tools with GNU General Public License v2.0 | 6 votes |
def delete(url):
    """
    Remove the cache entry for ``url``.

    When ``Cache.match()`` reports a match, the related project cache is
    cleared (if a project is known) and the cache file is removed if present.
    If the URL carries a query string, the same is then done for the URL
    without its query.
    """
    url = unquote(url)
    matched, project = Cache.match(url)
    if matched:
        cache_file = Cache.path(url, project, include_file=True)

        # Rather then wait for last updated statistics to expire, remove the
        # project cache if applicable.
        if project:
            apiurl, _ = Cache.spliturl(url)
            if project.isdigit():
                # Clear target project cache upon request acceptance.
                request = osc.core.get_request(apiurl, project)
                project = request.actions[0].tgt_project
            Cache.delete_project(apiurl, project)

        if os.path.exists(cache_file):
            if conf.config['debug']:
                print('CACHE_DELETE', url, file=sys.stderr)
            os.remove(cache_file)

    # Also delete version without query. This does not handle other
    # variations using different query strings. Handy for PUT with ?force=1.
    parts = urlsplit(url)
    if parts.query:
        Cache.delete(parts._replace(query='').geturl())
Example #10
Source File: request.py From bioforum with MIT License | 6 votes |
def build_absolute_uri(self, location=None):
    """
    Build an absolute URI from the location and the variables available in
    this request. If no ``location`` is specified, build the absolute URI
    using request.get_full_path(). If the location is absolute, convert it
    to an RFC 3987 compliant URI and return it. If location is relative or
    is scheme-relative (i.e., ``//example.com/``), urljoin() it to a base
    URL constructed from the request variables.
    """
    if location is None:
        # Make it an absolute url (but schemeless and domainless) for the
        # edge case that the path starts with '//'.
        location = '//%s' % self.get_full_path()

    parts = urlsplit(location)
    if not parts.scheme or not parts.netloc:
        base_uri = '%s://%s%s' % (self.scheme, self.get_host(), self.path)
        # Join the constructed URL with the provided location, which will
        # allow the provided ``location`` to apply query strings to the
        # base path as well as override the host, if it begins with //
        location = urljoin(base_uri, location)
    return iri_to_uri(location)
Example #11
Source File: tftp.py From dionaea with GNU General Public License v2.0 | 6 votes |
def handle_incident(self, icd):
    """
    Start a TFTP download for an incident whose ``url`` is a tftp:// URL.

    :param icd: incident; ``icd.get("url")`` supplies the URL (str or bytes)
        and ``icd.con``, when present, is passed on to the download.

    URLs that are not valid UTF-8, and downloads from 0.0.0.0, are logged
    and skipped. URLs with any other scheme are ignored.
    """
    url = icd.get("url")
    if isinstance(url, bytes):
        try:
            url = url.decode(encoding="utf-8")
        except UnicodeDecodeError:
            # decode() raises UnicodeDecodeError, not UnicodeEncodeError;
            # catching the latter let undecodable URLs crash the handler.
            logger.warning("Error decoding URL %s", url, exc_info=True)
            return

    if url.startswith('tftp://'):
        # python fails parsing tftp://, ftp:// works, so ...
        # (dropping the leading 't' turns the scheme into ftp://)
        logger.info("do download")
        x = parse.urlsplit(url[1:])
        if x.netloc == '0.0.0.0':
            logger.info("Discarding download from INADDR_ANY")
            return
        # Not every incident carries a connection.
        con = getattr(icd, "con", None)
        t = TftpClient()
        # 69 is the port passed to the client; x.path[1:] drops the leading '/'.
        t.download(con, x.netloc, 69, x.path[1:], url)
Example #12
Source File: base.py From bugatsinho.github.io with GNU General Public License v3.0 | 6 votes |
def unshorten(self, uri, type=None):
    """
    Dispatch `uri` to the unshortener that matches its domain.

    The first four services can also be forced through `type`
    ('adfly', 'adfocus', 'linkbucks', 'lnxlu'). Returns whatever the selected
    handler returns; ``(uri, "No domain found in URI!")`` when the URI has no
    netloc; and ``(uri, 200)`` when no service matches.
    """
    domain = urlsplit(uri).netloc
    if not domain:
        return uri, "No domain found in URI!"

    # Only the cleaned URI is needed; the first element is discarded.
    _, uri = self._clear_google_outbound_proxy(uri)

    # (regex attribute, forcing `type` value or None, handler attribute),
    # checked in this order. Attributes are looked up lazily, one per step.
    services = (
        ('_adfly_regex', 'adfly', '_unshorten_adfly'),
        ('_adfocus_regex', 'adfocus', '_unshorten_adfocus'),
        ('_linkbucks_regex', 'linkbucks', '_unshorten_linkbucks'),
        ('_lnxlu_regex', 'lnxlu', '_unshorten_lnxlu'),
        ('_shst_regex', None, '_unshorten_shst'),
        ('_hrefli_regex', None, '_unshorten_hrefli'),
        ('_anonymz_regex', None, '_unshorten_anonymz'),
    )
    for regex_attr, forced_type, handler_attr in services:
        if re.search(getattr(self, regex_attr), domain, re.IGNORECASE) or (
            forced_type is not None and type == forced_type
        ):
            return getattr(self, handler_attr)(uri)

    return uri, 200
Example #13
Source File: yum.py From atomic-reactor with BSD 3-Clause "New" or "Revised" License | 6 votes |
def filename(self):
    '''Return the file name under which the repo file is saved.

    The name is the basename of the unquoted repo URL path, with REPO_SUFFIX
    appended if it is missing. When ``self.add_hash`` is set, a dash plus the
    first five hex digits of the md5 of the full repourl are inserted
    between the name and its extension, so that different repos do not end
    up in the same file.
    '''
    repo_path = unquote(urlsplit(self.repourl, allow_fragments=False).path)
    name = os.path.basename(repo_path)
    if not name.endswith(REPO_SUFFIX):
        name = name + REPO_SUFFIX

    if self.add_hash:
        digest = '-' + md5(self.repourl.encode('utf-8')).hexdigest()[:5]  # nosec
    else:
        digest = ''

    stem, extension = os.path.splitext(name)
    return stem + digest + extension
Example #14
Source File: modifier.py From selenium-wire with MIT License | 6 votes |
def _rewrite_url(self, request): with self._lock: rewrite_rules = self._rewrite_rules[:] original_netloc = urlsplit(request.path).netloc for pattern, replacement in rewrite_rules: modified, count = pattern.subn(replacement, request.path) if count > 0: request.path = modified break modified_netloc = urlsplit(request.path).netloc if original_netloc != modified_netloc: # Modify the Host header if it exists if 'Host' in request.headers: request.headers['Host'] = modified_netloc
Example #15
Source File: test_confirmable.py From flask-security with MIT License | 6 votes |
def test_spa_get(app, client):
    """
    Test 'single-page-application' style redirects for email confirmation.

    This uses json only. Registers a user via JSON, follows the confirmation
    link and checks that the 302 redirect targets the SPA host/path and
    carries the user's email as a query parameter.
    """
    # NOTE(review): the source was collapsed onto one line; the extent of the
    # two `with` blocks below is reconstructed - confirm against upstream.
    with capture_flashes() as flashes:
        with capture_registrations() as registrations:
            response = client.post(
                "/register",
                json=dict(email="dude@lp.com", password="awesome sunset"),
                headers={"Content-Type": "application/json"},
            )
            assert response.headers["Content-Type"] == "application/json"
        # The captured registration carries the confirmation token.
        token = registrations[0]["confirm_token"]

        response = client.get("/confirm/" + token)
        assert response.status_code == 302
        # Inspect the redirect target piece by piece.
        split = urlsplit(response.headers["Location"])
        assert "localhost:8081" == split.netloc
        assert "/confirm-redirect" == split.path
        qparams = dict(parse_qsl(split.query))
        assert qparams["email"] == "dude@lp.com"
    # Arguably for json we shouldn't have any - this is buried in register_user
    # but really shouldn't be.
    assert len(flashes) == 1
Example #16
Source File: test_unified_signin.py From flask-security with MIT License | 6 votes |
def test_tf_link_spa(app, client, get_message):
    # Verify two-factor required when using magic link and SPA
    # This currently isn't supported and should redirect to an error.
    # NOTE(review): the source was collapsed onto one line; the extent of the
    # `with` block below is reconstructed - confirm against upstream.
    with app.mail.record_messages() as outbox:
        response = client.post(
            "/us-signin/send-code",
            data=dict(identity="matt@lp.com", chosen_method="email"),
            follow_redirects=True,
        )
        assert response.status_code == 200
        assert b"Sign In" in response.data

    # Pull the first http:// link out of the body of the recorded message.
    matcher = re.match(
        r".*(http://[^\s*]*).*", outbox[0].body, re.IGNORECASE | re.DOTALL
    )
    magic_link = matcher.group(1)
    # Do not follow the redirect: its target is what is being asserted.
    response = client.get(magic_link, follow_redirects=False)
    split = urlsplit(response.location)
    assert "localhost:8081" == split.netloc
    assert "/login-error" == split.path
    qparams = dict(parse_qsl(split.query))
    assert qparams["tf_required"] == "1"
    assert qparams["email"] == "matt@lp.com"
Example #17
Source File: test_passwordless.py From flask-security with MIT License | 6 votes |
def test_spa_get(app, client):
    """
    Test 'single-page-application' style redirects for passwordless login.

    This uses json only. Requests a login token via JSON, follows the login
    link and checks that the 302 redirect targets the SPA host/path and
    carries the user's email as a query parameter.
    """
    # NOTE(review): the source was collapsed onto one line; the extent of the
    # two `with` blocks below is reconstructed - confirm against upstream.
    with capture_flashes() as flashes:
        with capture_passwordless_login_requests() as requests:
            response = client.post(
                "/login",
                json=dict(email="matt@lp.com"),
                headers={"Content-Type": "application/json"},
            )
            assert response.headers["Content-Type"] == "application/json"
        # The captured login request carries the login token.
        token = requests[0]["login_token"]

        response = client.get("/login/" + token)
        assert response.status_code == 302
        # Inspect the redirect target piece by piece.
        split = urlsplit(response.headers["Location"])
        assert "localhost:8081" == split.netloc
        assert "/login-redirect" == split.path
        qparams = dict(parse_qsl(split.query))
        assert qparams["email"] == "matt@lp.com"
    # No flash messages are expected for this flow.
    assert len(flashes) == 0
Example #18
Source File: utils.py From flask-security with MIT License | 6 votes |
def transform_url(url, qparams=None, **kwargs):
    """
    Return `url` with extra query parameters and/or replaced URL components.

    :param url: url to transform (can be relative); falsy values are
        returned unchanged
    :param qparams: additional query params merged into the existing query
        (same-named keys are overwritten)
    :param kwargs: pieces of URL to modify - e.g. netloc=localhost:8000.
        A ``query`` given here takes precedence over the merged query.
    :return: Modified URL

    .. versionadded:: 3.2.0
    """
    if not url:
        return url

    parts = urlsplit(url)
    replacements = {}
    if qparams:
        merged_query = dict(parse_qsl(parts.query))
        merged_query.update(qparams)
        replacements["query"] = urlencode(merged_query)
    # Explicit component overrides are applied last.
    replacements.update(kwargs)
    return urlunsplit(parts._replace(**replacements))
Example #19
Source File: client.py From bioforum with MIT License | 5 votes |
def _handle_redirects(self, response, **extra):
    """
    Follow any redirects by requesting responses from the server using GET.

    Each hop is recorded as a ``(url, status_code)`` pair in
    ``response.redirect_chain``. ``extra`` is updated from one hop to the
    next with the scheme, host and port of the redirect target.

    Raises RedirectCycleError when a hop repeats an earlier one or when the
    chain grows beyond 20 entries.
    """
    response.redirect_chain = []
    while response.status_code in (301, 302, 303, 307):
        response_url = response.url
        redirect_chain = response.redirect_chain
        redirect_chain.append((response_url, response.status_code))

        url = urlsplit(response_url)
        # Carry the target's scheme/host/port over into the next request.
        if url.scheme:
            extra['wsgi.url_scheme'] = url.scheme
        if url.hostname:
            extra['SERVER_NAME'] = url.hostname
        if url.port:
            extra['SERVER_PORT'] = str(url.port)

        # Prepend the request path to handle relative path redirects
        path = url.path
        if not path.startswith('/'):
            path = urljoin(response.request['PATH_INFO'], path)

        response = self.get(path, QueryDict(url.query), follow=False, **extra)
        # The new response object takes over the chain collected so far.
        response.redirect_chain = redirect_chain

        if redirect_chain[-1] in redirect_chain[:-1]:
            # Check that we're not redirecting to somewhere we've already
            # been to, to prevent loops.
            raise RedirectCycleError("Redirect loop detected.", last_response=response)
        if len(redirect_chain) > 20:
            # Such a lengthy chain likely also means a loop, but one with
            # a growing path, changing view, or changing query argument;
            # 20 is the value of "network.http.redirection-limit" from Firefox.
            raise RedirectCycleError("Too many redirects.", last_response=response)

    return response
Example #20
Source File: __init__.py From bdbag with Apache License 2.0 | 5 votes |
def inspect_path(path):
    """Classify `path` as a file, a directory or a URI.

    Returns a tuple ``(is_file, is_dir, is_uri)``. A path that does not exist
    locally counts as a URI when it has a URL scheme that is not just its
    own drive letter (e.g. ``C:``); a URI is never also reported as a file
    or directory.
    """
    abs_path = os.path.abspath(path)
    if not os.path.exists(abs_path):
        scheme = urlsplit(path).scheme
        drive = os.path.splitdrive(path)[0]
        # urlsplit() reads a drive letter as a scheme; rule that case out.
        if scheme and scheme.lower() != drive.rstrip(":").lower():
            return False, False, True
    return os.path.isfile(abs_path), os.path.isdir(abs_path), False
Example #21
Source File: Downloader.py From OMR-Datasets with MIT License | 5 votes |
def download_file(url, destination_filename=None) -> str:
    """
    Download ``url`` to a local file while showing a progress bar.

    :param url: URL to download.
    :param destination_filename: target file name; when not given, the last
        path segment of the URL is used, or ``downloaded.file`` if the URL
        path has none.
    :return: absolute path of the written file.
    """
    u = urllib2.urlopen(url)
    try:
        filename = os.path.basename(urlparse.urlsplit(url).path)
        if not filename:
            filename = 'downloaded.file'
        if destination_filename:
            filename = destination_filename

        filename = os.path.abspath(filename)

        with open(filename, 'wb') as f:
            meta = u.info()
            # The header object exposes either getheaders() or get_all().
            meta_func = meta.getheaders if hasattr(meta, 'getheaders') else meta.get_all
            meta_length = meta_func("Content-Length")
            file_size = None
            if meta_length:
                file_size = int(meta_length[0])
            print("Downloading: {0} Bytes: {1} into {2}".format(url, file_size, filename))

            with tqdm(total=file_size, desc="Downloading (bytes)") as progress_bar:
                block_sz = 8192
                while True:
                    buffer = u.read(block_sz)
                    if not buffer:
                        break

                    f.write(buffer)
                    # Progress is only advanced when the total size is known.
                    if file_size:
                        progress_bar.update(len(buffer))
            print()
    finally:
        # Always release the connection, even if the download fails midway.
        u.close()

    return filename
Example #22
Source File: vcs_helpers.py From python-semantic-release with MIT License | 5 votes |
def get_repository_owner_and_name() -> Tuple[str, str]:
    """
    Check the 'origin' remote to get the owner and name of the remote repository.

    :return: A tuple of the owner and name.
    :raises HvcsRepoParseError: if the remote URL path does not end in
        ``<owner>/<name>`` (optionally followed by ``.git``).
    """
    url = repo.remote("origin").url
    split_url = urlsplit(url)
    # Select the owner and name as regex groups. The dot of the optional
    # ".git" suffix is escaped: unescaped, it matched any character, so a
    # repository called "foogit" was reported as "fo".
    parts = re.search(r"[:/]([^:]+)/([^/]*?)(\.git)?$", split_url.path)
    if not parts:
        raise HvcsRepoParseError

    return parts.group(1), parts.group(2)
Example #23
Source File: images.py From idunn with Apache License 2.0 | 5 votes |
def get_url_remote_thumbnail(
    self, source, width=0, height=0, bestFit=True, progressive=False, animated=False
):
    """Build the thumbnail URL for the remote image `source`.

    The URL has the form
    ``<base>/<width>x<height>/<h0>/<h1>/<rest of hash>/<filename>?<params>``
    where the hash is the SHA-256 hex digest of source, size and salt. The
    file name is the basename of the source URL path, with ``.jpg`` appended
    unless it already ends in jpg/jpeg/png/gif.
    """
    show_error_image = False
    salt = self.get_salt()
    size = f"{width}x{height}"
    digest = hashlib.sha256(f"{source}{width}x{height}{salt}".encode("utf8")).hexdigest()
    base_url = self.get_thumbr_url(digest)
    # The digest is spread over three path segments: 1 + 1 + remaining chars.
    hash_path = f"{digest[0]}/{digest[1]}/{digest[2:]}"

    filename = posixpath.basename(unquote(urlsplit(source).path))
    if not re.match(r"^.*\.(jpg|jpeg|png|gif)$", filename, re.IGNORECASE):
        filename += ".jpg"

    flags = {
        "u": source,
        "q": 1 if show_error_image else 0,
        "b": 1 if bestFit else 0,
        "p": 1 if progressive else 0,
        "a": 1 if animated else 0,
    }
    params = urllib.parse.urlencode(flags)
    return "/".join([base_url, size, hash_path, filename]) + "?" + params
Example #24
Source File: routing.py From plugin.video.sparkle with GNU General Public License v3.0 | 5 votes |
def run(self, argv=sys.argv):
    """Dispatch the route named by ``argv``.

    ``argv[0]`` is the plugin URL; its path (``/`` when empty) is passed to
    ``self._dispatch``. If a third argument exists, it is parsed as a query
    string (leading ``?`` removed) and stored in ``self.args``.
    """
    if len(argv) > 2:
        query = argv[2].lstrip('?')
        self.args = parse_qs(query)
    route = urlsplit(argv[0]).path
    self._dispatch(route if route else '/')
Example #25
Source File: request.py From selenium-wire with MIT License | 5 votes |
def querystring(self):
    """Get the query string from the request.

    Returns:
        The query component of ``self.path`` (an empty string if there is
        none).
    """
    _scheme, _netloc, _path, query, _fragment = urlsplit(self.path)
    return query
Example #26
Source File: routing.py From plugin.video.sparkle with GNU General Public License v3.0 | 5 votes |
def match(self, path):
    """
    Check whether `path` matches this rule.

    Returns a dictionary of the named groups extracted by the rule's regex
    on a match, otherwise None.
    """
    # `path` is searched as given; it is not passed through urlsplit() first.
    found = self._regex.search(path)
    if not found:
        return None
    return found.groupdict()
Example #27
Source File: web.py From teleport with Apache License 2.0 | 5 votes |
def authenticated(method):
    """Decorate methods with this to require that the user be logged in.

    If the user is not logged in, GET and HEAD requests are redirected to
    the configured `login url <RequestHandler.get_login_url>`; any other
    method raises ``HTTPError(403)``.

    If you configure a login url with a query parameter, Tornado will
    assume you know what you're doing and use it as-is. If not, it
    will add a `next` parameter so the login page knows where to send
    you once you're logged in.
    """
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        if self.current_user:
            return method(self, *args, **kwargs)

        if self.request.method not in ("GET", "HEAD"):
            raise HTTPError(403)

        login_url = self.get_login_url()
        if "?" not in login_url:
            # if login url is absolute, make next absolute too
            if urlparse.urlsplit(login_url).scheme:
                next_url = self.request.full_url()
            else:
                next_url = self.request.uri
            login_url += "?" + urlencode(dict(next=next_url))
        self.redirect(login_url)
        return None

    return wrapper
Example #28
Source File: utils.py From flask-security with MIT License | 5 votes |
def propagate_next(url):
    """Return the first ``next`` query parameter of `url`, or `url` itself
    if it has none."""
    next_values = parse_qs(urlsplit(url).query).get("next")
    if next_values is None:
        return url
    return next_values[0]
Example #29
Source File: utils.py From zmirror with MIT License | 5 votes |
def embed_real_url_to_embedded_url(real_url_raw, url_mime, escape_slash=False):
    """
    Encode a URL's query string (?q=some&foo=bar) into its path and append a
    file extension to the end of the URL.

    This reduces errors on CDNs that handle URL query parameters poorly.
    The `cdn_redirect_encode_query_str_into_url` setting relies on this
    function; see the matching section of the config file for details.

    Decoding is done by extract_real_url_from_embedded_url(); see that
    function for examples as well.

    URLs without a query string are returned unchanged.

    :param real_url_raw: the URL to rewrite
    :param url_mime: mime type, used as key into `mime_to_use_cdn` to pick
        the appended extension
    :param escape_slash: when true, the input uses ``\\/`` for slashes and
        the result is passed through `s_esc`
    :rtype: str
    """
    real_url = real_url_raw.replace(r'\/', '/') if escape_slash else real_url_raw
    url_sp = urlsplit(real_url)
    if not url_sp.query:
        # no query, needn't rewrite
        return real_url_raw

    query_bytes = url_sp.query.encode()
    if len(query_bytes) > 128:
        # Long queries are zlib-compressed; a 'z' is added to the marker
        # section to flag the compressed form.
        marker = 'z'
        query_bytes = zlib.compress(query_bytes)
    else:
        marker = ''
    encoded_query = base64.urlsafe_b64encode(query_bytes).decode()

    embedded_path = ''.join((
        url_sp.path, '_', _url_salt, marker, '_.',
        encoded_query,
        '._', _url_salt, '_.', mime_to_use_cdn[url_mime],
    ))
    embedded_url = urlunsplit((url_sp.scheme, url_sp.netloc, embedded_path, '', ''))
    return s_esc(embedded_url) if escape_slash else embedded_url
Example #30
Source File: humblebundle.py From humblebundle with GNU General Public License v3.0 | 5 votes |
def _download_basename(self, d): basename = osp.basename(urlsplit(d.get('url', {}).get('web', "")).path) return basename