Python Examples of urllib.parse.urlunsplit

Source File: storage.py From Hands-On-Application-Development-with-PyCharm with MIT License

6 votes

def stored_name(self, name):
        """
        Return the stored (hashed) name for ``name``.

        The path component of ``name`` is looked up in ``self.hashed_files``
        under ``self.hash_key(path)``. On a miss, raise ``ValueError`` when
        ``self.manifest_strict`` is set, otherwise fall back to
        ``self.hashed_name(name)``. The remaining URL components of ``name``
        are carried over unchanged.
        """
        url_parts = urlsplit(unquote(name))
        path = url_parts.path.strip()
        lookup_key = self.hash_key(path)
        hashed_path = self.hashed_files.get(lookup_key)
        if hashed_path is None:
            if self.manifest_strict:
                raise ValueError("Missing staticfiles manifest entry for '%s'" % path)
            hashed_path = self.clean_name(self.hashed_name(name))
        # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
        # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
        # urlsplit() drops the empty query, so the '?' is re-attached to the path.
        if '?#' in name and not url_parts.query:
            hashed_path += '?'
        return urlunsplit(url_parts._replace(path=hashed_path))

Source File: base.py From Hands-On-Application-Development-with-PyCharm with MIT License

6 votes

def translate_url(url, lang_code):
    """
    Translate the path of ``url`` (absolute or relative) into ``lang_code``.

    The path is resolved to a view, then reversed while ``lang_code`` is the
    active language; scheme, netloc, query and fragment are kept. If the path
    cannot be resolved or reversed, ``url`` is returned unchanged.
    """
    parts = urlsplit(url)
    try:
        match = resolve(parts.path)
    except Resolver404:
        return url

    if match.namespace:
        view_name = "%s:%s" % (match.namespace, match.url_name)
    else:
        view_name = match.url_name

    with override(lang_code):
        try:
            translated_path = reverse(view_name, args=match.args, kwargs=match.kwargs)
        except NoReverseMatch:
            return url
        return urlunsplit(
            (parts.scheme, parts.netloc, translated_path, parts.query, parts.fragment)
        )

Source File: http_to_https.py From hypercorn with MIT License

6 votes

def _send_websocket_redirect(self, scope: dict, send: Callable) -> None:
        # If the HTTP version is 2 we should redirect with a https
        # scheme not wss.

        scheme = "wss"
        if scope.get("http_version", "1.1") == "2":
            scheme = "https"

        new_url = urlunsplit(
            (scheme, self.host, scope["raw_path"].decode(), scope["query_string"].decode(), "")
        )
        await send(
            {
                "type": "websocket.http.response.start",
                "status": 307,
                "headers": [(b"location", new_url.encode())],
            }
        )
        await send({"type": "websocket.http.response.body"})

Source File: utils.py From modmail with GNU Affero General Public License v3.0

6 votes

def parse_image_url(url: str) -> str:
    """
    Convert the image URL into a sized Discord avatar.

    Parameters
    ----------
    url : str
        The URL to convert.

    Returns
    -------
    str
        The URL with its query replaced by ``size=128``, or '' if the path
        does not end in a recognised image extension (case-insensitive).
    """
    image_suffixes = (".png", ".jpg", ".gif", ".jpeg", ".webp")
    parts = parse.urlsplit(url)

    if not parts.path.lower().endswith(image_suffixes):
        return ""
    # Scheme, host, path and fragment are preserved; only the query changes.
    return parse.urlunsplit(
        (parts.scheme, parts.netloc, parts.path, "size=128", parts.fragment)
    )

Source File: html.py From luscan-devel with GNU General Public License v2.0

6 votes

def smart_urlquote(url):
    """
    Quote ``url`` unless it is already quoted.

    The host part is first converted from IDN to its ASCII (ACE) form when
    that is possible; then the URL is unquoted and quoted again so that
    already-escaped input is not escaped twice.
    """
    # Handle IDN before quoting.
    parts = urlsplit(url)
    try:
        ace_netloc = parts[1].encode('idna').decode('ascii')  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        pass
    else:
        url = urlunsplit((parts[0], ace_netloc, parts[2], parts[3], parts[4]))

    unquoted = unquote(force_str(url))
    # See http://bugs.python.org/issue2637
    quoted = quote(unquoted, safe=b'!*\'();:@&=+$,/?#[]~')

    return force_text(quoted)

Source File: validators.py From luscan-devel with GNU General Public License v2.0

6 votes

def __call__(self, value):
        """
        Validate ``value`` as a URL, retrying with an IDN-encoded host.

        When the parent validator rejects ``value``, the host is converted
        to its ASCII (ACE) form and validation is attempted once more. The
        original error is raised if ``value`` is empty or the host cannot be
        IDNA-encoded.
        """
        try:
            super(URLValidator, self).__call__(value)
        except ValidationError as e:
            # Trivial case failed. Try for possible IDN domain
            if not value:
                raise
            value = force_text(value)
            scheme, netloc, path, query, fragment = urlsplit(value)
            try:
                ace_netloc = netloc.encode('idna').decode('ascii')  # IDN -> ACE
            except UnicodeError:  # invalid domain part
                raise e
            url = urlunsplit((scheme, ace_netloc, path, query, fragment))
            super(URLValidator, self).__call__(url)
        else:
            url = value

Source File: storage.py From bioforum with MIT License

6 votes

def stored_name(self, name):
        """
        Return the stored (hashed) name for ``name``.

        The path component of ``name`` is looked up in ``self.hashed_files``
        under ``self.hash_key(path)``. On a miss, raise ``ValueError`` when
        ``self.manifest_strict`` is set, otherwise fall back to
        ``self.hashed_name(name)``. The remaining URL components of ``name``
        are carried over unchanged.
        """
        url_parts = urlsplit(unquote(name))
        path = url_parts.path.strip()
        lookup_key = self.hash_key(path)
        hashed_path = self.hashed_files.get(lookup_key)
        if hashed_path is None:
            if self.manifest_strict:
                raise ValueError("Missing staticfiles manifest entry for '%s'" % path)
            hashed_path = self.clean_name(self.hashed_name(name))
        # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
        # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
        # urlsplit() drops the empty query, so the '?' is re-attached to the path.
        if '?#' in name and not url_parts.query:
            hashed_path += '?'
        return urlunsplit(url_parts._replace(path=hashed_path))

Source File: config.py From freight with Apache License 2.0

6 votes

def docker_init_app(app):
    """
    Fill in database and Redis URLs on ``app.config`` from linked-container
    environment variables.

    ``SQLALCHEMY_DATABASE_URI`` is set only when it is missing or falsy and
    ``POSTGRES_PORT_5432_TCP_ADDR`` is present; ``REDIS_URL`` is set only
    when it is absent and ``REDIS_PORT_6379_TCP_ADDR`` is present.
    """
    env = os.environ

    if "POSTGRES_PORT_5432_TCP_ADDR" in env:
        pg_host = env["POSTGRES_PORT_5432_TCP_ADDR"]
        pg_user = env.get("POSTGRES_ENV_POSTGRES_USER") or "postgres"
        pg_password = env.get("POSTGRES_ENV_POSTGRES_PASSWORD")
        # The database name defaults to the user name.
        pg_db = env.get("POSTGRES_ENV_POSTGRES_DB") or pg_user
        if pg_user and pg_password:
            credentials = f"{pg_user}:{pg_password}@"
        elif pg_user:
            credentials = f"{pg_user}@"
        else:
            credentials = ""
        if not app.config.get("SQLALCHEMY_DATABASE_URI"):
            app.config["SQLALCHEMY_DATABASE_URI"] = urlunsplit(
                ("postgresql", credentials + pg_host, pg_db, None, None)
            )

    if "REDIS_PORT_6379_TCP_ADDR" in env:
        redis_host = env["REDIS_PORT_6379_TCP_ADDR"]
        redis_netloc = f"{redis_host}:{6379}"
        app.config.setdefault(
            "REDIS_URL", urlunsplit(("redis", redis_netloc, "", None, None))
        )

Source File: utils.py From custom-resource-helper with Apache License 2.0

6 votes

def _send_response(response_url, response_body):
    """
    PUT ``response_body`` as JSON to ``response_url`` over HTTPS.

    If the body cannot be serialised, a FAILED response carrying the error
    text is sent instead. The request is retried every 5 seconds, without
    limit, until one attempt completes without raising.
    """
    try:
        payload = json.dumps(response_body)
    except Exception as e:
        msg = "Failed to convert response to json: {}".format(str(e))
        logger.error(msg, exc_info=True)
        response_body = {'Status': 'FAILED', 'Data': {}, 'Reason': msg}
        payload = json.dumps(response_body)
    logger.debug("CFN response URL: {}".format(response_url))
    logger.debug(payload)
    headers = {'content-type': '', 'content-length': str(len(payload))}

    # Split into the host to connect to and the path/query to request.
    parts = urlsplit(response_url)
    host = parts.netloc
    request_target = urlunsplit(("", "", parts.path, parts.query, parts.fragment))

    delivered = False
    while not delivered:
        try:
            connection = HTTPSConnection(host)
            connection.request(method="PUT", url=request_target, body=payload, headers=headers)
            response = connection.getresponse()
            logger.info("CloudFormation returned status code: {}".format(response.reason))
            delivered = True
        except Exception as e:
            logger.error("Unexpected failure sending response to CloudFormation {}".format(e), exc_info=True)
            time.sleep(5)

Source File: base.py From bioforum with MIT License

6 votes

def translate_url(url, lang_code):
    """
    Translate the path of ``url`` (absolute or relative) into ``lang_code``.

    The path is resolved to a view, then reversed while ``lang_code`` is the
    active language; scheme, netloc, query and fragment are kept. If the path
    cannot be resolved or reversed, ``url`` is returned unchanged.
    """
    parts = urlsplit(url)
    try:
        match = resolve(parts.path)
    except Resolver404:
        return url

    if match.namespace:
        view_name = "%s:%s" % (match.namespace, match.url_name)
    else:
        view_name = match.url_name

    with override(lang_code):
        try:
            translated_path = reverse(view_name, args=match.args, kwargs=match.kwargs)
        except NoReverseMatch:
            return url
        return urlunsplit(
            (parts.scheme, parts.netloc, translated_path, parts.query, parts.fragment)
        )

Source File: api_requests.py From soweego with GNU General Public License v3.0

6 votes

def _build_sitelink_url(site, title):
    """
    Build the ``https://<language>.<project>.org/wiki/<title>`` URL for a
    site identifier and page title.

    ``site`` is split at the first occurrence of ``'wiki'``: the text before
    it is the language (underscores become hyphens) and the rest is the
    project. Spaces in ``title`` become underscores.
    """
    cut = site.find('wiki')
    language, project = site[:cut], site[cut:]
    if project == 'wiki':
        project = 'wikipedia'
    if language == 'commons':
        project = 'wikimedia'

    netloc = '.'.join((language.replace('_', '-'), project, 'org'))
    page_path = '/wiki/%s' % title.replace(' ', '_')
    url = urlunsplit(('https', netloc, page_path, '', ''))
    LOGGER.debug('Site: %s - Title: %s - Full URL: %s', site, title, url)
    return url

Source File: utils.py From fence with Apache License 2.0

6 votes

def append_query_params(original_url, **kwargs):
    """
    Return ``original_url`` with the keyword arguments added to its query.

    A keyword that matches an existing parameter replaces all of that
    parameter's values.

    Example call:
        new_url = append_query_params(
            original_url, error='this is an error',
            another_arg='this is another argument')
    """
    parts = urlsplit(original_url)
    query_params = parse_qs(parts.query)
    # Each keyword becomes a single-valued parameter.
    query_params.update((key, [value]) for key, value in kwargs.items())
    return urlunsplit(parts._replace(query=urlencode(query_params, doseq=True)))

Source File: sources.py From orbit-predictor with MIT License

6 votes

def _fetch_tle(self, path, sate_id, date=None):
        """
        Request the TLE lines for ``sate_id`` from ``path`` under ``self.url``.

        :param path: path joined onto ``self.url``.
        :param sate_id: value sent as the ``satellite_number`` query parameter.
        :param date: optional object with ``strftime``; sent as ``date`` in
            ``YYYY-MM-DD`` form when given.
        :return: tuple built from the ``lines`` entry of the JSON response.
        :raises requests.exceptions.RequestException: logged and re-raised
            when the request itself fails.
        :raises ValueError: when the response is not OK or has no ``lines``.
        """
        url = urlparse.urlparse(urlparse.urljoin(self.url, path))
        qargs = {'satellite_number': sate_id}
        if date is not None:
            qargs['date'] = date.strftime("%Y-%m-%d")

        # Any query already present on the joined URL is replaced.
        query_string = urlencode(qargs)
        url = urlparse.urlunsplit((url.scheme, url.netloc, url.path, query_string, url.fragment))
        headers = {'user-agent': 'orbit-predictor', 'Accept': 'application/json'}
        try:
            response = requests.get(url, headers=headers)
        except requests.exceptions.RequestException as error:
            logger.error("Exception requesting TLE: %s", error)
            raise

        if response.ok:
            # Decode the body once instead of once per access.
            payload = response.json()
            if 'lines' in payload:
                return tuple(payload['lines'])
        # Exceptions do not interpolate extra arguments the way logging
        # calls do, so the message is formatted explicitly.
        raise ValueError("Error requesting TLE: %s" % response.text)

Source File: utils.py From flask-security with MIT License

6 votes

def transform_url(url, qparams=None, **kwargs):
    """ Modify url

    :param url: url to transform (can be relative)
    :param qparams: additional query params to add to end of url
    :param kwargs: pieces of URL to modify - e.g. netloc=localhost:8000
    :return: Modified URL; a falsy ``url`` is returned unchanged

    .. versionadded:: 3.2.0
    """
    if not url:
        return url

    parts = urlsplit(url)
    if qparams:
        # Existing parameters are kept; qparams win on key collisions.
        merged_query = dict(parse_qsl(parts.query))
        merged_query.update(qparams)
        parts = parts._replace(query=urlencode(merged_query))
    # Explicit component overrides are applied last.
    parts = parts._replace(**kwargs)
    return urlunsplit(parts)

Source File: forms.py From Bitpoll with GNU General Public License v3.0

6 votes

def compress(self, data_list):
        """
        Combine ``[url, user, password]`` into one URL with credentials.

        :param data_list: sequence whose items are the URL, the user name
            and the password, in that order.
        :return: the URL with ``user[:password]@`` placed before the host
            (both parts escaped with ``quote_plus``), or the URL itself when
            neither user nor password is given.
        """
        user = quote_plus(data_list[1])
        passwd = quote_plus(data_list[2])
        auth = user
        if passwd:
            auth += ':' + passwd
        parsed = urlsplit(data_list[0])
        if not auth:
            # SplitResult has no ``url`` attribute; geturl() reassembles it.
            return parsed.geturl()
        return urlunsplit((
            parsed.scheme,
            auth + '@' + parsed.netloc,
            parsed.path,
            parsed.query,
            parsed.fragment
        ))

Source File: test_config.py From marvin with BSD 3-Clause "New" or "Revised" License

6 votes

def test_sasurl_join(self, sas, exp):
        """
        Check that the configured SAS URL joined with an API path equals
        ``exp`` for the given ``sas`` mode.
        """
        url = '/marvin/api/cubes/8485-1901/'
        # 'testpub' switches on both the public and the test flag.
        public = sas in ('public', 'testpub')
        test = sas in ('test', 'testpub')
        if sas != 'local':
            sas = 'utah'
        config.switchSasUrl(sas, public=public, test=test)

        base_parts = urlsplit(config.sasurl)
        path_parts = urlsplit(url)
        # Join the two URLs component by component.
        joined = tuple(strjoin(*pair) for pair in zip(base_parts, path_parts))
        final = urlunsplit(joined)
        assert exp == final

    # @pytest.mark.parametrize('set_default, defrel, exp',
    #                          [('MPL-5', 'MPL-5', 'api.sdss.org'),
    #                           ('DR15', 'DR15', 'dr15.sdss.org/api')], indirect=['set_default'])
    # def test_sasurl_default_release(self, set_default, defrel, exp):
    #     assert config.release == defrel
    #     assert exp in config.sasurl
    #

Source File: util.py From pypowervm with Apache License 2.0

6 votes

def extend_basepath(href, add):
    """Append ``add`` to the path of ``href``, keeping query and fragment.

    For example, extend_basepath('http://server:1234/foo?a=b&c=d#frag', '/bar')
    => 'http://server:1234/foo/bar?a=b&c=d#frag'

    :param href: Path or href to augment.  Scheme, netloc, query string, and
                 fragment are allowed but not required.
    :param add: String to add onto the base path of the href.  Must not contain
                unescaped special characters such as '?', '&', '#'.
    :return: The augmented href.
    """
    parts = urlparse.urlsplit(href)
    extended = parts._replace(path=parts.path + add)
    return urlparse.urlunsplit(extended)

Source File: net.py From misp42splunk with GNU Lesser General Public License v3.0

6 votes

def validate_(self, value, context=None):
        """
        Validate ``value`` as a URL and, if ``self.verify_exists`` is set,
        check that it can be opened.

        Raises ``StopValidationError`` with the ``invalid_url`` message when
        ``self.valid_url`` rejects the value, and with the ``not_found``
        message when opening the rebuilt URL raises ``URLError``.
        """
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if not self.verify_exists:
            return

        host = url['host6'] or url['host4'] or url['hostn_enc']
        # The ':' separator is always appended, even when no port is given.
        netloc = host + ':' + (url['port'] or '')
        rebuilt = urlunsplit((url['scheme'], netloc, url['path'], url['query'], url['frag']))
        url_string = urlquote(rebuilt.encode('utf-8'), safe=VALID_CHAR_STRING)
        try:
            urlopen(url_string)
        except URLError:
            raise StopValidationError(self.messages['not_found'])

Source File: finder.py From rssant with BSD 3-Clause "New" or "Revised" License

6 votes

def _guess_links(self):
        """
        Generate candidate links from the site root and every prefix of
        ``self.path``, then hand them to ``self._merge_links``.

        Each prefix is added as a page that may contain a feed, and each
        prefix plus a ``MAYBE_FEEDS`` entry as a possible feed. Scores
        decrease as the prefix gets longer.
        """
        segments = self.path.split("/")

        def site_url(path):
            # Same scheme and host as the page; no query or fragment.
            return urlunsplit((self.scheme, self.netloc, path, "", ""))

        containers = [ScoredLink(site_url(""), 0.5)]
        feeds = []
        for depth in range(len(segments)):
            prefix = segments[:depth]
            containers.append(ScoredLink(site_url("/".join(prefix)), 1.0 / (depth + 3)))
            for feed_name in MAYBE_FEEDS:
                feed_path = "/".join(prefix + [feed_name])
                feeds.append(ScoredLink(site_url(feed_path), 1.0 / (depth + 4)))
        self._merge_links(containers + feeds)

Source File: net.py From misp42splunk with GNU Lesser General Public License v3.0

6 votes

def validate_(self, value, context=None):
        """
        Validate ``value`` as a URL and, if ``self.verify_exists`` is set,
        check that it can be opened.

        Raises ``StopValidationError`` with the ``invalid_url`` message when
        ``self.valid_url`` rejects the value, and with the ``not_found``
        message when opening the rebuilt URL raises ``URLError``.
        """
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if not self.verify_exists:
            return

        host = url['host6'] or url['host4'] or url['hostn_enc']
        # The ':' separator is always appended, even when no port is given.
        netloc = host + ':' + (url['port'] or '')
        rebuilt = urlunsplit((url['scheme'], netloc, url['path'], url['query'], url['frag']))
        url_string = urlquote(rebuilt.encode('utf-8'), safe=VALID_CHAR_STRING)
        try:
            urlopen(url_string)
        except URLError:
            raise StopValidationError(self.messages['not_found'])

Source File: util.py From byob with GNU General Public License v3.0

5 votes

def pastebin(source, api_key):
    """
    Upload file/data to Pastebin

    `Required`
    :param source:         content to upload; passed through normalize()
    :param str api_key:    Pastebin api_dev_key

    Returns the paste link rewritten to its '/raw' form with a trailing
    slash, or the API reply itself when it does not start with 'http'.
    Returns None (after logging) when `api_key` is not a str or the upload
    raises.

    """
    import sys
    if sys.version_info[0] > 2:
        from urllib.parse import urlsplit,urlunsplit
    else:
        from urllib2 import urlparse
        urlsplit = urlparse.urlsplit
        urlunsplit = urlparse.urlunsplit

    if not isinstance(api_key, str):
        log("No Pastebin API key found")
        return

    try:
        form = {'api_option': 'paste', 'api_paste_code': normalize(source), 'api_dev_key': api_key}
        reply = post('https://pastebin.com/api/api_post.php', data=form)
        parts = urlsplit(reply)
        if reply.startswith('http'):
            # Point at the raw-content view of the paste.
            link = urlunsplit((parts.scheme, parts.netloc, '/raw' + parts.path, parts.query, parts.fragment))
        else:
            link = reply
        if not link.endswith('/'):
            link += '/'
        return link
    except Exception as e:
        log("Upload to Pastebin failed with error: {}".format(e))

Source File: util.py From byob with GNU General Public License v3.0

5 votes

def pastebin(source, api_key):
    """
    Upload file/data to Pastebin

    `Required`
    :param source:         content to upload; passed through normalize()
    :param str api_key:    Pastebin api_dev_key

    Returns the paste link rewritten to its '/raw' form with a trailing
    slash, or the API reply itself when it does not start with 'http'.
    Returns None (after logging) when `api_key` is not a str or the upload
    raises.

    """
    import sys
    if sys.version_info[0] > 2:
        from urllib.parse import urlsplit,urlunsplit
    else:
        from urllib2 import urlparse
        urlsplit = urlparse.urlsplit
        urlunsplit = urlparse.urlunsplit

    if not isinstance(api_key, str):
        log("No Pastebin API key found")
        return

    try:
        form = {'api_option': 'paste', 'api_paste_code': normalize(source), 'api_dev_key': api_key}
        reply = post('https://pastebin.com/api/api_post.php', data=form)
        parts = urlsplit(reply)
        if reply.startswith('http'):
            # Point at the raw-content view of the paste.
            link = urlunsplit((parts.scheme, parts.netloc, '/raw' + parts.path, parts.query, parts.fragment))
        else:
            link = reply
        if not link.endswith('/'):
            link += '/'
        return link
    except Exception as e:
        log("Upload to Pastebin failed with error: {}".format(e))

Source File: storage.py From Hands-On-Application-Development-with-PyCharm with MIT License

5 votes

def hashed_name(self, name, content=None, filename=None):
        """
        Return ``name`` with the content hash inserted before its extension.

        :param name: base name (may carry a query string and fragment) from
            which the hashed file name is built.
        :param content: file-like object to hash; when omitted the file is
            opened through ``self.open`` and closed afterwards.
        :param filename: name of the file to hash when ``content`` is not
            given; defaults to the path component of ``name``.
        :return: the hashed name with the original query/fragment restored,
            or ``name`` unchanged when the file cannot be opened. When
            ``self.file_hash`` yields no hash, the path is left unhashed.
        :raises ValueError: when ``content`` is omitted and the file does
            not exist.
        """
        parsed_name = urlsplit(unquote(name))
        clean_name = parsed_name.path.strip()
        filename = (filename and urlsplit(unquote(filename)).path.strip()) or clean_name
        opened = content is None
        if opened:
            if not self.exists(filename):
                raise ValueError("The file '%s' could not be found with %r." % (filename, self))
            try:
                content = self.open(filename)
            except IOError:
                # Handle directory paths and fragments
                return name
        try:
            file_hash = self.file_hash(clean_name, content)
        finally:
            # Only close what this method opened itself.
            if opened:
                content.close()
        path, filename = os.path.split(clean_name)
        root, ext = os.path.splitext(filename)
        # A missing hash must contribute nothing; formatting None directly
        # would put the literal text "None" into the file name.
        file_hash = (".%s" % file_hash) if file_hash else ""
        hashed_name = os.path.join(path, "%s%s%s" %
                                   (root, file_hash, ext))
        unparsed_name = list(parsed_name)
        unparsed_name[2] = hashed_name
        # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
        # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
        if '?#' in name and not unparsed_name[3]:
            unparsed_name[2] += '?'
        return urlunsplit(unparsed_name)

Source File: views.py From django-oidc-provider with MIT License

5 votes

def dispatch(self, request, *args, **kwargs):
        """
        Work out where to send the user after logout, run the configured
        end-session hook, then continue with the parent ``dispatch``.

        The requested ``post_logout_redirect_uri`` is honoured only when an
        ``id_token_hint`` identifies a known client and the URI is listed in
        that client's ``post_logout_redirect_uris``; ``state`` is then added
        to its query string when supplied. Otherwise the configured login
        URL is used.
        """
        params = request.GET
        id_token_hint = params.get('id_token_hint', '')
        post_logout_redirect_uri = params.get('post_logout_redirect_uri', '')
        state = params.get('state', '')
        client = None

        next_page = settings.get('OIDC_LOGIN_URL')
        after_end_session_hook = settings.get('OIDC_AFTER_END_SESSION_HOOK', import_str=True)

        if id_token_hint:
            client_id = client_id_from_id_token(id_token_hint)
            try:
                client = Client.objects.get(client_id=client_id)
                uri_allowed = post_logout_redirect_uri in client.post_logout_redirect_uris
                if uri_allowed and state:
                    redirect_parts = urlsplit(post_logout_redirect_uri)
                    query_params = parse_qs(redirect_parts.query)
                    query_params['state'] = state
                    redirect_parts = redirect_parts._replace(
                        query=urlencode(query_params, doseq=True))
                    next_page = urlunsplit(redirect_parts)
                elif uri_allowed:
                    next_page = post_logout_redirect_uri
            except Client.DoesNotExist:
                # Unknown client: keep the default next_page.
                pass

        after_end_session_hook(
            request=request,
            id_token=id_token_hint,
            post_logout_redirect_uri=post_logout_redirect_uri,
            state=state,
            client=client,
            next_page=next_page
        )

        self.next_page = next_page
        return super(EndSessionView, self).dispatch(request, *args, **kwargs)

Source File: fields.py From Hands-On-Application-Development-with-PyCharm with MIT License

5 votes

def to_python(self, value):
        """
        Normalise ``value`` into a full URL.

        A missing scheme defaults to ``http``. When no domain was parsed,
        the path segment is treated as the domain and the URL is split
        again. Falsy values are returned as produced by the parent class.
        """

        def split_url(url):
            """
            Return a list of url parts via urlparse.urlsplit(), or raise
            ValidationError for some malformed URLs.
            """
            try:
                parts = urlsplit(url)
            except ValueError:
                # urlparse.urlsplit can raise a ValueError with some
                # misformatted URLs.
                raise ValidationError(self.error_messages['invalid'], code='invalid')
            return list(parts)

        value = super().to_python(value)
        if not value:
            return value

        url_fields = split_url(value)
        if not url_fields[0]:
            # If no URL scheme given, assume http://
            url_fields[0] = 'http'
        if not url_fields[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            url_fields[1], url_fields[2] = url_fields[2], ''
            # Rebuild the url_fields list, since the domain segment may now
            # contain the path too.
            url_fields = split_url(urlunsplit(url_fields))
        return urlunsplit(url_fields)

Source File: backend_address.py From parsec-cloud with GNU Affero General Public License v3.0

5 votes

def to_url(self) -> str:
        """
        Serialise this address as a ``PARSEC_SCHEME`` URL.

        The port appears in the netloc only when ``self._parse_port`` reports
        a custom one. Query values and the path are escaped with
        ``quote_plus``.
        """
        _, custom_port = self._parse_port(self._port, self._use_ssl)
        # NOTE(review): this branch reads self._hostname while the fallback
        # reads self.hostname - presumably equivalent; confirm.
        netloc = f"{self._hostname}:{custom_port}" if custom_port else self.hostname
        query_pairs = (f"{k}={quote_plus(v)}" for k, v in self._to_url_get_params())
        query = "&".join(query_pairs)
        path = quote_plus(self._to_url_get_path())
        return urlunsplit((PARSEC_SCHEME, netloc, path, query, None))

Source File: internet.py From trafaret with BSD 2-Clause "Simplified" License

5 votes

def decode_url_idna(value):
    """
    Return ``value`` with its network location converted from IDN to ASCII
    (ACE) form; the other URL components are unchanged.

    Raises ``UnicodeError`` if the network location cannot be IDNA-encoded.
    """
    parts = urlparse.urlsplit(value)
    ace_netloc = parts.netloc.encode('idna').decode('ascii')  # IDN -> ACE
    return urlparse.urlunsplit(parts._replace(netloc=ace_netloc))

Source File: utils.py From fence with Apache License 2.0

5 votes

def split_url_and_query_params(url):
    """
    Split ``url`` into the URL without its query string and the parsed
    query parameters.

    :return: tuple ``(url_without_query, params)`` where ``params`` maps
        each parameter name to a list of its values.
    """
    parts = urlsplit(url)
    query_params = parse_qs(parts.query)
    stripped_url = urlunsplit((parts.scheme, parts.netloc, parts.path, None, parts.fragment))
    return stripped_url, query_params

Source File: authorize.py From django-oidc-provider with MIT License

5 votes

def strip_prompt_login(path):
    """
    Strips 'login' from the 'prompt' query parameter.

    :param path: URL or path whose query string may contain ``prompt``.
    :return: the URL with 'login' removed from the first ``prompt`` value;
        the parameter is dropped entirely when nothing is left. A URL with
        no ``prompt`` parameter is handled without error.
    """
    uri = urlsplit(path)
    query_params = parse_qs(uri.query)
    # parse_qs omits absent and blank parameters, so a missing 'prompt' must
    # not be indexed or looked up directly.
    prompt_values = query_params.get('prompt')
    if prompt_values:
        prompt_list = prompt_values[0].split()
        if 'login' in prompt_list:
            prompt_list.remove('login')
            query_params['prompt'] = ' '.join(prompt_list)
        if not query_params['prompt']:
            del query_params['prompt']
    uri = uri._replace(query=urlencode(query_params, doseq=True))
    return urlunsplit(uri)

Source File: finder.py From rssant with BSD 3-Clause "New" or "Revised" License

5 votes

def _parse_link(self, tag, page_url):
        """
        Turn a link tag into a scored feed candidate, or return None when
        the tag is rejected.

        A tag is rejected when its ``rel`` or ``type`` matches one of the
        not-a-feed markers, it has no ``href``, its host does not relate to
        ``self.netloc``, its scheme is not http/https/feed, its path ends
        with a not-a-feed suffix, or the rebuilt URL fails ``validate_url``.
        The fragment is dropped from accepted URLs.
        """
        link_rel = tag.get("rel", "")
        if not isinstance(link_rel, str):
            link_rel = ' '.join(link_rel)
        link_rel = link_rel.lower()
        if link_rel and any(key in link_rel for key in LINK_REL_NOT_FEED):
            return None

        link_type = str(tag.get("type", "")).lower()
        if link_type and any(key in link_type for key in LINK_TYPE_NOT_FEED):
            return None

        url = tag.get("href", "")
        if not url:
            return None
        if not url.startswith(('http://', 'https://')):
            url = urljoin(page_url, url)  # resolve relative paths

        scheme, netloc, path, query, fragment = urlsplit(url)
        # Compare on the last two labels of the host name.
        base_netloc = '.'.join(netloc.rsplit('.', 2)[-2:])
        if (not netloc) or base_netloc not in self.netloc:
            return None

        lowered_scheme = scheme.lower()
        if not scheme or lowered_scheme == 'feed':
            scheme = self.scheme
        elif lowered_scheme in {'http', 'https'}:
            scheme = lowered_scheme
        else:
            return None

        lower_path = path.lower()
        if any(lower_path.endswith(key) for key in URL_ENDS_NOT_FEED):
            return None

        url = urlunsplit((scheme, netloc, path, query, None))
        try:
            validate_url(url)
        except Invalid:
            return None
        return self._score_link(url, lower_path, link_rel, link_type)

Python urllib.parse.urlunsplit() Examples