Python Examples of urlparse.urlunsplit

Source File: utils.py From kano-burners with GNU General Public License v2.0

6 votes

def url_fix(s, charset='utf-8'):
    '''
    Percent-encode the unsafe characters of a user-supplied URL.

    Users often type URLs containing characters such as ' ' that are not
    legal in a URL.  Only the path and the query string are quoted, in a
    similar way to how browsers treat text entered by the user; scheme,
    network location and fragment are passed through untouched:

    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffsklärung)')
    'http://de.wikipedia.org/wiki/Elf%20%28Begriffskl%C3%A4rung%29'

    :param s: Url address.
    :type s: string
    :param charset: The charset used to encode ``s`` when it is given as a
                    unicode string on Python 2. Default is 'utf-8'.
    :type charset: string
    :rtype: string

    (taken from `werkzeug.utils <http://werkzeug.pocoo.org/docs/utils/>`_)
    '''
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, unicode):
        # Characters the charset cannot represent are dropped ('ignore').
        s = s.encode(charset, 'ignore')
    pieces = urlparse.urlsplit(s)
    # '/' and '%' stay literal so separators and existing escapes survive.
    quoted_path = urllib.quote(pieces[2], '/%')
    # ':', '&' and '=' stay literal in the query string.
    quoted_query = urllib.quote_plus(pieces[3], ':&=')
    return urlparse.urlunsplit(
        (pieces[0], pieces[1], quoted_path, quoted_query, pieces[4]))

Source File: fixture.py From mishkal with GNU General Public License v3.0

6 votes

def goto(self, href, method='get', **args):
    """
    Follow ``href`` (which may be relative) through the test application.

    ``method`` selects ``app.get()`` or ``app.post()`` (``'get'`` or
    ``'post'``, compared case-insensitively); any extra keyword arguments
    are handed to that method unchanged.

    All hostnames and schemes will be ignored.
    """
    parts = urlparse.urlsplit(href)
    # Keep only path and query, then resolve against the current request URL.
    local_href = urlparse.urlunsplit(('', '', parts[2], parts[3], ''))
    href = urlparse.urljoin(self.request.full_url, local_href)
    method = method.lower()
    assert method in ('get', 'post'), (
        'Only "get" or "post" are allowed for method (you gave %r)'
        % method)
    sender = self.test_app.get if method == 'get' else self.test_app.post
    return sender(href, **args)

Source File: net.py From misp42splunk with GNU Lesser General Public License v3.0

6 votes

def validate_(self, value, context=None):
    """
    Check that ``value`` is a valid URL and, optionally, that it can be opened.

    Raises ``StopValidationError`` with the ``'invalid_url'`` message when
    ``self.valid_url`` rejects the value.  When ``self.verify_exists`` is
    set, the URL is rebuilt from its parsed parts and opened with
    ``urlopen``; a ``URLError`` is reported as ``'not_found'``.
    ``context`` is not used here.
    """
    parsed = self.valid_url(value)
    if not parsed:
        raise StopValidationError(self.messages['invalid_url'])
    if not self.verify_exists:
        return

    scheme = parsed['scheme']
    # First non-empty host form wins: IPv6, IPv4, then the encoded hostname.
    host = parsed['host6'] or parsed['host4'] or parsed['hostn_enc']
    port = parsed['port'] or ''
    rebuilt = urlunsplit((
        scheme,
        host + ':' + port,
        parsed['path'],
        parsed['query'],
        parsed['frag'],
    ))
    url_string = urlquote(rebuilt.encode('utf-8'), safe=VALID_CHAR_STRING)
    try:
        urlopen(url_string)
    except URLError:
        raise StopValidationError(self.messages['not_found'])

Source File: curl.py From pypath with GNU General Public License v3.0

6 votes

def url_fix(self, charset = 'utf-8'):
    """
    Percent-encode the path and query of ``self.url`` in place.

    Does nothing when ``self.bypass_url_encoding`` is set.  A ``bytes`` URL
    is first converted with ``self._bytes_to_unicode`` using ``charset``.
    Each component is quoted only if ``self.force_quote`` is set or the
    matching ``is_quoted`` / ``is_quoted_plus`` check says it is not
    quoted yet.

    From http://stackoverflow.com/a/121017/854988
    """
    if self.bypass_url_encoding:
        return

    if type(self.url) is bytes:
        self.url = self._bytes_to_unicode(self.url, encoding = charset)

    parts = list(urlparse.urlsplit(self.url))

    quote_path = self.force_quote or not self.is_quoted(parts[2])
    if quote_path:
        parts[2] = urllib.quote(parts[2], '/%')

    quote_query = self.force_quote or not self.is_quoted_plus(parts[3])
    if quote_query:
        parts[3] = urllib.quote_plus(parts[3], '& = ')

    self.url = urlparse.urlunsplit(tuple(parts))

Source File: validators.py From luscan-devel with GNU General Public License v2.0

6 votes

def __call__(self, value):
    """
    Validate ``value``, retrying once with an IDNA-encoded domain.

    If the plain validation fails and ``value`` is non-empty, the network
    location is converted from IDN to its ASCII (ACE) form and validation
    is run again on the rebuilt URL.  The original ``ValidationError`` is
    re-raised when the value is empty or the domain cannot be encoded.
    """
    try:
        super(URLValidator, self).__call__(value)
    except ValidationError as e:
        # Trivial case failed; an empty value leaves nothing to retry with.
        if not value:
            raise
        # Try for possible IDN domain
        text = force_text(value)
        parts = urlsplit(text)
        try:
            ace_netloc = parts[1].encode('idna').decode('ascii')  # IDN -> ACE
        except UnicodeError:  # invalid domain part
            raise e
        url = urlunsplit((parts[0], ace_netloc, parts[2], parts[3], parts[4]))
        super(URLValidator, self).__call__(url)
    else:
        url = value

Source File: html.py From luscan-devel with GNU General Public License v2.0

6 votes

def smart_urlquote(url):
    """
    Quotes a URL if it isn't already quoted.

    The domain is first converted from IDN to its ASCII (ACE) form when
    that is possible; an un-encodable domain is left as it is.
    """
    # Handle IDN before quoting.
    parts = urlsplit(url)
    try:
        ace_netloc = parts[1].encode('idna').decode('ascii')  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        pass
    else:
        url = urlunsplit((parts[0], ace_netloc, parts[2], parts[3], parts[4]))

    # Unquote before quoting so existing escapes are not escaped again.
    unquoted = unquote(force_str(url))
    # See http://bugs.python.org/issue2637
    requoted = quote(unquoted, safe=b'!*\'();:@&=+$,/?#[]~')

    return force_text(requoted)

Source File: feedparser.py From xbmc-addons-chinese with GNU General Public License v2.0

6 votes

def _convert_to_idn(url):
    """Convert a URL to IDN notation.

    This function should only be called with a unicode string.  If the
    network location is pure ASCII the URL is returned unchanged;
    otherwise every dot-separated label of the host is IDNA-encoded.
    Any ``user[:password]@`` prefix and ``:port`` suffix are carried over
    as they are.
    """
    # strategy: if the host cannot be encoded in ascii, then
    # it'll be necessary to encode it in idn form
    parts = list(urlparse.urlsplit(url))
    try:
        parts[1].encode('ascii')
    except UnicodeEncodeError:
        # Set credentials aside first: splitting the whole netloc on the
        # last ':' would mistake "password@host" for the port.
        userinfo, at_sign, hostport = parts[1].rpartition('@')
        host, colon, port = hostport.rpartition(':')
        if not colon:
            # No ':' at all -> everything is the host.
            host, port = hostport, u''
        labels = [label.encode('idna').decode('utf-8')
                  for label in host.split('.')]
        netloc = userinfo + at_sign + '.'.join(labels)
        if port:
            netloc += ':' + port
        parts[1] = netloc
        return urlparse.urlunsplit(parts)
    else:
        return url

Source File: feedparser.py From RSSNewsGAE with Apache License 2.0

6 votes

def _convert_to_idn(url):
    """Convert a URL to IDN notation.

    This function should only be called with a unicode string.  If the
    network location is pure ASCII the URL is returned unchanged;
    otherwise every dot-separated label of the host is IDNA-encoded.
    Any ``user[:password]@`` prefix and ``:port`` suffix are carried over
    as they are.
    """
    # strategy: if the host cannot be encoded in ascii, then
    # it'll be necessary to encode it in idn form
    parts = list(urlparse.urlsplit(url))
    try:
        parts[1].encode('ascii')
    except UnicodeEncodeError:
        # Set credentials aside first: splitting the whole netloc on the
        # last ':' would mistake "password@host" for the port.
        userinfo, at_sign, hostport = parts[1].rpartition('@')
        host, colon, port = hostport.rpartition(':')
        if not colon:
            # No ':' at all -> everything is the host.
            host, port = hostport, u''
        labels = [label.encode('idna').decode('utf-8')
                  for label in host.split('.')]
        netloc = userinfo + at_sign + '.'.join(labels)
        if port:
            netloc += ':' + port
        parts[1] = netloc
        return urlparse.urlunsplit(parts)
    else:
        return url

Source File: net.py From misp42splunk with GNU Lesser General Public License v3.0

6 votes

def validate_(self, value, context=None):
    """
    Check that ``value`` is a valid URL and, optionally, that it can be opened.

    Raises ``StopValidationError`` with the ``'invalid_url'`` message when
    ``self.valid_url`` rejects the value.  When ``self.verify_exists`` is
    set, the URL is rebuilt from its parsed parts and opened with
    ``urlopen``; a ``URLError`` is reported as ``'not_found'``.
    ``context`` is not used here.
    """
    parsed = self.valid_url(value)
    if not parsed:
        raise StopValidationError(self.messages['invalid_url'])
    if not self.verify_exists:
        return

    scheme = parsed['scheme']
    # First non-empty host form wins: IPv6, IPv4, then the encoded hostname.
    host = parsed['host6'] or parsed['host4'] or parsed['hostn_enc']
    port = parsed['port'] or ''
    rebuilt = urlunsplit((
        scheme,
        host + ':' + port,
        parsed['path'],
        parsed['query'],
        parsed['frag'],
    ))
    url_string = urlquote(rebuilt.encode('utf-8'), safe=VALID_CHAR_STRING)
    try:
        urlopen(url_string)
    except URLError:
        raise StopValidationError(self.messages['not_found'])

Source File: utils.py From openprocurement.api with Apache License 2.0

6 votes

def generate_docservice_url(request, doc_id, temporary=True, prefix=None):
    """
    Build a signed ``/get/<doc_id>`` URL on the configured document service.

    The message handed to ``docservice_key.signature`` is ``doc_id``,
    followed by a NUL byte and the expiry timestamp (now + 300 seconds)
    when ``temporary`` is true, and preceded by ``prefix/`` when a prefix
    is given.  ``Expires``, ``Prefix``, ``Signature`` and ``KeyID`` are
    sent as query parameters; scheme and host come from
    ``request.registry.docservice_url``.
    """
    registry = request.registry
    docservice_key = getattr(registry, 'docservice_key', None)
    service = urlparse(registry.docservice_url)

    query = {}
    mess = doc_id
    if temporary:
        expires = int(ttime()) + 300  # EXPIRES
        mess = "{}\0{}".format(doc_id, expires)
        query['Expires'] = expires
    if prefix:
        mess = '{}/{}'.format(prefix, mess)
        query['Prefix'] = prefix

    raw_signature = docservice_key.signature(mess.encode("utf-8"))
    query['Signature'] = quote(b64encode(raw_signature))
    # Only the first 8 characters of the key's hex form identify it.
    query['KeyID'] = docservice_key.hex_vk()[:8]

    doc_path = '/get/{}'.format(doc_id)
    return urlunsplit((service.scheme, service.netloc, doc_path, urlencode(query), ''))

Source File: w9_xss.py From w9scan with GNU General Public License v2.0

6 votes

def audit(arg):
    """
    Probe every query parameter of the URL ``arg``, first with GET, then POST.

    Each parameter (except a few skipped form fields) is passed to the
    ``iI1`` helper together with the URL stripped of query and fragment.
    NOTE(review): ``iI1`` is defined elsewhere; presumably it performs the
    actual XSS check -- confirm.  The first truthy result is reported via
    ``security_info`` and ends the audit.
    """
    target = arg
    parsed = urlparse.urlparse(target)
    base_url = urlparse.urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", ""))
    params = urlparse.parse_qsl(parsed.query)

    skipped = ['__VIEWSTATE', 'IbtnEnter.x', 'IbtnEnter.y']
    http_methods = ["GET", "POST"]

    for http_method in http_methods:

        for name, val in params:
            if name in skipped:
                continue

            debug('[XSS] <%s> %s %s', http_method, name, base_url)
            finding = iI1(http_method, base_url, params, name, val)

            if finding:
                security_info('<%s> %s' % (http_method, finding[1]))
                return

Source File: feedparser.py From telegram-robot-rss with Mozilla Public License 2.0

6 votes

def _convert_to_idn(url):
    """Convert a URL to IDN notation.

    This function should only be called with a unicode string.  If the
    network location is pure ASCII the URL is returned unchanged;
    otherwise every dot-separated label of the host is IDNA-encoded.
    Any ``user[:password]@`` prefix and ``:port`` suffix are carried over
    as they are.
    """
    # strategy: if the host cannot be encoded in ascii, then
    # it'll be necessary to encode it in idn form
    parts = list(urlparse.urlsplit(url))
    try:
        parts[1].encode('ascii')
    except UnicodeEncodeError:
        # Set credentials aside first: splitting the whole netloc on the
        # last ':' would mistake "password@host" for the port.
        userinfo, at_sign, hostport = parts[1].rpartition('@')
        host, colon, port = hostport.rpartition(':')
        if not colon:
            # No ':' at all -> everything is the host.
            host, port = hostport, u''
        labels = [label.encode('idna').decode('utf-8')
                  for label in host.split('.')]
        netloc = userinfo + at_sign + '.'.join(labels)
        if port:
            netloc += ':' + port
        parts[1] = netloc
        return urlparse.urlunsplit(parts)
    else:
        return url

Source File: server.py From mattermost-integration-giphy with Apache License 2.0

6 votes

def giphy_translate(text):
    """
    Giphy translate method, uses the Giphy API to find an appropriate gif url

    Sends ``text`` with the module-level ``RATING`` and ``GIPHY_API_KEY``
    to the translate endpoint.  Returns an empty string (after printing
    the error) when the response status is not OK; otherwise returns the
    URL of the original-size image with its scheme replaced by
    ``SCHEME.lower()``.
    """

    params = {
        's': text,
        'rating': RATING,
        'api_key': GIPHY_API_KEY,
    }

    resp = requests.get('https://api.giphy.com/v1/gifs/translate', params=params, verify=True)

    # Compare by value: ``is not`` tests object identity, which only
    # happens to work for small integers on CPython.
    if resp.status_code != requests.codes.ok:
        print('Encountered error using Giphy API, text=%s, status=%d, response_body=%s' % (text, resp.status_code, resp.json()))
        return ''

    resp_data = resp.json()

    url = list(urlsplit(resp_data['data']['images']['original']['url']))
    url[0] = SCHEME.lower()
    return urlunsplit(url)

Source File: url.py From snippet with MIT License

5 votes

def geturl(self):
    """
    Assemble the URL string, letting this object's fields override ``self.url``.

    Every component (scheme, netloc, path, params, query, fragment) is
    taken from ``self`` when it is truthy and from the corresponding
    attribute of ``self.url`` otherwise.  Non-empty params are attached to
    the path after a ``;``.
    """
    base = self.url
    scheme = self.scheme or base.scheme
    netloc = self.netloc or base.netloc
    path = self.path or base.path
    params = self.params or base.params
    query = self.query or base.query
    fragment = self.fragment or base.fragment

    if params:
        path = "%s;%s" % (path, params)
    return urlunsplit((scheme, netloc, path, query, fragment))

Source File: iri2uri.py From alfred-gmail with MIT License

5 votes

def iri2uri(uri):
    """Convert an IRI to a URI. Note that IRIs must be
    passed in a unicode strings. That is, do not utf-8 encode
    the IRI before passing it into the function.

    Anything that is not a unicode string is returned unchanged."""
    if not isinstance(uri, unicode):
        return uri
    parts = urlparse.urlsplit(uri)
    idna_authority = parts[1].encode("idna")
    # For each character in 'ucschar' or 'iprivate'
    #  1. encode as utf-8
    #  2. then %-encode each octet of that utf-8
    # (the per-character work is delegated to the ``encode`` helper)
    rebuilt = urlparse.urlunsplit(
        (parts[0], idna_authority, parts[2], parts[3], parts[4]))
    return "".join(encode(ch) for ch in rebuilt)

Source File: iri2uri.py From luci-py with Apache License 2.0

5 votes

def iri2uri(uri):
    """Convert an IRI to a URI. Note that IRIs must be
    passed in a unicode strings. That is, do not utf-8 encode
    the IRI before passing it into the function.

    Anything that is not a unicode string is returned unchanged."""
    if not isinstance(uri, unicode):
        return uri
    parts = urlparse.urlsplit(uri)
    idna_authority = parts[1].encode("idna")
    # For each character in 'ucschar' or 'iprivate'
    #  1. encode as utf-8
    #  2. then %-encode each octet of that utf-8
    # (the per-character work is delegated to the ``encode`` helper)
    rebuilt = urlparse.urlunsplit(
        (parts[0], idna_authority, parts[2], parts[3], parts[4]))
    return "".join(encode(ch) for ch in rebuilt)

Source File: fields.py From luscan-devel with GNU General Public License v2.0

5 votes

def to_python(self, value):
    """
    Normalise a URL value after the parent field's own conversion.

    A falsy value is returned as it is.  Otherwise a missing scheme
    defaults to ``http``, a missing domain is taken from the path, and an
    empty path becomes ``/``.  Raises ``ValidationError`` (``'invalid'``)
    when the URL cannot be split.
    """

    def split_url(url):
        """
        Returns a list of url parts via ``urlparse.urlsplit`` (or raises a
        ``ValidationError`` exception for certain).
        """
        try:
            return list(urlsplit(url))
        except ValueError:
            # urlparse.urlsplit can raise a ValueError with some
            # misformatted URLs.
            raise ValidationError(self.error_messages['invalid'])

    value = super(URLField, self).to_python(value)
    if not value:
        return value

    # parts: [scheme, netloc, path, query, fragment]
    parts = split_url(value)
    if not parts[0]:
        # If no URL scheme given, assume http://
        parts[0] = 'http'
    if not parts[1]:
        # Assume that if no domain is provided, that the path segment
        # contains the domain.
        parts[1], parts[2] = parts[2], ''
        # Rebuild the parts list, since the domain segment may now
        # contain the path too.
        parts = split_url(urlunsplit(parts))
    if not parts[2]:
        # the path portion may need to be added before query params
        parts[2] = '/'
    return urlunsplit(parts)

Source File: default.py From xbmc-addons-chinese with GNU General Public License v2.0

5 votes

def change_cdn(url):
    """
    Point a video URL at the aliCDN dispatcher host.

    URLs that already contain the dispatcher host or ``k.youku.com`` are
    returned unchanged; for every other URL only the network location is
    replaced.
    """
    # if the cdn_url starts with an ip addr, it should be youku's old CDN
    # which rejects http requests randomly with status code > 400
    # change it to the dispatcher of aliCDN can do better
    # at least a little more recoverable from HTTP 403
    dispatcher_url = 'vali.cp31.ott.cibntv.net'
    if dispatcher_url in url or 'k.youku.com' in url:
        return url
    rerouted = urlparse.urlsplit(url)._replace(netloc=dispatcher_url)
    return urlparse.urlunsplit(rerouted)

Source File: views.py From django-oidc-provider with MIT License

5 votes

def dispatch(self, request, *args, **kwargs):
    """
    Work out where to send the user after logout, then run the parent dispatch.

    The default target is the ``OIDC_LOGIN_URL`` setting.  When an
    ``id_token_hint`` resolves to a known client and the requested
    ``post_logout_redirect_uri`` is one of that client's registered URIs,
    that URI is used instead, with ``state`` added to its query string if
    one was supplied.  The ``OIDC_AFTER_END_SESSION_HOOK`` callable is
    invoked with the collected values before dispatching.
    """
    params = request.GET
    id_token_hint = params.get('id_token_hint', '')
    post_logout_redirect_uri = params.get('post_logout_redirect_uri', '')
    state = params.get('state', '')
    client = None

    next_page = settings.get('OIDC_LOGIN_URL')
    after_end_session_hook = settings.get('OIDC_AFTER_END_SESSION_HOOK', import_str=True)

    if id_token_hint:
        client_id = client_id_from_id_token(id_token_hint)
        try:
            client = Client.objects.get(client_id=client_id)
            # Only redirect to URIs the client has registered.
            if post_logout_redirect_uri in client.post_logout_redirect_uris:
                if not state:
                    next_page = post_logout_redirect_uri
                else:
                    redirect = urlsplit(post_logout_redirect_uri)
                    query_params = parse_qs(redirect.query)
                    query_params['state'] = state
                    new_query = urlencode(query_params, doseq=True)
                    next_page = urlunsplit(redirect._replace(query=new_query))
        except Client.DoesNotExist:
            # Unknown client: keep the default next_page.
            pass

    after_end_session_hook(
        request=request,
        id_token=id_token_hint,
        post_logout_redirect_uri=post_logout_redirect_uri,
        state=state,
        client=client,
        next_page=next_page
    )

    self.next_page = next_page
    return super(EndSessionView, self).dispatch(request, *args, **kwargs)

Source File: w9_urlredict.py From w9scan with GNU General Public License v2.0

5 votes

def audit(arg):
    """
    Probe every query parameter of the URL ``arg`` with the ``iI1`` helper.

    Each parameter (except a few skipped form fields) is passed to ``iI1``
    together with the URL stripped of query and fragment.
    NOTE(review): ``iI1`` is defined elsewhere; presumably it performs the
    actual redirect check -- confirm.  The first truthy result is reported
    via ``security_info`` and ends the audit.
    """
    target = arg
    parsed = urlparse.urlparse(target)
    base_url = urlparse.urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", ""))
    params = urlparse.parse_qsl(parsed.query)
    skipped = ["__VIEWSTATE", "IbtnEnter.x", "IbtnEnter.y"]
    for name, val in params:
        if name in skipped:
            continue
        debug("[RDB] %s %s-", name, base_url)
        finding = iI1(base_url, params, name, val)
        if finding:
            security_info(finding[1])
            return

Source File: prestoclient.py From presto-admin with Apache License 2.0

5 votes

def _get_response_from(self, uri):
    """
    Sends a GET request to the Presto server at the specified next_uri
    and updates the response

    Remove the scheme and host/port from the uri; the connection itself
    has that information.

    Returns ``True`` and stores the decoded JSON body in
    ``self.response_from_server`` on HTTP 200; logs an error and returns
    ``False`` for any other status.  The connection is always closed,
    including when the request itself raises.
    """
    parts = list(urlparse.urlsplit(uri))
    # Empty strings (not None) blank out scheme and host: Python 3's
    # urlunsplit refuses to mix None with str components.
    parts[0] = ''
    parts[1] = ''
    location = urlparse.urlunsplit(parts)
    conn = self._get_connection()
    headers = {"X-Presto-User": self.user}
    self._add_auth_headers(headers)
    try:
        conn.request("GET", location, headers=headers)
        response = conn.getresponse()

        if response.status != 200:
            _LOGGER.error("Error making GET request to %s: %s %s",
                          uri, response.status, response.reason)
            return False

        answer = response.read()
    finally:
        conn.close()

    self.response_from_server = json.loads(answer)
    _LOGGER.info("GET request successful for uri: %s", uri)
    return True

Source File: iri2uri.py From googleapps-message-recall with Apache License 2.0

5 votes

def iri2uri(uri):
    """Convert an IRI to a URI. Note that IRIs must be
    passed in a unicode strings. That is, do not utf-8 encode
    the IRI before passing it into the function.

    Anything that is not a unicode string is returned unchanged."""
    if not isinstance(uri, unicode):
        return uri
    parts = urlparse.urlsplit(uri)
    idna_authority = parts[1].encode('idna')
    # For each character in 'ucschar' or 'iprivate'
    #  1. encode as utf-8
    #  2. then %-encode each octet of that utf-8
    # (the per-character work is delegated to the ``encode`` helper)
    rebuilt = urlparse.urlunsplit(
        (parts[0], idna_authority, parts[2], parts[3], parts[4]))
    return "".join(encode(ch) for ch in rebuilt)

Source File: bottle2.py From pyFileFixity with MIT License

5 votes

def url(self):
    """ Full URL as requested by the client (computed).

        This value is constructed out of different environment variables
        and includes scheme, host, port, scriptname, path and query string.
        The host comes from ``HTTP_X_FORWARDED_HOST`` or ``HTTP_HOST``; if
        neither yields a value, ``SERVER_NAME`` is used and ``SERVER_PORT``
        is appended unless it is the default port for the scheme.
    """
    env = self.environ
    scheme = env.get('wsgi.url_scheme', 'http')
    host = env.get('HTTP_X_FORWARDED_HOST', env.get('HTTP_HOST', None))
    if not host:
        host = env.get('SERVER_NAME')
        port = env.get('SERVER_PORT', '80')
        uses_default_port = scheme + port in ('https443', 'http80')
        if not uses_default_port:
            host += ':' + port
    return urlunsplit(
        (scheme, host, urlquote(self.fullpath), self.query_string, ''))

Source File: __init__.py From misp42splunk with GNU Lesser General Public License v3.0

5 votes

def urldefrag(url):
    """
    Split ``url`` into ``(url_without_fragment, fragment)``.

    A URL containing no ``#`` is returned untouched with an empty fragment.
    """
    if "#" not in url:
        return url, ''
    parts = urlsplit(url)
    frag = parts[4]
    defrag = urlunsplit((parts[0], parts[1], parts[2], parts[3], ''))
    return defrag, frag

Source File: utils.py From snippet with MIT License

5 votes

def geturl(self):
    """
    Assemble the URL string, letting this object's fields override ``self.url``.

    Every component (scheme, netloc, path, params, query, fragment) is
    taken from ``self`` when it is truthy and from the corresponding
    attribute of ``self.url`` otherwise.  Non-empty params are attached to
    the path after a ``;``.
    """
    base = self.url
    scheme = self.scheme or base.scheme
    netloc = self.netloc or base.netloc
    path = self.path or base.path
    params = self.params or base.params
    query = self.query or base.query
    fragment = self.fragment or base.fragment

    if params:
        path = "%s;%s" % (path, params)
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))

Source File: iri2uri.py From aqua-monitor with GNU Lesser General Public License v3.0

5 votes

def iri2uri(uri):
    """Convert an IRI to a URI. Note that IRIs must be
    passed in a unicode strings. That is, do not utf-8 encode
    the IRI before passing it into the function.

    Anything that is not a unicode string is returned unchanged."""
    if not isinstance(uri, unicode):
        return uri
    parts = urlparse.urlsplit(uri)
    idna_authority = parts[1].encode('idna')
    # For each character in 'ucschar' or 'iprivate'
    #  1. encode as utf-8
    #  2. then %-encode each octet of that utf-8
    # (the per-character work is delegated to the ``encode`` helper)
    rebuilt = urlparse.urlunsplit(
        (parts[0], idna_authority, parts[2], parts[3], parts[4]))
    return "".join(encode(ch) for ch in rebuilt)

Source File: __init__.py From rpm-s3 with BSD 2-Clause "Simplified" License

5 votes

def _getFragmentUrl(self, url, fragment):
    """
    Return ``url`` with its fragment replaced by ``str(fragment)``.

    A falsy ``url`` is returned unchanged.  As a side effect the ``media``
    scheme is registered in the URL module's ``uses_fragment`` list.
    """
    try:
        import urlparse
    except ImportError:  # Python 3: the module moved to urllib.parse
        from urllib import parse as urlparse
    # Register the scheme only once; appending on every call made the
    # module-level list grow without bound.
    if 'media' not in urlparse.uses_fragment:
        urlparse.uses_fragment.append('media')
    if not url:
        return url
    parts = urlparse.urlsplit(url)
    return urlparse.urlunsplit(
        (parts[0], parts[1], parts[2], parts[3], str(fragment)))

Source File: wms_styles.py From minerva with Apache License 2.0

5 votes

def _generate_url(wms_url, **kwargs):
    """Generates different urls(wfs or wcs) from a wms url

    The keyword arguments, when given, replace the query string of
    ``wms_url`` (sequence values are expanded into repeated parameters).
    Without keyword arguments the URL keeps its existing query string.
    """
    scheme, netloc, path, query_string, fragment = urlsplit(wms_url)

    if kwargs:
        # Only mappings may be passed to urlencode; the raw query string
        # from urlsplit is already encoded and must be kept as it is.
        query_string = urlencode(kwargs, doseq=True)

    return urlunsplit((scheme, netloc, path, query_string, fragment))

Source File: scrapertools.py From tvalacarta with GNU General Public License v3.0

5 votes

def fixurl(url):
    """
    Return ``url`` as an encoded byte string with every component escaped.

    Non-unicode input is decoded as UTF-8 first.  User name, password,
    path segments, query and fragment are percent-quoted as UTF-8, and the
    host is IDNA-encoded.  Path, query and fragment are unquoted before
    being quoted again.
    """
    # turn string into unicode
    if not isinstance(url, unicode):
        url = url.decode('utf8')

    pieces = urlparse.urlsplit(url)

    # netloc -> [user[:password]@]host[:port]
    credentials, at_sign, host_and_port = pieces.netloc.rpartition('@')
    username, cred_colon, password = credentials.partition(':')
    hostname, port_colon, port_number = host_and_port.partition(':')

    scheme = pieces.scheme.encode('utf8')

    # put the network location back together, each part encoded
    netloc = ''.join((
        urllib.quote(username.encode('utf8')),
        cred_colon.encode('utf8'),
        urllib.quote(password.encode('utf8')),
        at_sign.encode('utf8'),
        hostname.encode('idna'),
        port_colon.encode('utf8'),
        port_number.encode('utf8'),
    ))

    # quote segment by segment with no safe characters: could be encoded slashes!
    quoted_segments = [
        urllib.quote(urllib.unquote(segment).encode('utf8'), '')
        for segment in pieces.path.split('/')
    ]
    path = '/'.join(quoted_segments)

    raw_query = urllib.unquote(pieces.query).encode('utf8')
    query = urllib.quote(raw_query, '=&?/')
    raw_fragment = urllib.unquote(pieces.fragment).encode('utf8')
    fragment = urllib.quote(raw_fragment)

    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))

# Some helper methods

Source File: DataServicePillager.py From DataPillager with MIT License

5 votes

def get_referring_domain(url_string):
    """get referring domain part of url

    Esri-hosted services (network location containing ``arcgis.com``) get
    the fixed ``https://www.arcgis.com``.  For any other service the domain
    is built from the URL's own scheme and network location, with ``http``
    upgraded to ``https``.

    :param url_string url of service
    """
    parsed = urlparse(url_string)
    if 'arcgis.com' in parsed.netloc:
        # is an esri domain
        return r"https://www.arcgis.com"
    # generate from service url and hope it works
    scheme = 'https' if parsed.scheme == 'http' else parsed.scheme
    return urlunsplit([scheme, parsed.netloc, '', '', ''])

Python urlparse.urlunsplit() Examples