Python urllib.parse.urlunsplit() Examples

The following are 30 code examples of urllib.parse.urlunsplit(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib.parse, or try the search function.
Example #1
Source File: storage.py    From Hands-On-Application-Development-with-PyCharm with MIT License 6 votes vote down vote up
def stored_name(self, name):
        """
        Return the stored (hashed) form of ``name``.

        ``name`` is unquoted and split; its stripped path is looked up in
        ``self.hashed_files`` under ``self.hash_key(path)``. On a miss a
        ``ValueError`` is raised when ``self.manifest_strict`` is truthy,
        otherwise the name is derived from ``self.hashed_name(name)`` passed
        through ``self.clean_name()``. The result is the original URL with
        only its path component replaced.
        """
        parts = urlsplit(unquote(name))
        path = parts.path.strip()
        cached = self.hashed_files.get(self.hash_key(path))
        if cached is None:
            if self.manifest_strict:
                raise ValueError("Missing staticfiles manifest entry for '%s'" % path)
            cached = self.clean_name(self.hashed_name(name))
        # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
        # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
        # The bare "?" is lost by urlsplit (empty query), so put it back.
        if '?#' in name and not parts.query:
            cached += '?'
        return urlunsplit(parts._replace(path=cached))
Example #2
Source File: base.py    From Hands-On-Application-Development-with-PyCharm with MIT License 6 votes vote down vote up
def translate_url(url, lang_code):
    """
    Return the `lang_code` translation of `url` (absolute or relative).

    The URL's path is resolved to a view and reversed again while `lang_code`
    is the active language; scheme, netloc, query and fragment are carried
    over unchanged. The original `url` is returned when the path cannot be
    resolved or the resolved view cannot be reversed.
    """
    parts = urlsplit(url)
    try:
        match = resolve(parts.path)
    except Resolver404:
        return url

    if match.namespace:
        view_name = "%s:%s" % (match.namespace, match.url_name)
    else:
        view_name = match.url_name

    with override(lang_code):
        try:
            translated_path = reverse(view_name, args=match.args, kwargs=match.kwargs)
        except NoReverseMatch:
            return url
        return urlunsplit(parts._replace(path=translated_path))
Example #3
Source File: http_to_https.py    From hypercorn with MIT License 6 votes vote down vote up
async def _send_websocket_redirect(self, scope: dict, send: Callable) -> None:
        """
        Answer a websocket request with a 307 redirect to ``self.host``.

        Declared ``async`` because the body awaits ``send``; ``await`` inside
        a plain ``def`` is a syntax error.

        :param scope: connection scope; ``raw_path`` and ``query_string`` are
            read as bytes, ``http_version`` is optional and defaults to "1.1".
        :param send: awaitable callable that receives each response message.
        """
        # If the HTTP version is 2 we should redirect with a https
        # scheme not wss.
        scheme = "https" if scope.get("http_version", "1.1") == "2" else "wss"

        new_url = urlunsplit(
            (scheme, self.host, scope["raw_path"].decode(), scope["query_string"].decode(), "")
        )
        await send(
            {
                "type": "websocket.http.response.start",
                "status": 307,
                "headers": [(b"location", new_url.encode())],
            }
        )
        await send({"type": "websocket.http.response.body"})
Example #4
Source File: utils.py    From modmail with GNU Affero General Public License v3.0 6 votes vote down vote up
def parse_image_url(url: str) -> str:
    """
    Rewrite an image URL so that its query string is exactly ``size=128``.

    Parameters
    ----------
    url : str
        The URL to convert.

    Returns
    -------
    str
        The URL with its query replaced by ``size=128`` (scheme, host, path
        and fragment kept), or '' when the path does not end in one of the
        recognised image extensions (case-insensitive).
    """
    image_suffixes = (".png", ".jpg", ".gif", ".jpeg", ".webp")
    scheme, netloc, path, _, fragment = parse.urlsplit(url)

    if not path.lower().endswith(image_suffixes):
        return ""
    return parse.urlunsplit((scheme, netloc, path, "size=128", fragment))
Example #5
Source File: html.py    From luscan-devel with GNU General Public License v2.0 6 votes vote down vote up
def smart_urlquote(url):
    """Quotes a URL if it isn't already quoted."""
    # Handle IDN before quoting: re-encode the host part as ASCII (ACE) and
    # rebuild the URL; if the host cannot be IDNA-encoded, keep the URL as is.
    parts = urlsplit(url)
    try:
        ace_netloc = parts.netloc.encode('idna').decode('ascii')  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        pass
    else:
        url = urlunsplit(parts._replace(netloc=ace_netloc))

    # Unquote first so that already-quoted input is not quoted twice.
    unquoted = unquote(force_str(url))
    # See http://bugs.python.org/issue2637
    requoted = quote(unquoted, safe=b'!*\'();:@&=+$,/?#[]~')

    return force_text(requoted)
Example #6
Source File: validators.py    From luscan-devel with GNU General Public License v2.0 6 votes vote down vote up
def __call__(self, value):
        """
        Validate ``value`` with the parent validator, retrying once with the
        host part IDNA-encoded when the first attempt fails.

        An empty ``value`` re-raises the original error unchanged; a host
        that cannot be IDNA-encoded re-raises the original
        ``ValidationError``.
        """
        try:
            super(URLValidator, self).__call__(value)
        except ValidationError as e:
            # Trivial case failed. Try for possible IDN domain
            if not value:
                raise
            value = force_text(value)
            parts = urlsplit(value)
            try:
                ace_netloc = parts.netloc.encode('idna').decode('ascii')  # IDN -> ACE
            except UnicodeError:  # invalid domain part
                raise e
            url = urlunsplit(parts._replace(netloc=ace_netloc))
            super(URLValidator, self).__call__(url)
        else:
            url = value
Example #7
Source File: storage.py    From bioforum with MIT License 6 votes vote down vote up
def stored_name(self, name):
        """
        Map ``name`` to the hashed file name kept in ``self.hashed_files``.

        Only the path component of the (unquoted) URL is replaced; scheme,
        netloc, query and fragment are passed through. A missing entry raises
        ``ValueError`` when ``self.manifest_strict`` is truthy and is
        otherwise computed via ``self.hashed_name()`` / ``self.clean_name()``.
        """
        scheme, netloc, raw_path, query, fragment = urlsplit(unquote(name))
        lookup_path = raw_path.strip()
        key = self.hash_key(lookup_path)
        new_path = self.hashed_files.get(key)
        if new_path is None:
            if self.manifest_strict:
                raise ValueError("Missing staticfiles manifest entry for '%s'" % lookup_path)
            new_path = self.clean_name(self.hashed_name(name))
        # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
        # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
        needs_bare_question_mark = '?#' in name and not query
        if needs_bare_question_mark:
            new_path += '?'
        return urlunsplit([scheme, netloc, new_path, query, fragment])
Example #8
Source File: config.py    From freight with Apache License 2.0 6 votes vote down vote up
def docker_init_app(app):
    """
    Fill in database and Redis URLs on ``app.config`` from Docker link
    environment variables.

    * When ``POSTGRES_PORT_5432_TCP_ADDR`` is set and
      ``SQLALCHEMY_DATABASE_URI`` is missing or falsy, a ``postgresql://`` URI
      is built from the ``POSTGRES_ENV_POSTGRES_*`` variables. User defaults
      to "postgres" and the database name defaults to the user name.
    * When ``REDIS_PORT_6379_TCP_ADDR`` is set, ``REDIS_URL`` is set (only if
      the key is absent) to ``redis://<host>:6379``.

    :param app: object exposing a dict-like ``config`` attribute.
    """
    # Imported locally so this block does not rely on the module's imports.
    from urllib.parse import quote

    if "POSTGRES_PORT_5432_TCP_ADDR" in os.environ:
        host = os.environ["POSTGRES_PORT_5432_TCP_ADDR"]
        user = os.environ.get("POSTGRES_ENV_POSTGRES_USER") or "postgres"
        password = os.environ.get("POSTGRES_ENV_POSTGRES_PASSWORD")
        db = os.environ.get("POSTGRES_ENV_POSTGRES_DB") or user
        # Percent-encode the credentials: a raw '@', ':' or '/' in the user
        # name or password would otherwise be read as a URL delimiter and
        # produce a URI that points at the wrong host or fails to parse.
        # ``user`` is never empty here because of the "postgres" fallback.
        credentials = quote(user, safe="")
        if password:
            credentials += ":" + quote(password, safe="")
        netloc = f"{credentials}@{host}"
        if not app.config.get("SQLALCHEMY_DATABASE_URI"):
            app.config["SQLALCHEMY_DATABASE_URI"] = urlunsplit(
                ("postgresql", netloc, db, None, None)
            )

    if "REDIS_PORT_6379_TCP_ADDR" in os.environ:
        host = os.environ["REDIS_PORT_6379_TCP_ADDR"]
        port = 6379
        netloc = f"{host}:{port}"
        app.config.setdefault("REDIS_URL", urlunsplit(("redis", netloc, "", None, None)))
Example #9
Source File: utils.py    From custom-resource-helper with Apache License 2.0 6 votes vote down vote up
def _send_response(response_url, response_body):
    """
    PUT ``response_body`` as JSON to ``response_url`` over HTTPS.

    If the body cannot be serialised, a FAILED response carrying the error
    text is sent instead. The request is retried every 5 seconds, without
    limit, until one attempt completes without raising.
    """
    try:
        payload = json.dumps(response_body)
    except Exception as e:
        msg = "Failed to convert response to json: {}".format(str(e))
        logger.error(msg, exc_info=True)
        response_body = {'Status': 'FAILED', 'Data': {}, 'Reason': msg}
        payload = json.dumps(response_body)
    logger.debug("CFN response URL: {}".format(response_url))
    logger.debug(payload)
    headers = {'content-type': '', 'content-length': str(len(payload))}
    target = urlsplit(response_url)
    host = target.netloc
    # Request target for the PUT: path, query and fragment without the
    # scheme and host, which are supplied through the connection itself.
    url = urlunsplit(target._replace(scheme="", netloc=""))
    delivered = False
    while not delivered:
        try:
            connection = HTTPSConnection(host)
            connection.request(method="PUT", url=url, body=payload, headers=headers)
            response = connection.getresponse()
            logger.info("CloudFormation returned status code: {}".format(response.reason))
            delivered = True
        except Exception as e:
            logger.error("Unexpected failure sending response to CloudFormation {}".format(e), exc_info=True)
            time.sleep(5)
Example #10
Source File: base.py    From bioforum with MIT License 6 votes vote down vote up
def translate_url(url, lang_code):
    """
    Translate the path of `url` (absolute or relative) into `lang_code`.

    The path is resolved to a view, then reversed with `lang_code` activated.
    Everything except the path is kept from the input. If the path does not
    resolve, or the view has no reverse match, `url` is returned untouched.
    """
    split = urlsplit(url)
    try:
        resolved = resolve(split.path)
    except Resolver404:
        pass
    else:
        # Namespaced views must be reversed as "namespace:url_name".
        if resolved.namespace:
            target = "%s:%s" % (resolved.namespace, resolved.url_name)
        else:
            target = resolved.url_name
        with override(lang_code):
            try:
                new_path = reverse(target, args=resolved.args, kwargs=resolved.kwargs)
            except NoReverseMatch:
                pass
            else:
                url = urlunsplit(split._replace(path=new_path))
    return url
Example #11
Source File: api_requests.py    From soweego with GNU General Public License v3.0 6 votes vote down vote up
def _build_sitelink_url(site, title):
    """
    Build the ``https://<language>.<project>.org/wiki/<title>`` URL.

    ``site`` is cut at the first occurrence of 'wiki': the part before is the
    language (underscores become hyphens), the rest is the project. A bare
    'wiki' project maps to 'wikipedia', and the 'commons' language always
    maps to the 'wikimedia' project. Spaces in ``title`` become underscores.
    """
    cut = site.find('wiki')
    language, project = site[:cut], site[cut:]
    if project == 'wiki':
        project = 'wikipedia'
    if language == 'commons':
        project = 'wikimedia'
    netloc = '.'.join((language.replace('_', '-'), project, 'org'))
    path = '/wiki/%s' % title.replace(' ', '_')
    url = urlunsplit(('https', netloc, path, '', ''))
    LOGGER.debug('Site: %s - Title: %s - Full URL: %s', site, title, url)
    return url
Example #12
Source File: utils.py    From fence with Apache License 2.0 6 votes vote down vote up
def append_query_params(original_url, **kwargs):
    """
    Add additional query string arguments to the given url.

    Existing parameters are kept, including those with an empty value
    (``?a=``). A keyword whose name matches an existing parameter replaces
    all of that parameter's values.

    Example call:
        new_url = append_query_params(
            original_url, error='this is an error',
            another_arg='this is another argument')

    :param original_url: URL to extend (absolute or relative).
    :param kwargs: query parameters to set, one value per name.
    :return: the URL with the re-encoded query string.
    """
    scheme, netloc, path, query_string, fragment = urlsplit(original_url)
    # keep_blank_values: without it parse_qs silently discards parameters such
    # as "a=", so merely appending a parameter would delete existing ones.
    query_params = parse_qs(query_string, keep_blank_values=True)
    query_params.update((key, [value]) for key, value in kwargs.items())

    new_query_string = urlencode(query_params, doseq=True)
    return urlunsplit((scheme, netloc, path, new_query_string, fragment))
Example #13
Source File: sources.py    From orbit-predictor with MIT License 6 votes vote down vote up
def _fetch_tle(self, path, sate_id, date=None):
        """
        Request the TLE lines for a satellite from ``self.url`` joined with
        ``path``.

        :param path: path joined onto ``self.url``.
        :param sate_id: sent as the ``satellite_number`` query parameter.
        :param date: optional object with ``strftime``; sent as ``date`` in
            ``YYYY-MM-DD`` form.
        :return: tuple of the values under the ``lines`` key of the JSON reply.
        :raises requests.exceptions.RequestException: re-raised after logging
            when the request itself fails.
        :raises ValueError: when the reply is not OK or has no ``lines`` key.
        """
        # urlsplit (not urlparse) is the counterpart of urlunsplit: urlparse
        # moves any ";params" out of the path, and urlunsplit would then drop
        # them when the URL is rebuilt.
        parts = urlparse.urlsplit(urlparse.urljoin(self.url, path))
        qargs = {'satellite_number': sate_id}
        if date is not None:
            qargs['date'] = date.strftime("%Y-%m-%d")

        query_string = urlencode(qargs)
        url = urlparse.urlunsplit(
            (parts.scheme, parts.netloc, parts.path, query_string, parts.fragment))
        headers = {'user-agent': 'orbit-predictor', 'Accept': 'application/json'}
        try:
            response = requests.get(url, headers=headers)
        except requests.exceptions.RequestException as error:
            logger.error("Exception requesting TLE: %s", error)
            raise
        if response.ok:
            # Decode the body once instead of once per access.
            payload = response.json()
            if 'lines' in payload:
                return tuple(payload['lines'])
        # Interpolate explicitly: exceptions, unlike logging calls, do not
        # apply %-formatting to extra positional arguments.
        raise ValueError("Error requesting TLE: %s" % response.text)
Example #14
Source File: utils.py    From flask-security with MIT License 6 votes vote down vote up
def transform_url(url, qparams=None, **kwargs):
    """ Modify url

    :param url: url to transform (can be relative)
    :param qparams: additional query params to add to end of url; a name
        already present in the query has its value replaced
    :param kwargs: pieces of URL to modify - e.g. netloc=localhost:8000
        (applied last, so ``query=...`` overrides the merged query)
    :return: Modified URL; a falsy ``url`` is returned unchanged

    .. versionadded:: 3.2.0
    """
    if not url:
        return url
    parts = urlsplit(url)
    overrides = {}
    if qparams:
        merged_query = dict(parse_qsl(parts.query))
        merged_query.update(qparams)
        overrides["query"] = urlencode(merged_query)
    overrides.update(kwargs)
    return urlunsplit(parts._replace(**overrides))
Example #15
Source File: forms.py    From Bitpoll with GNU General Public License v3.0 6 votes vote down vote up
def compress(self, data_list):
        """
        Combine ``[url, user, password]`` into one URL carrying the
        credentials in its netloc.

        User and password are escaped with ``quote_plus`` and placed as
        ``user[:password]@`` in front of the URL's existing netloc. When both
        are empty the URL is returned without credentials.

        :param data_list: sequence of three strings: URL, user name, password.
        :return: the resulting URL string.
        """
        user = quote_plus(data_list[1])
        passwd = quote_plus(data_list[2])
        auth = user
        if passwd:
            auth += ':' + passwd
        parsed = urlsplit(data_list[0])
        if not auth:
            # The split result has no ``url`` attribute; ``geturl()`` is the
            # way to get the reassembled URL back.
            return parsed.geturl()
        return urlunsplit(parsed._replace(netloc=auth + '@' + parsed.netloc))
Example #16
Source File: test_config.py    From marvin with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_sasurl_join(self, sas, exp):
        """
        Check that ``config.sasurl``, after switching to the requested SAS
        mode, joins component-wise with a cube API path to give ``exp``.

        ``sas`` selects the flags passed to ``config.switchSasUrl``:
        'public' and 'test' set one flag each, 'testpub' sets both, and every
        value other than 'local' switches to the 'utah' server.
        """
        url = '/marvin/api/cubes/8485-1901/'
        public = sas in ('public', 'testpub')
        test = sas in ('test', 'testpub')
        sas = sas if sas == 'local' else 'utah'
        config.switchSasUrl(sas, public=public, test=test)

        base_parts = urlsplit(config.sasurl)
        path_parts = urlsplit(url)
        joined = tuple(strjoin(*pair) for pair in zip(base_parts, path_parts))
        final = urlunsplit(joined)
        assert exp == final

    # @pytest.mark.parametrize('set_default, defrel, exp',
    #                          [('MPL-5', 'MPL-5', 'api.sdss.org'),
    #                           ('DR15', 'DR15', 'dr15.sdss.org/api')], indirect=['set_default'])
    # def test_sasurl_default_release(self, set_default, defrel, exp):
    #     assert config.release == defrel
    #     assert exp in config.sasurl
    # 
Example #17
Source File: util.py    From pypowervm with Apache License 2.0 6 votes vote down vote up
def extend_basepath(href, add):
    """Extends the base path of an href, accounting for querystring/fragment.

    For example, extend_basepath('http://server:1234/foo?a=b&c=d#frag', '/bar')
    => 'http://server:1234/foo/bar?a=b&c=d#frag'

    :param href: Path or href to augment.  Scheme, netloc, query string, and
                 fragment are allowed but not required.
    :param add: String to add onto the base path of the href.  Must not contain
                unescaped special characters such as '?', '&', '#'.
    :return: The augmented href.
    """
    scheme, netloc, path, query, fragment = urlparse.urlsplit(href)
    # Only the path grows; the other components are reassembled as they were.
    extended = path + add
    return urlparse.urlunsplit((scheme, netloc, extended, query, fragment))
Example #18
Source File: net.py    From misp42splunk with GNU Lesser General Public License v3.0 6 votes vote down vote up
def validate_(self, value, context=None):
        """
        Validate ``value`` as a URL and, when ``self.verify_exists`` is set,
        check that it can actually be opened.

        :param value: value handed to ``self.valid_url``.
        :param context: accepted for interface compatibility; not used here.
        :raises StopValidationError: with the 'invalid_url' message when
            ``self.valid_url`` returns a falsy result, or with 'not_found'
            when opening the URL raises ``URLError``.
        """
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if not self.verify_exists:
            return
        host = url['host6'] or url['host4'] or url['hostn_enc']
        # The ':' is always appended, so a URL without a port yields "host:".
        netloc = host + ':' + (url['port'] or '')
        url_string = urlquote(urlunsplit((
            url['scheme'],
            netloc,
            url['path'],
            url['query'],
            url['frag'])
            ).encode('utf-8'), safe=VALID_CHAR_STRING)
        try:
            response = urlopen(url_string)
        except URLError:
            raise StopValidationError(self.messages['not_found'])
        # Only reachability matters; release the connection right away
        # instead of leaving it open until garbage collection.
        response.close()
Example #19
Source File: finder.py    From rssant with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _guess_links(self):
        """
        Generate candidate URLs from the prefixes of ``self.path`` and hand
        them to ``self._merge_links``.

        Two groups are produced, both on ``self.scheme``/``self.netloc``:
        pages that may contain a feed link (the site root plus each path
        prefix) and possible feeds (each prefix followed by every entry of
        ``MAYBE_FEEDS``). Scores shrink as the prefix gets longer.
        """
        def site_url(path):
            return urlunsplit((self.scheme, self.netloc, path, "", ""))

        segments = self.path.split("/")
        container_links = [ScoredLink(site_url(""), 0.5)]
        feed_links = []
        for depth in range(len(segments)):
            prefix = segments[:depth]
            container_links.append(
                ScoredLink(site_url("/".join(prefix)), 1.0 / (depth + 3)))
            feed_links.extend(
                ScoredLink(site_url("/".join(prefix + [name])), 1.0 / (depth + 4))
                for name in MAYBE_FEEDS
            )
        self._merge_links(container_links + feed_links)
Example #20
Source File: net.py    From misp42splunk with GNU Lesser General Public License v3.0 6 votes vote down vote up
def validate_(self, value, context=None):
        """
        Validate ``value`` as a URL and, when ``self.verify_exists`` is set,
        check that it can actually be opened.

        :param value: value handed to ``self.valid_url``.
        :param context: accepted for interface compatibility; not used here.
        :raises StopValidationError: with the 'invalid_url' message when
            ``self.valid_url`` returns a falsy result, or with 'not_found'
            when opening the URL raises ``URLError``.
        """
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if not self.verify_exists:
            return
        host = url['host6'] or url['host4'] or url['hostn_enc']
        # The ':' is always appended, so a URL without a port yields "host:".
        netloc = host + ':' + (url['port'] or '')
        url_string = urlquote(urlunsplit((
            url['scheme'],
            netloc,
            url['path'],
            url['query'],
            url['frag'])
            ).encode('utf-8'), safe=VALID_CHAR_STRING)
        try:
            response = urlopen(url_string)
        except URLError:
            raise StopValidationError(self.messages['not_found'])
        # Only reachability matters; release the connection right away
        # instead of leaving it open until garbage collection.
        response.close()
Example #21
Source File: util.py    From byob with GNU General Public License v3.0 5 votes vote down vote up
def pastebin(source, api_key):
    """
    Upload file/data to Pastebin

    `Required`
    :param str source:    data or readable file-like object
    :param str api_key:   Pastebin api_dev_key

    Returns the API reply with a trailing '/' appended when missing; a reply
    starting with 'http' has '/raw' inserted in front of its path first.
    Returns None (after logging) when ``api_key`` is not a ``str`` or when
    the upload raises.
    """
    import sys
    if sys.version_info[0] > 2:
        from urllib.parse import urlsplit,urlunsplit
    else:
        from urllib2 import urlparse
        urlsplit = urlparse.urlsplit
        urlunsplit = urlparse.urlunsplit
    if not isinstance(api_key, str):
        log("No Pastebin API key found")
        return
    try:
        info = {'api_option': 'paste', 'api_paste_code': normalize(source), 'api_dev_key': api_key}
        paste = post('https://pastebin.com/api/api_post.php', data=info)
        parts = urlsplit(paste)
        if paste.startswith('http'):
            # Point at the raw view of the paste rather than the HTML page.
            result = urlunsplit((parts.scheme, parts.netloc, '/raw' + parts.path, parts.query, parts.fragment))
        else:
            result = paste
        if not result.endswith('/'):
            result += '/'
        return result
    except Exception as e:
        log("Upload to Pastebin failed with error: {}".format(e))
Example #22
Source File: util.py    From byob with GNU General Public License v3.0 5 votes vote down vote up
def pastebin(source, api_key):
    """
    Upload file/data to Pastebin

    `Required`
    :param str source:    data or readable file-like object
    :param str api_key:   Pastebin api_dev_key

    The API reply is returned with a trailing '/' ensured. If the reply
    starts with 'http' it is first rewritten to its '/raw' path. Nothing is
    returned when ``api_key`` is not a ``str`` or the upload raises; both
    cases are logged.
    """
    import sys
    if sys.version_info[0] > 2:
        from urllib.parse import urlsplit,urlunsplit
    else:
        from urllib2 import urlparse
        urlsplit = urlparse.urlsplit
        urlunsplit = urlparse.urlunsplit
    if not isinstance(api_key, str):
        log("No Pastebin API key found")
        return None
    try:
        form = {'api_option': 'paste', 'api_paste_code': normalize(source), 'api_dev_key': api_key}
        reply = post('https://pastebin.com/api/api_post.php', data=form)
        pieces = urlsplit(reply)
        link = reply
        if reply.startswith('http'):
            # Swap the HTML page URL for the raw-content URL.
            link = urlunsplit((pieces.scheme, pieces.netloc, '/raw' + pieces.path, pieces.query, pieces.fragment))
        return link if link.endswith('/') else link + '/'
    except Exception as e:
        log("Upload to Pastebin failed with error: {}".format(e))
Example #23
Source File: storage.py    From Hands-On-Application-Development-with-PyCharm with MIT License 5 votes vote down vote up
def hashed_name(self, name, content=None, filename=None):
        """
        Return ``name`` with the content hash inserted before the extension.

        :param name: base name (may carry query/fragment) the hashed file
            name is built from.
        :param content: file-like object to hash. When omitted, the file is
            opened through ``self.open()`` and closed again afterwards.
        :param filename: name of the file to hash if ``content`` isn't given;
            defaults to the path of ``name``.
        :return: the URL with its path replaced by ``<root>.<hash><ext>``, or
            with the path unchanged when ``self.file_hash()`` returns None.
            ``name`` itself is returned when opening the file raises IOError.
        :raises ValueError: when ``content`` is omitted and the file does not
            exist.
        """
        parsed_name = urlsplit(unquote(name))
        clean_name = parsed_name.path.strip()
        filename = (filename and urlsplit(unquote(filename)).path.strip()) or clean_name
        opened = content is None
        if opened:
            if not self.exists(filename):
                raise ValueError("The file '%s' could not be found with %r." % (filename, self))
            try:
                content = self.open(filename)
            except IOError:
                # Handle directory paths and fragments
                return name
        try:
            file_hash = self.file_hash(clean_name, content)
        finally:
            # Only close what this method opened itself.
            if opened:
                content.close()
        path, filename = os.path.split(clean_name)
        root, ext = os.path.splitext(filename)
        # A missing hash must contribute nothing; formatting None directly
        # would put the literal text "None" into the file name.
        hash_suffix = "" if file_hash is None else ".%s" % file_hash
        hashed_name = os.path.join(path, "%s%s%s" %
                                   (root, hash_suffix, ext))
        unparsed_name = list(parsed_name)
        unparsed_name[2] = hashed_name
        # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
        # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
        if '?#' in name and not unparsed_name[3]:
            unparsed_name[2] += '?'
        return urlunsplit(unparsed_name)
Example #24
Source File: views.py    From django-oidc-provider with MIT License 5 votes vote down vote up
def dispatch(self, request, *args, **kwargs):
        """
        Work out where to send the user after logout, run the configured
        after-end-session hook, then delegate to the parent ``dispatch``.

        ``self.next_page`` defaults to the ``OIDC_LOGIN_URL`` setting. It is
        replaced by ``post_logout_redirect_uri`` only when an
        ``id_token_hint`` identifies a client that lists that URI; a ``state``
        parameter, if given, is added to the redirect's query string.
        """
        params = request.GET
        id_token_hint = params.get('id_token_hint', '')
        post_logout_redirect_uri = params.get('post_logout_redirect_uri', '')
        state = params.get('state', '')
        client = None

        next_page = settings.get('OIDC_LOGIN_URL')
        after_end_session_hook = settings.get('OIDC_AFTER_END_SESSION_HOOK', import_str=True)

        if id_token_hint:
            client_id = client_id_from_id_token(id_token_hint)
            try:
                client = Client.objects.get(client_id=client_id)
                if post_logout_redirect_uri in client.post_logout_redirect_uris:
                    if not state:
                        next_page = post_logout_redirect_uri
                    else:
                        redirect_parts = urlsplit(post_logout_redirect_uri)
                        redirect_query = parse_qs(redirect_parts.query)
                        redirect_query['state'] = state
                        encoded_query = urlencode(redirect_query, doseq=True)
                        next_page = urlunsplit(redirect_parts._replace(query=encoded_query))
            except Client.DoesNotExist:
                # Unknown client: keep the default next page.
                pass

        after_end_session_hook(
            request=request,
            id_token=id_token_hint,
            post_logout_redirect_uri=post_logout_redirect_uri,
            state=state,
            client=client,
            next_page=next_page
        )

        self.next_page = next_page
        return super(EndSessionView, self).dispatch(request, *args, **kwargs)
Example #25
Source File: fields.py    From Hands-On-Application-Development-with-PyCharm with MIT License 5 votes vote down vote up
def to_python(self, value):
        """
        Normalise a URL value after the parent conversion.

        A missing scheme defaults to 'http'. When no domain is present, the
        path is taken to be the domain and the URL is split once more so that
        a path embedded in it is separated again. Falsy values are returned
        as they come back from the parent ``to_python``.
        """

        def split_url(url):
            """
            Return a list of url parts via urlparse.urlsplit(), or raise
            ValidationError for some malformed URLs.
            """
            try:
                parts = urlsplit(url)
            except ValueError:
                # urlparse.urlsplit can raise a ValueError with some
                # misformatted URLs.
                raise ValidationError(self.error_messages['invalid'], code='invalid')
            return list(parts)

        value = super().to_python(value)
        if not value:
            return value
        url_fields = split_url(value)
        if not url_fields[0]:
            # If no URL scheme given, assume http://
            url_fields[0] = 'http'
        if not url_fields[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            url_fields[1:3] = [url_fields[2], '']
            # Rebuild the url_fields list, since the domain segment may now
            # contain the path too.
            url_fields = split_url(urlunsplit(url_fields))
        return urlunsplit(url_fields)
Example #26
Source File: backend_address.py    From parsec-cloud with GNU Affero General Public License v3.0 5 votes vote down vote up
def to_url(self) -> str:
        """
        Serialise this address as a ``PARSEC_SCHEME`` URL.

        The port is included in the netloc only when ``self._parse_port``
        reports a custom one. The path and each query value are escaped with
        ``quote_plus``; no fragment is emitted.
        """
        _, custom_port = self._parse_port(self._port, self._use_ssl)
        netloc = f"{self._hostname}:{custom_port}" if custom_port else self.hostname
        query_pairs = [f"{k}={quote_plus(v)}" for k, v in self._to_url_get_params()]
        query = "&".join(query_pairs)
        path = quote_plus(self._to_url_get_path())
        return urlunsplit((PARSEC_SCHEME, netloc, path, query, None))
Example #27
Source File: internet.py    From trafaret with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def decode_url_idna(value):
    """
    Return ``value`` with its network location IDNA-encoded to ASCII.

    Only the netloc is touched; every other URL component is reassembled
    unchanged. A netloc that cannot be IDNA-encoded lets the resulting
    ``UnicodeError`` propagate.
    """
    parts = urlparse.urlsplit(value)
    ace_netloc = parts.netloc.encode('idna').decode('ascii')  # IDN -> ACE
    return urlparse.urlunsplit(parts._replace(netloc=ace_netloc))
Example #28
Source File: utils.py    From fence with Apache License 2.0 5 votes vote down vote up
def split_url_and_query_params(url):
    """
    Separate a URL from its query string.

    :return: ``(url_without_query, params)`` where ``params`` is the
        ``parse_qs`` mapping of each parameter name to its list of values;
        the fragment stays on the returned URL.
    """
    parts = urlsplit(url)
    query_params = parse_qs(parts.query)
    stripped_url = urlunsplit(parts._replace(query=None))
    return stripped_url, query_params
Example #29
Source File: authorize.py    From django-oidc-provider with MIT License 5 votes vote down vote up
def strip_prompt_login(path):
    """
    Strips 'login' from the 'prompt' query parameter.

    The first 'prompt' value is treated as a space-separated list. If
    removing 'login' empties it, the parameter is dropped altogether. A URL
    without a 'prompt' parameter has nothing to strip and is only
    re-encoded.

    :param path: URL or path whose query string is rewritten.
    :return: the URL with the re-encoded query string.
    """
    uri = urlsplit(path)
    query_params = parse_qs(uri.query)
    # Guard the lookup: indexing the default of a missing parameter would
    # raise IndexError for any URL that has no 'prompt' at all.
    prompt_values = query_params.get('prompt')
    if prompt_values:
        prompt_list = prompt_values[0].split()
        if 'login' in prompt_list:
            prompt_list.remove('login')
            if prompt_list:
                query_params['prompt'] = ' '.join(prompt_list)
            else:
                del query_params['prompt']
    uri = uri._replace(query=urlencode(query_params, doseq=True))
    return urlunsplit(uri)
Example #30
Source File: finder.py    From rssant with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _parse_link(self, tag, page_url):
        """
        Turn a link tag into a scored candidate URL, or None when rejected.

        A tag is rejected when its ``rel`` or ``type`` contains an entry of
        the respective NOT_FEED list, when it has no ``href``, when the
        link's last two host labels do not occur in ``self.netloc``, when the
        scheme is not http/https ('feed' and an empty scheme fall back to
        ``self.scheme``), when the path ends with an entry of
        ``URL_ENDS_NOT_FEED``, or when ``validate_url`` raises ``Invalid``.
        The fragment is dropped from the resulting URL.
        """
        link_rel = tag.get("rel", "")
        if not isinstance(link_rel, str):
            link_rel = ' '.join(link_rel)
        link_rel = link_rel.lower()
        if link_rel and any(key in link_rel for key in LINK_REL_NOT_FEED):
            return None
        link_type = str(tag.get("type", "")).lower()
        if link_type and any(key in link_type for key in LINK_TYPE_NOT_FEED):
            return None
        url = tag.get("href", "")
        if not url:
            return None
        if not url.startswith(('http://', 'https://')):
            url = urljoin(page_url, url)  # handle relative paths
        scheme, netloc, path, query, fragment = urlsplit(url)
        base_netloc = '.'.join(netloc.rsplit('.', 2)[-2:])
        if (not netloc) or base_netloc not in self.netloc:
            return None
        if scheme:
            scheme = scheme.lower()
            if scheme == 'feed':
                scheme = self.scheme
            elif scheme not in {'http', 'https'}:
                return None
        else:
            scheme = self.scheme
        lower_path = path.lower()
        if any(lower_path.endswith(key) for key in URL_ENDS_NOT_FEED):
            return None
        url = urlunsplit((scheme, netloc, path, query, None))
        try:
            validate_url(url)
        except Invalid:
            return None
        return self._score_link(url, lower_path, link_rel, link_type)