Python urllib.parse.urlparse() Examples

The following are 30 code examples of urllib.parse.urlparse(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib.parse, or try the search function.
Example #1
Source File: qr_reader.py    From Authenticator with GNU General Public License v2.0 11 votes vote down vote up
def read(self):
        """Decode the QR image at ``self.filename`` and return its secret.

        The image file is removed once it has been decoded. The first
        decoded payload is parsed as a URL and its query parameters are
        stored in ``self._codes``.

        :return: the ``secret`` query parameter, or ``None`` when it is
            missing, when no code could be read from the image, or when
            PIL / pyzbar cannot be imported (the scanner is then flagged
            as unavailable).
        """
        try:
            from PIL import Image
            from pyzbar.pyzbar import decode
            symbols = decode(Image.open(self.filename))
            # The image is no longer needed after decoding.
            if path.isfile(self.filename):
                remove(self.filename)
            try:
                payload = symbols[0].data.decode()
                parsed = urlparse(payload)
                self._codes = dict(parse_qsl(parsed.query))
                return self._codes.get("secret")
            except (KeyError, IndexError):
                # An empty decode result ends up here.
                Logger.error("Invalid QR image")
                return None
        except ImportError:
            # Record that QR scanning is not usable in this environment.
            from ..application import Application
            Application.USE_QRSCANNER = False
            QRReader.ZBAR_FOUND = False
Example #2
Source File: download.py    From gog-galaxy-plugin-downloader with GNU General Public License v3.0 9 votes vote down vote up
def get_plugin_config(config_uri):
    """
    Downloads/opens configuration yaml file, returns
    the ``plugins`` entry of the parsed document.

    :param config_uri: an http(s) URL, or a path to a local file.
    :returns: ``plugin_data['plugins']`` on success; ``None`` when the
        download fails with ``URLError`` or the YAML cannot be parsed
        (the error is printed in both cases).
    """
    # Try to open the URI as a URL or fall back to opening local file
    try:
        if urlparse(config_uri).scheme in ('https', 'http'):
            # Context manager guarantees the HTTP response is closed.
            with urlopen(config_uri) as response:
                yaml_data = response.read()
        else:
            with open(config_uri, 'r') as file_data:
                yaml_data = file_data.read()
    except URLError as e:
        print(e)
        # Nothing was read, so there is nothing to parse below.
        return None

    # Parse the YAML configuration
    try:
        plugin_data = yaml.safe_load(yaml_data)
    except yaml.YAMLError as e:
        print(e)
        return None

    return plugin_data['plugins']
Example #3
Source File: asgi.py    From quart with MIT License 7 votes vote down vote up
def _create_websocket_from_scope(self, send: Callable) -> Websocket:
        """Build the application's websocket object from ``self.scope``.

        Headers are decoded as latin1 and title-cased; ``Remote-Addr`` is
        taken from the scope's ``client`` entry (``"<local>"`` when absent).
        A path that does not begin with ``/`` is reduced to the path
        component of its parsed URL.
        """
        scope = self.scope
        client = scope.get("client") or ["<local>"]

        headers = Headers()
        headers["Remote-Addr"] = client[0]
        for raw_name, raw_value in scope["headers"]:
            headers.add(raw_name.decode("latin1").title(), raw_value.decode("latin1"))

        path = scope["path"]
        if path[0] != "/":
            path = urlparse(path).path

        return self.app.websocket_class(
            path,
            scope["query_string"],
            scope["scheme"],
            headers,
            scope.get("root_path", ""),
            scope.get("http_version", "1.1"),
            scope.get("subprotocols", []),
            self.queue.get,
            partial(self.send_data, send),
            partial(self.accept_connection, send),
        )
Example #4
Source File: diagnose.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 7 votes vote down vote up
def test_connection(name, url, timeout=10):
    """Simple connection test.

    Resolves the URL's host, then opens the URL, printing how long each
    step took. Any failure is printed and ends the test early.

    :param name: label used in the printed messages.
    :param url: URL to resolve and open.
    :param timeout: timeout in seconds passed to ``urlopen``.
    """
    urlinfo = urlparse(url)
    # netloc may carry "user@" and ":port", which gethostbyname cannot
    # resolve; prefer the bare hostname and fall back to netloc otherwise.
    host = urlinfo.hostname or urlinfo.netloc
    start = time.time()
    try:
        socket.gethostbyname(host)
    except Exception as e:
        print('Error resolving DNS for {}: {}, {}'.format(name, url, e))
        return
    dns_elapsed = time.time() - start
    start = time.time()
    try:
        # Close the response right away; only the timing matters here.
        urlopen(url, timeout=timeout).close()
    except Exception as e:
        print("Error open {}: {}, {}, DNS finished in {} sec.".format(name, url, e, dns_elapsed))
        return
    load_elapsed = time.time() - start
    print("Timing for {}: {}, DNS: {:.4f} sec, LOAD: {:.4f} sec.".format(name, url, dns_elapsed, load_elapsed))
Example #5
Source File: sublist3r.py    From subtake with GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains from the result links found in ``resp``.

        Links are pulled from ``b_algo`` and ``b_title`` result markup,
        stripped of highlight tags, and reduced to their network location.
        Each previously unseen, non-empty location other than
        ``self.domain`` is appended to ``self.subdomains``.

        :param resp: HTML text of a result page.
        :return: the list of raw links that were matched (empty when
            matching failed).
        """
        link_regx = re.compile('<li class="b_algo"><h2><a href="(.*?)"')
        link_regx2 = re.compile('<div class="b_title"><h2><a href="(.*?)"')
        # Bound up front so the final return is safe even if matching raises.
        links_list = []
        try:
            links_list = link_regx.findall(resp) + link_regx2.findall(resp)

            for link in links_list:
                link = re.sub(r'<(\/)?strong>|<span.*?>|<|>', '', link)
                if not link.startswith('http'):
                    link = "http://" + link
                # Strip before comparing so the duplicate check sees the
                # same value that gets stored.
                subdomain = urlparse.urlparse(link).netloc.strip()
                if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain)
        except Exception:
            # Best-effort scraping: a malformed page must not abort the run.
            pass

        return links_list
Example #6
Source File: test_requests.py    From sanic with MIT License 6 votes vote down vote up
def test_url_attributes_with_ssl_dict(app, path, query, expected_url):
    """Check that request URL attributes match ``urlparse`` over HTTPS.

    The server is started with a dict-style SSL configuration pointing
    at the self-signed certificate and key next to this file.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    ssl_dict = {
        "cert": os.path.join(here, "certs/selfsigned.cert"),
        "key": os.path.join(here, "certs/selfsigned.key"),
    }

    async def handler(request):
        return text("OK")

    app.add_route(handler, path)

    target = f"https://{HOST}:{PORT}" + path + f"?{query}"
    request, response = app.test_client.get(
        target,
        server_kwargs={"ssl": ssl_dict},
    )
    assert request.url == expected_url.format(HOST, request.server_port)

    # Every parsed component must agree with the request's own view.
    parsed = urlparse(request.url)
    assert parsed.scheme == request.scheme
    assert parsed.path == request.path
    assert parsed.query == request.query_string
    assert parsed.netloc == request.host
Example #7
Source File: tiny_proxy.py    From sslyze with GNU Affero General Public License v3.0 6 votes vote down vote up
def do_GET(self):
        """Forward a plain-HTTP GET request to its target host.

        ``self.path`` is parsed with ``http`` as the default scheme. A
        request whose scheme is not ``http``, that has a fragment, or that
        lacks a network location is answered with a 400 error. Otherwise a
        TCP socket is opened and, if ``self._connect_to`` succeeds, the
        request line and headers are sent (with ``Connection: close`` and
        without ``Proxy-Connection``) before handing over to
        ``self._read_write``. The socket and the client connection are
        always closed at the end.
        """
        (scm, netloc, path, params, query, fragment) = urlparse(self.path, "http")
        if scm != "http" or fragment or not netloc:
            self.send_error(400, "bad url %s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                request_line = "%s %s %s\r\n" % (
                    self.command,
                    urlunparse(("", "", path, params, query, "")),
                    self.request_version,
                )
                # Sockets only accept bytes; sendall also guarantees the
                # whole buffer is written, unlike a single send().
                soc.sendall(request_line.encode("latin-1"))
                self.headers["Connection"] = "close"
                del self.headers["Proxy-Connection"]
                for key_val in self.headers.items():
                    soc.sendall(("%s: %s\r\n" % key_val).encode("latin-1"))
                soc.sendall(b"\r\n")
                self._read_write(soc)
        finally:
            logging.warning("Finished do_GET()")
            soc.close()
            self.connection.close()
Example #8
Source File: utils.py    From misp42splunk with GNU Lesser General Public License v3.0 6 votes vote down vote up
def extract_http_scheme_host_port(http_url):
    '''Extract scheme, host and port from a HTTP URL.

    :param http_url: HTTP URL to extract.
    :type http_url: ``string``
    :returns: A tuple of scheme, host and port
    :rtype: ``tuple``

    :raises ValueError: If `http_url` is not in http(s)://hostname:port format.
    '''

    error_msg = str(http_url) + " is not in http(s)://hostname:port format"

    try:
        http_info = urlparse.urlparse(http_url)
        scheme = http_info.scheme
        hostname = http_info.hostname
        # Reading .port can itself raise for a malformed or out-of-range
        # port, so it belongs inside the guarded section.
        port = http_info.port
    except Exception:
        raise ValueError(error_msg)

    if not scheme or not hostname or not port:
        raise ValueError(error_msg)

    return (scheme, hostname, port)
Example #9
Source File: __init__.py    From misp42splunk with GNU Lesser General Public License v3.0 6 votes vote down vote up
def __init__(self, splunkd_uri, session_key, schema):
        """
        Global Config.

        Stores the arguments, builds a ``SplunkRestClient`` from the
        scheme, hostname and port parsed out of ``splunkd_uri``, and
        creates the configuration/inputs/configs/settings helpers on top
        of that client.

        :param splunkd_uri:
        :param session_key:
        :param schema:
        :type schema: GlobalConfigSchema
        """
        self._splunkd_uri = splunkd_uri
        self._session_key = session_key
        self._schema = schema

        uri_parts = urlparse(self._splunkd_uri)
        rest_client = SplunkRestClient(
            self._session_key,
            self._schema.product,
            scheme=uri_parts.scheme,
            host=uri_parts.hostname,
            port=uri_parts.port,
        )
        self._client = rest_client

        # All helpers share the same client and schema.
        self._configuration = Configuration(self._client, self._schema)
        self._inputs = Inputs(self._client, self._schema)
        self._configs = Configs(self._client, self._schema)
        self._settings = Settings(self._client, self._schema)
Example #10
Source File: promenade_client.py    From drydock with Apache License 2.0 6 votes vote down vote up
def _get_prom_url(self):
        """Return the parsed Promenade endpoint URL from Keystone.

        The internal ``kubernetesprovisioner`` endpoint is looked up on
        the Keystone session.

        :raises errors.DriverError: when no such endpoint is found (the
            failure is also logged).
        """
        ks_session = self._get_ks_session()

        try:
            prom_endpoint = ks_session.get_endpoint(
                interface='internal', service_type='kubernetesprovisioner')
        except exc.EndpointNotFound:
            msg = ("Could not find an internal interface"
                   " defined in Keystone for Promenade")
            self.logger.error(msg)
            raise errors.DriverError(msg)

        return urlparse(prom_endpoint)
Example #11
Source File: base.py    From zun with Apache License 2.0 6 votes vote down vote up
def validate_link(self, link, bookmark=False):
        """Checks if the given link can get correct data.

        :param link: URL to validate.
        :param bookmark: when true, a path starting with ``PATH_PREFIX``
            is rejected (bookmark links must not carry the version).
        :return: ``True`` if fetching the link's path via ``get_json``
            succeeds, ``False`` otherwise.
        """
        # Drop the scheme and net location; keep the rest of the link.
        _, _, path, params, query, fragment = urlparse.urlparse(link)

        if bookmark and path.startswith(PATH_PREFIX):
            return False

        full_path = urlparse.urlunparse(['', '', path, params, query, fragment])
        try:
            self.get_json(full_path, path_prefix='')
        except Exception:
            return False
        return True
Example #12
Source File: evillib.py    From wafw00f with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def urlParser(target):
    """Split ``target`` into ``(hostname, port, path, query, ssl)``.

    Only ``http``, ``https`` and scheme-less targets are accepted; any
    other scheme is logged as an error and ``None`` is returned.
    ``port`` is the text after the first ``:`` of the network location
    (``None`` when there is none), ``path`` defaults to ``/`` and ``ssl``
    is ``True`` for ``https``.
    """
    log = logging.getLogger('urlparser')

    parts = urlparse(target)
    scheme, netloc, path, query = parts[0], parts[1], parts[2], parts[4]

    if scheme not in ['http', 'https', '']:
        log.error('scheme %s not supported' % scheme)
        return

    ssl = scheme == 'https'
    if len(path) == 0:
        path = '/'

    pieces = netloc.split(':')
    hostname = pieces[0]
    port = pieces[1] if len(pieces) > 1 else None
    return (hostname, port, path, query, ssl)
Example #13
Source File: url.py    From Vxscan with Apache License 2.0 6 votes vote down vote up
def dedup_link(urls):
    """Return a de-duplicated list of the parameterised URLs in ``urls``.

    URLs without ``=``, or containing ``'``, ``@`` or ``+``, are dropped.
    Ordinary URLs are kept once per ``netloc + path``; pseudo-static URLs
    (``/?<digits>=``) are collected separately and reduced by ``diff``.
    """
    seen_locations = []
    kept = []
    pseudo_static = []
    for candidate in set(urls):
        # Keep only URLs that carry parameters; skip everything else.
        if '=' not in candidate or re.search(r"'|@|\+", candidate):
            continue
        # Pseudo-static URLs are de-duplicated differently from ordinary ones.
        if re.search(r'/\?\d+=', candidate):
            pseudo_static.append(candidate)
            continue
        # Use urlparse to ignore the parameters; identical locations are dropped.
        parts = parse.urlparse(candidate)
        location = parts.netloc + parts.path
        if location not in seen_locations:
            seen_locations.append(location)
            kept.append(candidate)
    kept.extend(diff(pseudo_static))
    return kept
Example #14
Source File: requests_usbmux.py    From facebook-wda with MIT License 6 votes vote down vote up
def get_connection(self, url, proxies=None):
        """Return the connection pool for ``url``, creating it on first use.

        :param url: target URL; also used as the pool key.
        :param proxies: optional mapping of scheme to proxy.
        :raises ValueError: if a proxy is configured for the URL's scheme.
        """
        scheme = urlparse(url.lower()).scheme
        if (proxies or {}).get(scheme):
            raise ValueError('%s does not support specifying proxies' %
                             self.__class__.__name__)

        # Lookup and insertion happen under the pool container's lock.
        with self.pools.lock:
            pool = self.pools.get(url)
            if not pool:
                pool = UsbmuxHTTPConnectionPool(url, self.timeout)
                self.pools[url] = pool

        return pool
Example #15
Source File: asgi.py    From quart with MIT License 6 votes vote down vote up
def _create_request_from_scope(self, send: Callable) -> Request:
        """Build the application's request object from ``self.scope``.

        Headers are decoded as latin1 and title-cased; ``Remote-Addr`` is
        taken from the scope's ``client`` entry (``"<local>"`` when absent).
        When the HTTP version compares below ``"1.1"`` a ``Host`` header
        defaults to the configured ``SERVER_NAME``. A path that does not
        begin with ``/`` is reduced to the path component of its parsed URL.
        """
        scope = self.scope
        client = scope.get("client") or ["<local>"]

        headers = Headers()
        headers["Remote-Addr"] = client[0]
        for raw_name, raw_value in scope["headers"]:
            headers.add(raw_name.decode("latin1").title(), raw_value.decode("latin1"))
        if scope["http_version"] < "1.1":
            headers.setdefault("Host", self.app.config["SERVER_NAME"] or "")

        path = scope["path"]
        if path[0] != "/":
            path = urlparse(path).path

        return self.app.request_class(
            scope["method"],
            scope["scheme"],
            path,
            scope["query_string"],
            headers,
            scope.get("root_path", ""),
            scope["http_version"],
            max_content_length=self.app.config["MAX_CONTENT_LENGTH"],
            body_timeout=self.app.config["BODY_TIMEOUT"],
            send_push_promise=partial(self._send_push_promise, send),
            scope=scope,
        )
Example #16
Source File: sublist3r.py    From subtake with GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains from the ``<cite>`` elements in ``resp``.

        Each cite text has ``<span...>`` markup removed and is reduced to
        its network location. Every previously unseen, non-empty location
        other than ``self.domain`` is appended to ``self.subdomains``.

        :param resp: HTML text of a result page.
        :return: the list of raw cite texts that were matched (empty when
            matching failed).
        """
        link_regx = re.compile(r'<cite.*?>(.*?)<\/cite>')
        # Bound up front so the final return is safe even if matching raises.
        links_list = []
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                link = re.sub('<span.*>', '', link)
                if not link.startswith('http'):
                    link = "http://" + link
                # Strip before comparing so the duplicate check sees the
                # same value that gets stored.
                subdomain = urlparse.urlparse(link).netloc.strip()
                if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain)
        except Exception:
            # Best-effort scraping: a malformed page must not abort the run.
            pass
        return links_list
Example #17
Source File: sublist3r.py    From subtake with GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains of ``self.domain`` from ``resp``.

        Links are pulled from two ``<span>`` result patterns, stripped of
        ``<b>`` tags and reduced to their network location. Locations that
        do not end with ``self.domain`` are ignored; every other previously
        unseen, non-empty location different from ``self.domain`` is
        appended to ``self.subdomains``.

        :param resp: HTML text of a result page.
        :return: the list of raw links that were matched (empty when
            matching failed).
        """
        link_regx2 = re.compile('<span class=" fz-15px fw-m fc-12th wr-bw.*?">(.*?)</span>')
        link_regx = re.compile('<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
        links_list = []
        try:
            links_list = link_regx.findall(resp) + link_regx2.findall(resp)
            for link in links_list:
                link = re.sub(r"<(\/)?b>", "", link)
                if not link.startswith('http'):
                    link = "http://" + link
                # Strip before comparing so the duplicate check sees the
                # same value that gets stored.
                subdomain = urlparse.urlparse(link).netloc.strip()
                if not subdomain.endswith(self.domain):
                    continue
                if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain)
        except Exception:
            # Best-effort scraping: a malformed page must not abort the run.
            pass

        return links_list
Example #18
Source File: sublist3r.py    From subtake with GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains from ``web-result-url`` entries in ``resp``.

        Each entry is reduced to its network location; every previously
        unseen, non-empty location other than ``self.domain`` is appended
        to ``self.subdomains``.

        :param resp: HTML text of a result page.
        :return: the list of raw entries that were matched (empty when
            matching failed).
        """
        link_regx = re.compile('<p class="web-result-url">(.*?)</p>')
        # Bound up front so the final return is safe even if matching raises.
        links_list = []
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                if not link.startswith('http'):
                    link = "http://" + link
                # Strip before comparing so the duplicate check sees the
                # same value that gets stored.
                subdomain = urlparse.urlparse(link).netloc.strip()
                if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain)
        except Exception:
            # Best-effort scraping: a malformed page must not abort the run.
            pass

        return links_list
Example #19
Source File: connections.py    From python-esppy with Apache License 2.0 6 votes vote down vote up
def __init__(self,session,**kwargs):
        """Set up connection state from the session's ``conn_url``.

        ``_secure`` is true for an ``https`` URL; ``_host`` and ``_port``
        are the two ``:``-separated parts of the URL's network location
        (the port is kept as text).
        """
        tools.Options.__init__(self,**kwargs)

        self._session = session

        parsed = urlparse(self._session.conn_url)
        self._secure = parsed[0] == "https"

        # NOTE(review): a conn_url without an explicit ":port" makes the
        # second index fail with IndexError - confirm callers always pass one.
        host_and_port = parsed[1].split(":")
        self._host = host_and_port[0]
        self._port = host_and_port[1]

        # Populated later, once the websocket is opened.
        self._websocket = None
        self._handshakeComplete = False
        self._headers = None
        self._authorization = None
Example #20
Source File: port_scan.py    From Vxscan with Apache License 2.0 5 votes vote down vote up
def pool(self):
        """Normalise ``self.ipaddr``, run the scan, then save and report.

        The scheme and trailing ``/`` are removed from ``self.ipaddr`` (and
        ``/word`` path segments when the URL had a path). The result is used
        directly if it contains a dotted-quad, otherwise it is resolved;
        any ``:port`` suffix is dropped before calling ``self.run``. Errors
        in this phase are ignored.

        :return: ``'server:port'`` strings built from ``self.out`` when
            ``self.num`` is 0, otherwise ``['Portspoof:0']``.
        """
        out = []
        try:
            # Decide whether the given url is of the form www.baidu.com or www.baidu.com/path
            has_path = bool(parse.urlparse(self.ipaddr).path)
            self.ipaddr = self.ipaddr.replace('http://', '').replace('https://', '').rstrip('/')
            if has_path:
                self.ipaddr = re.sub(r'/\w+', '', self.ipaddr)
            if re.search(r'\d+\.\d+\.\d+\.\d+', self.ipaddr):
                ipaddr = self.ipaddr
            else:
                ipaddr = socket.gethostbyname(self.ipaddr)
            if ':' in ipaddr:
                ipaddr = re.sub(r':\d+', '', ipaddr)
            self.run(ipaddr)
        except Exception:
            pass

        if self.num != 0:
            self.save(self.ipaddr, [{"server": 'Portspoof', "port": '0', "banner": ''}])
            console('PortScan', self.ipaddr, 'Portspoof:0\n')
            return ['Portspoof:0']

        self.save(self.ipaddr, self.out)
        for entry in self.out:
            out.append('{}:{}'.format(entry.get('server'), entry.get('port')))
            console('PortScan', self.ipaddr, '{}:{}\n'.format(entry.get('server'), entry.get('port')))
        return out
Example #21
Source File: file_utils.py    From mrc-for-flat-nested-ner with Apache License 2.0 5 votes vote down vote up
def split_s3_path(url: str) -> Tuple[str, str]:
    """Split a full s3 path into the bucket name and path.

    Raises ``ValueError`` when the URL lacks a network location or a path.
    """
    parts = urlparse(url)
    bucket_name, s3_path = parts.netloc, parts.path
    if not (bucket_name and s3_path):
        raise ValueError("bad s3 path {}".format(url))
    # Drop a single leading '/' from the object path.
    key = s3_path[1:] if s3_path.startswith("/") else s3_path
    return bucket_name, key
Example #22
Source File: verify.py    From Vxscan with Apache License 2.0 5 votes vote down vote up
def verify_https(url):
    """Return a reachable form of ``url``, preferring https over http.

    A URL that already carries a scheme is returned unchanged if it can
    be fetched. Otherwise the network location (or the path, when there
    is none) is tried with ``https://``; on ``AttributeError`` it is
    retried with ``http://``. Nothing is returned when every attempt fails.
    """
    # Verify whether the domain is served over http or https.
    # If the domain answers with a 302 redirect, get the redirected address.
    req = Requests()
    # noinspection PyBroadException
    if '://' in url:
        try:
            req.get(url)
            return url
        except Exception:
            pass
    host = parse_host(url)
    pieces = parse.urlparse(url)
    if pieces.netloc:
        url = pieces.netloc
    elif pieces.path:
        url = pieces.path
    # noinspection PyBroadException
    try:
        resp = req.get('https://' + url)
        # Raises AttributeError when the response has no status_code.
        getattr(resp, 'status_code')
        console('Verify', host, 'https://' + url + '\n')
        return 'https://' + url
    except AttributeError:
        # noinspection PyBroadException
        try:
            req.get('http://' + url)
            console('Verify', host, 'http://' + url + '\n')
            return 'http://' + url
        except Exception:
            pass
    except Exception as e:
        logging.exception(e)
Example #23
Source File: file_utils.py    From mrc-for-flat-nested-ner with Apache License 2.0 5 votes vote down vote up
def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str:
    """
    Resolve something that is either a URL or a local path to a local file.

    An ``http``/``https``/``s3`` URL is fetched through the cache and the
    cached file's path is returned. An existing local path is returned
    as-is. A scheme-less path that does not exist raises
    ``FileNotFoundError``; anything else raises ``ValueError``.
    """
    if cache_dir is None:
        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
    # Work with plain strings from here on.
    if isinstance(url_or_filename, Path):
        url_or_filename = str(url_or_filename)
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)

    scheme = urlparse(url_or_filename).scheme

    # URL, so get it from the cache (downloading if necessary)
    if scheme in ('http', 'https', 's3'):
        return get_from_cache(url_or_filename, cache_dir)

    # File, and it exists.
    if os.path.exists(url_or_filename):
        return url_or_filename

    # File, but it doesn't exist.
    if scheme == '':
        raise FileNotFoundError("file {} not found".format(url_or_filename))

    # Something unknown
    raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename))
Example #24
Source File: url_checker.py    From YaYaGen with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def find_url_strings(self, url):
        """
        Return a list holding the URL's path when it ends in ``.php``,
        otherwise an empty list.
        """
        url_path = urlparse(url).path
        return [url_path] if url_path.endswith(".php") else []
Example #25
Source File: _https_client.py    From oscrypto with MIT License 5 votes vote down vote up
def setup_connection(self, url, timeout):
        """
        Point the client at the host and port of ``url`` (port 443 when
        the URL names none), closing an existing socket that targets a
        different host/port.

        :param url:
            The URL to download

        :param timeout:
            The int number of seconds to set the timeout to

        :raises:
            HttpsClientException when the URL uses the plain http scheme

        :return:
            A boolean indicating if the connection was reused
        """

        url_info = urlparse(url)
        if url_info.scheme == 'http':
            raise HttpsClientException('Can not connect to a non-TLS server')

        target = (url_info.hostname, url_info.port or 443)

        # An open socket to another endpoint cannot be reused.
        if self.socket and self.url_info != target:
            self.close()

        self.timeout = timeout
        self.url_info = target

        return self.ensure_connected()
Example #26
Source File: url.py    From Vxscan with Apache License 2.0 5 votes vote down vote up
def diff(urls):
    """De-duplicate pseudo-static URLs by host, path and first parameter name.

    A URL is kept when its host is new, when its path is new for an
    already-seen host, or when the name of its first query parameter has
    not been seen before.

    :param urls: iterable of URL strings, e.g. ``https://www.xxx.com/?page=1``.
    :return: list of the kept URLs, in input order.
    """
    seen_params = set()
    seen_hosts = set()
    seen_paths = set()
    result = []
    # Pseudo-static de-duplication: take the query (page=1) via urlparse,
    # extract its key (page) and keep only URLs whose key is not a duplicate.
    for i in urls:
        url = parse.urlparse(i)
        # partition() also copes with a query that has no '=' at all.
        k = url.query.partition('=')[0]
        if url.netloc not in seen_hosts:
            seen_hosts.add(url.netloc)
            seen_paths.add(url.path)
            seen_params.add(k)
            result.append(i)
        elif url.path not in seen_paths:
            seen_paths.add(url.path)
            result.append(i)
        elif k not in seen_params:
            seen_params.add(k)
            result.append(i)

    return result
Example #27
Source File: url.py    From Vxscan with Apache License 2.0 5 votes vote down vote up
def parse_host(url):
    """Return the host part of ``url``.

    The scheme prefix and trailing ``/`` are removed, ``/word`` path
    segments are dropped when the URL has a path, and a ``:port`` suffix
    is removed.
    """
    # Get the host from the url, e.g. http://1.1.1.1:80 returns 1.1.1.1
    has_path = bool(parse.urlparse(url).path)
    host = url.replace('http://', '').replace('https://', '').rstrip('/')
    if has_path:
        host = re.sub(r'/\w+', '', host)
    return re.sub(r':\d+', '', host) if ':' in host else host
Example #28
Source File: crawl.py    From Vxscan with Apache License 2.0 5 votes vote down vote up
def dedup_url(urls):
    """Keep one URL per parent directory name.

    The key of a URL is the second-to-last ``/``-separated segment of its
    path. URLs without such a segment (or with an empty one) are always
    kept; for every other key only the first URL seen is kept.

    :param urls: iterable of URL strings.
    :return: list of kept URLs, in first-occurrence order.
    """
    seen_keys = set()
    okurl = []
    # dict.fromkeys removes exact duplicates while keeping input order,
    # so the result is deterministic.
    for i in dict.fromkeys(urls):
        segments = parse.urlparse(i).path.split('/')
        # A path without '/' has no parent segment to key on.
        key = segments[-2] if len(segments) >= 2 else ''
        if not key:
            okurl.append(i)
        elif key not in seen_keys:
            seen_keys.add(key)
            okurl.append(i)
    return okurl
Example #29
Source File: credentials.py    From misp42splunk with GNU Lesser General Public License v3.0 5 votes vote down vote up
def __init__(self, splunkd_uri, session_key, endpoint):
        """Store the connection details and derive the credential realm.

        The realm has the form ``__REST_CREDENTIAL__#<base app>#<endpoint>``,
        where the endpoint part is the endpoint's ``internal_endpoint``
        with surrounding ``/`` removed.
        """
        self._splunkd_uri = splunkd_uri
        self._splunkd_info = urlparse(self._splunkd_uri)
        self._session_key = session_key
        self._endpoint = endpoint

        realm_template = '__REST_CREDENTIAL__#{base_app}#{endpoint}'
        base_app = get_base_app_name()
        internal_endpoint = self._endpoint.internal_endpoint.strip('/')
        self._realm = realm_template.format(
            base_app=base_app,
            endpoint=internal_endpoint,
        )
Example #30
Source File: client.py    From pywren-ibm-cloud with Apache License 2.0 5 votes vote down vote up
def invoke(self, package, action_name, payload={}, is_ow_action=False, self_invoked=False):
        """
        Invoke an IBM Cloud Function by using new request.

        :param package: package name used in the action URL.
        :param action_name: action name used in the action URL.
        :param payload: JSON-serialisable body (never mutated here).
        :param is_ow_action: when true, post through ``self.session``;
            otherwise open a fresh HTTPS connection.
        :param self_invoked: true on the single automatic retry.
        :return: the activation id on a 202 response that carries one;
            ``None`` on a 429 response or when the retry fails as well.
        :raises Exception: for any other response (dedicated messages for
            401 and 404, otherwise the response's ``error`` field).
        """
        url = '/'.join([self.endpoint, 'api', 'v1', 'namespaces', self.namespace, 'actions', package, action_name])
        parsed_url = urlparse(url)
        # Known to the except-handler even if creating the connection fails.
        conn = None

        try:
            if is_ow_action:
                # NOTE(review): certificate verification is disabled here.
                resp = self.session.post(url, json=payload, verify=False)
                resp_status = resp.status_code
                data = resp.json()
            else:
                # NOTE(review): unverified TLS context - confirm this is intended.
                ctx = ssl._create_unverified_context()
                conn = http.client.HTTPSConnection(parsed_url.netloc, context=ctx)
                conn.request("POST", parsed_url.geturl(),
                             body=json.dumps(payload),
                             headers=self.headers)
                resp = conn.getresponse()
                resp_status = resp.status
                data = json.loads(resp.read().decode("utf-8"))
                conn.close()
        except Exception:
            if conn is not None:
                conn.close()
            # Retry exactly once; give up quietly if the retry fails too.
            if self_invoked:
                return None
            return self.invoke(package, action_name, payload, is_ow_action=is_ow_action, self_invoked=True)

        if resp_status == 202 and 'activationId' in data:
            return data["activationId"]
        elif resp_status == 429:
            return None  # "Too many concurrent requests in flight"
        else:
            logger.debug(data)
            if resp_status == 401:
                raise Exception('Unauthorized - Invalid API Key')
            elif resp_status == 404:
                raise Exception('Runtime: {} not deployed'.format(action_name))
            else:
                raise Exception(data['error'])