Python urllib.parse.urlparse() Examples
The following are 30 code examples of urllib.parse.urlparse(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib.parse, or try the search function.
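Before the project examples, here is a minimal, self-contained sketch of what urlparse() returns; the URL below is made up purely for illustration.

from urllib.parse import urlparse, parse_qsl

# Hypothetical URL used only to show the parsed components
parsed = urlparse("https://user@example.com:8080/path/page?key=value#frag")

print(parsed.scheme)    # 'https'
print(parsed.netloc)    # 'user@example.com:8080'
print(parsed.hostname)  # 'example.com'
print(parsed.port)      # 8080
print(parsed.path)      # '/path/page'
print(parsed.query)     # 'key=value'
print(parsed.fragment)  # 'frag'
print(dict(parse_qsl(parsed.query)))  # {'key': 'value'}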
Example #1
Source File: qr_reader.py From Authenticator with GNU General Public License v2.0 | 11 votes |
def read(self):
    try:
        from PIL import Image
        from pyzbar.pyzbar import decode
        decoded_data = decode(Image.open(self.filename))
        if path.isfile(self.filename):
            remove(self.filename)
        try:
            url = urlparse(decoded_data[0].data.decode())
            query_params = parse_qsl(url.query)
            self._codes = dict(query_params)
            return self._codes.get("secret")
        except (KeyError, IndexError):
            Logger.error("Invalid QR image")
            return None
    except ImportError:
        from ..application import Application
        Application.USE_QRSCANNER = False
        QRReader.ZBAR_FOUND = False
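The read() method above expects the QR code to encode an otpauth:// provisioning URI, where the TOTP secret lives in the query string. A minimal sketch of just that parsing step, using a made-up URI and the classic documentation secret:

from urllib.parse import urlparse, parse_qsl

# Hypothetical otpauth:// URI of the kind a TOTP QR code usually encodes
uri = "otpauth://totp/Example:alice@example.com?secret=JBSWY3DPEHPK3PXP&issuer=Example"
url = urlparse(uri)
params = dict(parse_qsl(url.query))
print(params.get("secret"))  # 'JBSWY3DPEHPK3PXP'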
Example #2
Source File: download.py From gog-galaxy-plugin-downloader with GNU General Public License v3.0 | 9 votes |
def get_plugin_config(config_uri):
    """
    Downloads/opens configuration yaml file, returns
    dict of Galaxy plugins
    """
    # Try to open the URI as a URL or fall back to opening local file
    try:
        config_uri_parsed = urlparse(config_uri)
        if config_uri_parsed.scheme in ['https', 'http']:
            url = urlopen(config_uri)
            yaml_data = url.read()
        else:
            with open(config_uri, 'r') as file_data:
                yaml_data = file_data.read()
    except URLError as e:
        print(e)

    # Parse the YAML configuration
    try:
        plugin_data = yaml.safe_load(yaml_data)
        return plugin_data['plugins']
    except yaml.YAMLError as e:
        print(e)
Example #3
Source File: asgi.py From quart with MIT License | 7 votes |
def _create_websocket_from_scope(self, send: Callable) -> Websocket:
    headers = Headers()
    headers["Remote-Addr"] = (self.scope.get("client") or ["<local>"])[0]
    for name, value in self.scope["headers"]:
        headers.add(name.decode("latin1").title(), value.decode("latin1"))

    path = self.scope["path"]
    path = path if path[0] == "/" else urlparse(path).path

    return self.app.websocket_class(
        path,
        self.scope["query_string"],
        self.scope["scheme"],
        headers,
        self.scope.get("root_path", ""),
        self.scope.get("http_version", "1.1"),
        self.scope.get("subprotocols", []),
        self.queue.get,
        partial(self.send_data, send),
        partial(self.accept_connection, send),
    )
Example #4
Source File: diagnose.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 7 votes |
def test_connection(name, url, timeout=10):
    """Simple connection test"""
    urlinfo = urlparse(url)
    start = time.time()
    try:
        ip = socket.gethostbyname(urlinfo.netloc)
    except Exception as e:
        print('Error resolving DNS for {}: {}, {}'.format(name, url, e))
        return
    dns_elapsed = time.time() - start
    start = time.time()
    try:
        _ = urlopen(url, timeout=timeout)
    except Exception as e:
        print("Error open {}: {}, {}, DNS finished in {} sec.".format(name, url, e, dns_elapsed))
        return
    load_elapsed = time.time() - start
    print("Timing for {}: {}, DNS: {:.4f} sec, LOAD: {:.4f} sec.".format(name, url, dns_elapsed, load_elapsed))
Example #5
Source File: sublist3r.py From subtake with GNU General Public License v2.0 | 6 votes |
def extract_domains(self, resp):
    link_regx = re.compile('<li class="b_algo"><h2><a href="(.*?)"')
    link_regx2 = re.compile('<div class="b_title"><h2><a href="(.*?)"')
    try:
        links = link_regx.findall(resp)
        links2 = link_regx2.findall(resp)
        links_list = links + links2
        for link in links_list:
            link = re.sub('<(\/)?strong>|<span.*?>|<|>', '', link)
            if not link.startswith('http'):
                link = "http://" + link
            subdomain = urlparse.urlparse(link).netloc
            if subdomain not in self.subdomains and subdomain != self.domain:
                if self.verbose:
                    self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                self.subdomains.append(subdomain.strip())
    except Exception:
        pass
    return links_list
Example #6
Source File: test_requests.py From sanic with MIT License | 6 votes |
def test_url_attributes_with_ssl_dict(app, path, query, expected_url):
    current_dir = os.path.dirname(os.path.realpath(__file__))
    ssl_cert = os.path.join(current_dir, "certs/selfsigned.cert")
    ssl_key = os.path.join(current_dir, "certs/selfsigned.key")
    ssl_dict = {"cert": ssl_cert, "key": ssl_key}

    async def handler(request):
        return text("OK")

    app.add_route(handler, path)

    request, response = app.test_client.get(
        f"https://{HOST}:{PORT}" + path + f"?{query}",
        server_kwargs={"ssl": ssl_dict},
    )
    assert request.url == expected_url.format(HOST, request.server_port)

    parsed = urlparse(request.url)

    assert parsed.scheme == request.scheme
    assert parsed.path == request.path
    assert parsed.query == request.query_string
    assert parsed.netloc == request.host
Example #7
Source File: tiny_proxy.py From sslyze with GNU Affero General Public License v3.0 | 6 votes |
def do_GET(self):
    (scm, netloc, path, params, query, fragment) = urlparse(self.path, "http")
    if scm != "http" or fragment or not netloc:
        self.send_error(400, "bad url %s" % self.path)
        return
    soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        if self._connect_to(netloc, soc):
            self.log_request()
            soc.send(
                "%s %s %s\r\n"
                % (self.command, urlunparse(("", "", path, params, query, "")), self.request_version)
            )
            self.headers["Connection"] = "close"
            del self.headers["Proxy-Connection"]
            for key_val in self.headers.items():
                soc.send("%s: %s\r\n" % key_val)
            soc.send("\r\n")
            self._read_write(soc)
    finally:
        logging.warning("Finished do_GET()")
        soc.close()
        self.connection.close()
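The proxy handler passes "http" as the second argument to urlparse(), which is the default scheme applied when the request target has none, and then uses urlunparse() to rebuild an origin-form path from the parsed pieces. A small sketch of just that round trip, with an invented request target:

from urllib.parse import urlparse, urlunparse

# Hypothetical absolute-form request target, as a proxy would receive it
target = "http://example.com:8080/index.html;params?q=1#frag"
scm, netloc, path, params, query, fragment = urlparse(target, "http")

# Rebuild only the path portion to forward to the origin server
print(urlunparse(("", "", path, params, query, "")))  # '/index.html;params?q=1'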
Example #8
Source File: utils.py From misp42splunk with GNU Lesser General Public License v3.0 | 6 votes |
def extract_http_scheme_host_port(http_url):
    '''Extract scheme, host and port from a HTTP URL.

    :param http_url: HTTP URL to extract.
    :type http_url: ``string``
    :returns: A tuple of scheme, host and port
    :rtype: ``tuple``
    :raises ValueError: If `http_url` is not in http(s)://hostname:port format.
    '''
    try:
        http_info = urlparse.urlparse(http_url)
    except Exception:
        raise ValueError(
            str(http_url) + " is not in http(s)://hostname:port format")
    if not http_info.scheme or not http_info.hostname or not http_info.port:
        raise ValueError(
            http_url + " is not in http(s)://hostname:port format")
    return (http_info.scheme, http_info.hostname, http_info.port)
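In this project urlparse appears to be a module-style compat alias, so urlparse.urlparse() is the same call as urllib.parse.urlparse(). The validation relies on the .hostname and .port attributes of the result; a quick sketch with a made-up management URI:

from urllib.parse import urlparse

# Hypothetical URI in http(s)://hostname:port form
info = urlparse("https://splunkd.example.local:8089")
print(info.scheme, info.hostname, info.port)  # https splunkd.example.local 8089

# A URL without an explicit port would fail the check above, since .port is None
print(urlparse("https://splunkd.example.local").port)  # None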
Example #9
Source File: __init__.py From misp42splunk with GNU Lesser General Public License v3.0 | 6 votes |
def __init__(self, splunkd_uri, session_key, schema):
    """
    Global Config.

    :param splunkd_uri:
    :param session_key:
    :param schema:
    :type schema: GlobalConfigSchema
    """
    self._splunkd_uri = splunkd_uri
    self._session_key = session_key
    self._schema = schema

    splunkd_info = urlparse(self._splunkd_uri)
    self._client = SplunkRestClient(
        self._session_key,
        self._schema.product,
        scheme=splunkd_info.scheme,
        host=splunkd_info.hostname,
        port=splunkd_info.port,
    )
    self._configuration = Configuration(self._client, self._schema)
    self._inputs = Inputs(self._client, self._schema)
    self._configs = Configs(self._client, self._schema)
    self._settings = Settings(self._client, self._schema)
Example #10
Source File: promenade_client.py From drydock with Apache License 2.0 | 6 votes |
def _get_prom_url(self):
    # Get promenade url from Keystone session object
    ks_session = self._get_ks_session()

    try:
        prom_endpoint = ks_session.get_endpoint(
            interface='internal',
            service_type='kubernetesprovisioner')
    except exc.EndpointNotFound:
        self.logger.error("Could not find an internal interface"
                          " defined in Keystone for Promenade")
        raise errors.DriverError("Could not find an internal interface"
                                 " defined in Keystone for Promenade")

    prom_url = urlparse(prom_endpoint)

    return prom_url
Example #11
Source File: base.py From zun with Apache License 2.0 | 6 votes |
def validate_link(self, link, bookmark=False):
    """Checks if the given link can get correct data."""
    # removes the scheme and net location parts of the link
    url_parts = list(urlparse.urlparse(link))
    url_parts[0] = url_parts[1] = ''

    # bookmark link should not have the version in the URL
    if bookmark and url_parts[2].startswith(PATH_PREFIX):
        return False

    full_path = urlparse.urlunparse(url_parts)
    try:
        self.get_json(full_path, path_prefix='')
        return True
    except Exception:
        return False
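validate_link() relies on the fact that the parse result is tuple-like, so casting it to a list lets the code blank out the scheme and netloc before urlunparse() reassembles a server-relative path (here too, urlparse appears to be a module-style alias for urllib.parse). A minimal sketch with an invented endpoint URL:

from urllib.parse import urlparse, urlunparse

# Hypothetical API link; blanking scheme and netloc leaves a server-relative path
url_parts = list(urlparse("http://zun.example.org:9517/v1/containers/abc123"))
url_parts[0] = url_parts[1] = ''
print(urlunparse(url_parts))  # '/v1/containers/abc123'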
Example #12
Source File: evillib.py From wafw00f with BSD 3-Clause "New" or "Revised" License | 6 votes |
def urlParser(target):
    log = logging.getLogger('urlparser')
    ssl = False
    o = urlparse(target)
    if o[0] not in ['http', 'https', '']:
        log.error('scheme %s not supported' % o[0])
        return
    if o[0] == 'https':
        ssl = True
    if len(o[2]) > 0:
        path = o[2]
    else:
        path = '/'
    tmp = o[1].split(':')
    if len(tmp) > 1:
        port = tmp[1]
    else:
        port = None
    hostname = tmp[0]
    query = o[4]
    return (hostname, port, path, query, ssl)
Example #13
Source File: url.py From Vxscan with Apache License 2.0 | 6 votes |
def dedup_link(urls):
    host = []
    _ = []
    furls = []
    for i in set(urls):
        # Only keep URLs that have query parameters; skip the rest
        if '=' in i and not re.search(r"'|@|\+", i):
            # Check whether the URL is pseudo-static; pseudo-static URLs
            # are deduplicated differently from normal ones
            if re.search(r'/\?\d+=', i):
                furls.append(i)
            else:
                # Strip the query with urlparse and deduplicate on
                # netloc + path; identical ones are discarded
                url = parse.urlparse(i)
                if url.netloc + url.path not in host:
                    host.append(url.netloc + url.path)
                    _.append(i)
    _.extend(diff(furls))
    return _
Example #14
Source File: requests_usbmux.py From facebook-wda with MIT License | 6 votes |
def get_connection(self, url, proxies=None):
    proxies = proxies or {}
    proxy = proxies.get(urlparse(url.lower()).scheme)
    if proxy:
        raise ValueError('%s does not support specifying proxies'
                         % self.__class__.__name__)

    with self.pools.lock:
        pool = self.pools.get(url)
        if pool:
            return pool

        pool = UsbmuxHTTPConnectionPool(url, self.timeout)
        self.pools[url] = pool

    return pool
Example #15
Source File: asgi.py From quart with MIT License | 6 votes |
def _create_request_from_scope(self, send: Callable) -> Request:
    headers = Headers()
    headers["Remote-Addr"] = (self.scope.get("client") or ["<local>"])[0]
    for name, value in self.scope["headers"]:
        headers.add(name.decode("latin1").title(), value.decode("latin1"))
    if self.scope["http_version"] < "1.1":
        headers.setdefault("Host", self.app.config["SERVER_NAME"] or "")

    path = self.scope["path"]
    path = path if path[0] == "/" else urlparse(path).path

    return self.app.request_class(
        self.scope["method"],
        self.scope["scheme"],
        path,
        self.scope["query_string"],
        headers,
        self.scope.get("root_path", ""),
        self.scope["http_version"],
        max_content_length=self.app.config["MAX_CONTENT_LENGTH"],
        body_timeout=self.app.config["BODY_TIMEOUT"],
        send_push_promise=partial(self._send_push_promise, send),
        scope=self.scope,
    )
Example #16
Source File: sublist3r.py From subtake with GNU General Public License v2.0 | 6 votes |
def extract_domains(self, resp):
    link_regx = re.compile('<cite.*?>(.*?)<\/cite>')
    try:
        links_list = link_regx.findall(resp)
        for link in links_list:
            link = re.sub('<span.*>', '', link)
            if not link.startswith('http'):
                link = "http://" + link
            subdomain = urlparse.urlparse(link).netloc
            if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                if self.verbose:
                    self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                self.subdomains.append(subdomain.strip())
    except Exception:
        pass
    return links_list
Example #17
Source File: sublist3r.py From subtake with GNU General Public License v2.0 | 6 votes |
def extract_domains(self, resp):
    link_regx2 = re.compile('<span class=" fz-15px fw-m fc-12th wr-bw.*?">(.*?)</span>')
    link_regx = re.compile('<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
    links_list = []
    try:
        links = link_regx.findall(resp)
        links2 = link_regx2.findall(resp)
        links_list = links + links2
        for link in links_list:
            link = re.sub("<(\/)?b>", "", link)
            if not link.startswith('http'):
                link = "http://" + link
            subdomain = urlparse.urlparse(link).netloc
            if not subdomain.endswith(self.domain):
                continue
            if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                if self.verbose:
                    self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                self.subdomains.append(subdomain.strip())
    except Exception:
        pass
    return links_list
Example #18
Source File: sublist3r.py From subtake with GNU General Public License v2.0 | 6 votes |
def extract_domains(self, resp):
    link_regx = re.compile('<p class="web-result-url">(.*?)</p>')
    try:
        links_list = link_regx.findall(resp)
        for link in links_list:
            if not link.startswith('http'):
                link = "http://" + link
            subdomain = urlparse.urlparse(link).netloc
            if subdomain not in self.subdomains and subdomain != self.domain:
                if self.verbose:
                    self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                self.subdomains.append(subdomain.strip())
    except Exception:
        pass
    return links_list
Example #19
Source File: connections.py From python-esppy with Apache License 2.0 | 6 votes |
def __init__(self, session, **kwargs):
    tools.Options.__init__(self, **kwargs)
    self._session = session

    url = urlparse(self._session.conn_url)

    self._secure = False
    if url[0] == "https":
        self._secure = True

    s = url[1].split(":")

    self._host = s[0]
    self._port = s[1]

    self._websocket = None
    self._handshakeComplete = False
    self._headers = None
    self._authorization = None
Example #20
Source File: port_scan.py From Vxscan with Apache License 2.0 | 5 votes |
def pool(self):
    out = []
    try:
        # Determine whether the given URL looks like www.baidu.com
        # or www.baidu.com/path
        if (not parse.urlparse(self.ipaddr).path) and (parse.urlparse(self.ipaddr).path != '/'):
            self.ipaddr = self.ipaddr.replace('http://', '').replace('https://', '').rstrip('/')
        else:
            self.ipaddr = self.ipaddr.replace('http://', '').replace('https://', '').rstrip('/')
            self.ipaddr = re.sub(r'/\w+', '', self.ipaddr)
        if re.search(r'\d+\.\d+\.\d+\.\d+', self.ipaddr):
            ipaddr = self.ipaddr
        else:
            ipaddr = socket.gethostbyname(self.ipaddr)
        if ':' in ipaddr:
            ipaddr = re.sub(r':\d+', '', ipaddr)
        self.run(ipaddr)
    except Exception as e:
        pass
    if self.num == 0:
        self.save(self.ipaddr, self.out)
        for _ in self.out:
            out.append('{}:{}'.format(_.get('server'), _.get('port')))
            console('PortScan', self.ipaddr, '{}:{}\n'.format(_.get('server'), _.get('port')))
        return out
    else:
        self.save(self.ipaddr, [{"server": 'Portspoof', "port": '0', "banner": ''}])
        console('PortScan', self.ipaddr, 'Portspoof:0\n')
        return ['Portspoof:0']
Example #21
Source File: file_utils.py From mrc-for-flat-nested-ner with Apache License 2.0 | 5 votes |
def split_s3_path(url: str) -> Tuple[str, str]:
    """Split a full s3 path into the bucket name and path."""
    parsed = urlparse(url)
    if not parsed.netloc or not parsed.path:
        raise ValueError("bad s3 path {}".format(url))
    bucket_name = parsed.netloc
    s3_path = parsed.path
    # Remove '/' at beginning of path.
    if s3_path.startswith("/"):
        s3_path = s3_path[1:]
    return bucket_name, s3_path
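split_s3_path() works because urlparse() handles any scheme with a // authority component, not just http(s): the bucket lands in netloc and the object key in path. A small sketch with a made-up S3 URI:

from urllib.parse import urlparse

# Hypothetical S3 URI: urlparse treats the bucket as netloc and the key as path
parsed = urlparse("s3://my-bucket/models/bert/config.json")
print(parsed.netloc)  # 'my-bucket'
print(parsed.path)    # '/models/bert/config.json'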
Example #22
Source File: verify.py From Vxscan with Apache License 2.0 | 5 votes |
def verify_https(url):
    # Verify whether the domain is served over http or https.
    # If the domain 302-redirects, get the address after the redirect.
    req = Requests()
    # noinspection PyBroadException
    if '://' in url:
        try:
            r = req.get(url)
            return url
        except Exception as e:
            pass
    host = parse_host(url)
    url2 = parse.urlparse(url)
    if url2.netloc:
        url = url2.netloc
    elif url2.path:
        url = url2.path
    # noinspection PyBroadException
    try:
        r = req.get('https://' + url)
        getattr(r, 'status_code')
        console('Verify', host, 'https://' + url + '\n')
        return 'https://' + url
    except AttributeError:
        # noinspection PyBroadException
        try:
            req.get('http://' + url)
            console('Verify', host, 'http://' + url + '\n')
            return 'http://' + url
        except Exception:
            pass
    except Exception as e:
        logging.exception(e)
Example #23
Source File: file_utils.py From mrc-for-flat-nested-ner with Apache License 2.0 | 5 votes |
def cached_path(url_or_filename: Union[str, Path], cache_dir: Union[str, Path] = None) -> str:
    """
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.
    """
    if cache_dir is None:
        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
    if isinstance(url_or_filename, Path):
        url_or_filename = str(url_or_filename)
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)

    parsed = urlparse(url_or_filename)

    if parsed.scheme in ('http', 'https', 's3'):
        # URL, so get it from the cache (downloading if necessary)
        return get_from_cache(url_or_filename, cache_dir)
    elif os.path.exists(url_or_filename):
        # File, and it exists.
        return url_or_filename
    elif parsed.scheme == '':
        # File, but it doesn't exist.
        raise FileNotFoundError("file {} not found".format(url_or_filename))
    else:
        # Something unknown
        raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename))
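cached_path() dispatches on the parsed scheme: 'http', 'https' and 's3' are treated as remote, while an empty scheme means a local filesystem path. A quick illustration with made-up inputs:

from urllib.parse import urlparse

# Scheme-based dispatch: remote schemes go to the cache, '' means a local path
for candidate in ("https://example.com/vocab.txt", "s3://bucket/vocab.txt", "/tmp/vocab.txt"):
    print(candidate, "->", urlparse(candidate).scheme or "<local file>")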
Example #24
Source File: url_checker.py From YaYaGen with BSD 2-Clause "Simplified" License | 5 votes |
def find_url_strings(self, url):
    """
    Return php pages extracted from the urls
    """
    strings = list()
    output = urlparse(url)
    if output.path.endswith(".php"):
        strings.append(output.path)
    return strings
Example #25
Source File: _https_client.py From oscrypto with MIT License | 5 votes |
def setup_connection(self, url, timeout):
    """
    :param url:
        The URL to download

    :param timeout:
        The int number of seconds to set the timeout to

    :return:
        A boolean indicating if the connection was reused
    """
    url_info = urlparse(url)
    if url_info.scheme == 'http':
        raise HttpsClientException('Can not connect to a non-TLS server')
    hostname = url_info.hostname
    port = url_info.port
    if not port:
        port = 443

    if self.socket and self.url_info != (hostname, port):
        self.close()

    self.timeout = timeout
    self.url_info = (hostname, port)
    return self.ensure_connected()
Example #26
Source File: url.py From Vxscan with Apache License 2.0 | 5 votes |
def diff(urls):
    parms = []
    host = []
    result = []
    path = []
    # url = 'https://www.xxx.com/?page=1
    # Pseudo-static deduplication: urlparse pulls out "page=1", splitting
    # on '=' gives k=page, and only URLs with an unseen k are kept
    for i in urls:
        url = parse.urlparse(i)
        print(url)
        k, v, *_ = url.query.split('=')
        if url.netloc in host:
            if url.path in path:
                if k not in parms:
                    parms.append(k)
                    result.append(i)
            else:
                result.append(i)
                path.append(url.path)
        else:
            host.append(url.netloc)
            result.append(i)
            path.append(url.path)
    return result
Example #27
Source File: url.py From Vxscan with Apache License 2.0 | 5 votes |
def parse_host(url):
    # Get the host from the URL, e.g. http://1.1.1.1:80 returns 1.1.1.1
    if (not parse.urlparse(url).path) and (parse.urlparse(url).path != '/'):
        host = url.replace('http://', '').replace('https://', '').rstrip('/')
    else:
        host = url.replace('http://', '').replace('https://', '').rstrip('/')
        host = re.sub(r'/\w+', '', host)
    if ':' in host:
        host = re.sub(r':\d+', '', host)
    return host
Example #28
Source File: crawl.py From Vxscan with Apache License 2.0 | 5 votes |
def dedup_url(urls):
    urls = list(set(urls))
    result = []
    okurl = []
    for i in urls:
        urlparse = parse.urlparse(i)
        path = urlparse.path
        if path and path.split('/')[-2]:
            key = path.split('/')[-2]
            if key not in result:
                result.append(key)
                okurl.append(i)
        else:
            okurl.append(i)
    return okurl
Example #29
Source File: credentials.py From misp42splunk with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, splunkd_uri, session_key, endpoint):
    self._splunkd_uri = splunkd_uri
    self._splunkd_info = urlparse(self._splunkd_uri)
    self._session_key = session_key
    self._endpoint = endpoint
    self._realm = '__REST_CREDENTIAL__#{base_app}#{endpoint}'.format(
        base_app=get_base_app_name(),
        endpoint=self._endpoint.internal_endpoint.strip('/')
    )
Example #30
Source File: client.py From pywren-ibm-cloud with Apache License 2.0 | 5 votes |
def invoke(self, package, action_name, payload={}, is_ow_action=False, self_invoked=False):
    """
    Invoke an IBM Cloud Function by using new request.
    """
    url = '/'.join([self.endpoint, 'api', 'v1', 'namespaces', self.namespace,
                    'actions', package, action_name])
    parsed_url = urlparse(url)

    try:
        if is_ow_action:
            resp = self.session.post(url, json=payload, verify=False)
            resp_status = resp.status_code
            data = resp.json()
        else:
            ctx = ssl._create_unverified_context()
            conn = http.client.HTTPSConnection(parsed_url.netloc, context=ctx)
            conn.request("POST", parsed_url.geturl(),
                         body=json.dumps(payload),
                         headers=self.headers)
            resp = conn.getresponse()
            resp_status = resp.status
            data = json.loads(resp.read().decode("utf-8"))
            conn.close()
    except Exception:
        if not is_ow_action:
            conn.close()
        if self_invoked:
            return None
        return self.invoke(package, action_name, payload, is_ow_action=is_ow_action, self_invoked=True)

    if resp_status == 202 and 'activationId' in data:
        return data["activationId"]
    elif resp_status == 429:
        return None  # "Too many concurrent requests in flight"
    else:
        logger.debug(data)
        if resp_status == 401:
            raise Exception('Unauthorized - Invalid API Key')
        elif resp_status == 404:
            raise Exception('Runtime: {} not deployed'.format(action_name))
        else:
            raise Exception(data['error'])