Python urllib.parse Examples
The following are 30 code examples of the urllib.parse module. The original project and source file are noted above each example. You may also want to check out all available functions/classes of the module urllib.parse, or try the search function.
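As a quick refresher before the examples, here is a minimal sketch (standard library only; the URLs are made up for illustration) of the urllib.parse calls that recur below:

import urllib.parse

parts = urllib.parse.urlparse('https://example.com/a/b?x=1#frag')
# ParseResult(scheme='https', netloc='example.com', path='/a/b',
#             params='', query='x=1', fragment='frag')
urllib.parse.urlencode({'q': 'hello world', 'n': 2})   # 'q=hello+world&n=2'
urllib.parse.quote('a b/c')                            # 'a%20b/c'
urllib.parse.unquote('a%20b')                          # 'a b'
urllib.parse.urljoin('https://example.com/a/b', 'c')   # 'https://example.com/a/c'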
Example #1
Source File: SubmitOrderRequest.py From 12306 with MIT License
def sendSubmitOrderRequest(self):
    """
    Submit the reservation request for the selected train; note the parameter order.
    To keep secretStr from being over-encoded by urllib.parse, it is decoded once here;
    otherwise HttpTester's post method would encode secretStr into an invalid string
    and the reservation request would fail.
    :param secretStr: encrypted string for the train submission
    :return:
    """
    submit_station_url = self.session.urls["submit_station_url"]
    submitResult = self.session.httpClint.send(submit_station_url, self.data_apr())
    if 'data' in submitResult and submitResult['data']:
        if submitResult['data'] == 'N':
            coi = checkOrderInfo(self.session, self.train_no, self.set_type,
                                 self.passengerTicketStrList, self.oldPassengerStr,
                                 self.train_date, self.ticke_peoples)
            coi.sendCheckOrderInfo()
        else:
            print(u'出票失败')  # "ticketing failed"
    elif 'messages' in submitResult and submitResult['messages']:
        raise ticketIsExitsException(submitResult['messages'][0])
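The double-encoding problem the docstring warns about is easy to reproduce with the standard library alone; this hypothetical snippet (the token value is made up) shows why the code unquotes once before re-sending:

import urllib.parse

token = 'abc%2Fdef'                # already percent-encoded ('/' as %2F)
urllib.parse.quote(token)          # 'abc%252Fdef' - the '%' itself gets re-encoded
urllib.parse.quote(urllib.parse.unquote(token), safe='')
# 'abc%2Fdef' - decode first and the round-trip is clean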
Example #2
Source File: pricing.py From thewarden with MIT License
def __init__(self, name, base_url, ticker_field, field_dict=None, doc_link=None):
    # field_dict includes all fields to be passed to the URL
    # for example, for Alphavantage:
    # name = 'Alphavantage_digital'
    # base_url = 'https://www.alphavantage.co/query'
    # ticker_field = 'symbol'
    # field_dict = {'function': 'DIGITAL_CURRENCY_DAILY',
    #               'market': 'CNY',
    #               'apikey': 'demo'}
    # doc_link = 'https://www.alphavantage.co/documentation/'
    # parse_dict = {'open': '1a. open (USD)', ...}
    self.name = name.lower()
    self.base_url = base_url
    self.ticker_field = ticker_field
    self.field_dict = field_dict
    self.doc_link = doc_link
    if self.field_dict is not None:
        self.url_args = "&" + urllib.parse.urlencode(field_dict)
    self.errors = []
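For the Alphavantage-style configuration sketched in the comments, url_args would come out as a ready-made query-string fragment; a quick check with the same (demo) values:

import urllib.parse

field_dict = {'function': 'DIGITAL_CURRENCY_DAILY', 'market': 'CNY', 'apikey': 'demo'}
url_args = "&" + urllib.parse.urlencode(field_dict)
# '&function=DIGITAL_CURRENCY_DAILY&market=CNY&apikey=demo'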
Example #3
Source File: xmlbuilder.py From jawfish with MIT License
def resolveEntity(self, publicId, systemId):
    assert systemId is not None
    source = DOMInputSource()
    source.publicId = publicId
    source.systemId = systemId
    source.byteStream = self._get_opener().open(systemId)

    # determine the encoding if the transport provided it
    source.encoding = self._guess_media_encoding(source)

    # determine the base URI if we can
    import posixpath, urllib.parse
    parts = urllib.parse.urlparse(systemId)
    scheme, netloc, path, params, query, fragment = parts
    # XXX should we check the scheme here as well?
    if path and not path.endswith("/"):
        path = posixpath.dirname(path) + "/"
        parts = scheme, netloc, path, params, query, fragment
        source.baseURI = urllib.parse.urlunparse(parts)

    return source
Example #4
Source File: resolver.py From drydock with Apache License 2.0
def resolve_reference_ucp(cls, design_uri):
    """Retrieve artifacts from an Airship service endpoint.

    Return a byte array of the response content. Assumes Keystone
    authentication is required.

    :param design_uri: Tuple as returned by urllib.parse for the design reference
    """
    ks_sess = KeystoneUtils.get_session()
    (new_scheme, foo) = re.subn(r'^[^+]+\+', '', design_uri.scheme)
    url = urllib.parse.urlunparse(
        (new_scheme, design_uri.netloc, design_uri.path,
         design_uri.params, design_uri.query, design_uri.fragment))
    LOG.debug("Calling Keystone session for url %s" % str(url))
    resp = ks_sess.get(url, timeout=get_client_timeouts())
    if resp.status_code >= 400:
        raise errors.InvalidDesignReference(
            "Received error code for reference %s: %s - %s" %
            (url, str(resp.status_code), resp.text))
    return resp.content
Example #5
Source File: main.py From oabot with MIT License
def bot_is_allowed(text, user):
    """
    Taken from https://en.wikipedia.org/wiki/Template:Bots
    For bot exclusion compliance.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    for tl in text.filter_templates():
        if tl.name in ('bots', 'nobots'):
            break
    else:
        return True
    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        if param.name == 'allow':
            if ''.join(bots) == 'none':
                return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
        elif param.name == 'deny':
            if ''.join(bots) == 'none':
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    return False
    return True
Example #6
Source File: __init__.py From misp42splunk with GNU Lesser General Public License v3.0
def _parse_cache_control(headers):
    retval = {}
    if "cache-control" in headers:
        parts = headers["cache-control"].split(",")
        parts_with_args = [
            tuple([x.strip().lower() for x in part.split("=", 1)])
            for part in parts
            if -1 != part.find("=")
        ]
        parts_wo_args = [
            (name.strip().lower(), 1) for name in parts if -1 == name.find("=")
        ]
        retval = dict(parts_with_args + parts_wo_args)
    return retval


# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, useful for testing servers.
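Given how the two comprehensions split on '=', a typical header parses like this (values keep their string form, bare directives map to 1; the header value is a made-up example):

_parse_cache_control({"cache-control": "max-age=3600, no-cache, private"})
# {'max-age': '3600', 'no-cache': 1, 'private': 1}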
Example #7
Source File: spotify_web.py From raveberry with GNU Lesser General Public License v3.0
def _prepare_url(self, url, *args, **kwargs):
    # TODO: Move this out as a helper and unit-test it directly?
    b = urllib.parse.urlsplit(self._base_url)
    u = urllib.parse.urlsplit(url.format(*args))

    if u.scheme or u.netloc:
        scheme, netloc, path = u.scheme, u.netloc, u.path
        query = urllib.parse.parse_qsl(u.query, keep_blank_values=True)
    else:
        scheme, netloc = b.scheme, b.netloc
        path = os.path.normpath(os.path.join(b.path, u.path))
        query = urllib.parse.parse_qsl(b.query, keep_blank_values=True)
        query.extend(urllib.parse.parse_qsl(u.query, keep_blank_values=True))

    for key, value in kwargs.items():
        query.append((key, value))

    encoded_query = urllib.parse.urlencode(dict(query))
    return urllib.parse.urlunsplit((scheme, netloc, path, encoded_query, ""))
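The heart of this helper is merging the base and relative query strings via parse_qsl/urlencode; a standalone sketch of that round-trip, with hypothetical URLs:

import urllib.parse

base = urllib.parse.urlsplit('https://api.example.com/v1?market=US')
rel = urllib.parse.urlsplit('tracks?limit=10')
query = urllib.parse.parse_qsl(base.query, keep_blank_values=True)
query.extend(urllib.parse.parse_qsl(rel.query, keep_blank_values=True))
urllib.parse.urlencode(dict(query))  # 'market=US&limit=10'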
Example #8
Source File: urlutils.py From qutebrowser with GNU General Public License v3.0
def qurl_from_user_input(urlstr: str) -> QUrl:
    """Get a QUrl based on a user input. Additionally handles IPv6 addresses.

    QUrl.fromUserInput handles something like '::1' as a file URL instead of
    an IPv6, so we first try to handle it as a valid IPv6, and if that fails
    we use QUrl.fromUserInput.

    WORKAROUND - https://bugreports.qt.io/browse/QTBUG-41089
    FIXME - Maybe https://codereview.qt-project.org/#/c/93851/ has a better way
            to solve this?
            https://github.com/qutebrowser/qutebrowser/issues/109

    Args:
        urlstr: The URL as string.

    Return:
        The converted QUrl.
    """
    # First we try very liberally to separate something like an IPv6 from the
    # rest (e.g. path info or parameters)
    match = re.fullmatch(r'\[?([0-9a-fA-F:.]+)\]?(.*)', urlstr.strip())
    if match:
        ipstr, rest = match.groups()
    else:
        ipstr = urlstr.strip()
        rest = ''
    # Then we try to parse it as an IPv6, and if we fail use
    # QUrl.fromUserInput.
    try:
        ipaddress.IPv6Address(ipstr)
    except ipaddress.AddressValueError:
        return QUrl.fromUserInput(urlstr)
    else:
        return QUrl('http://[{}]{}'.format(ipstr, rest))
Example #9
Source File: urlutils.py From qutebrowser with GNU General Public License v3.0
def filename_from_url(url: QUrl) -> typing.Optional[str]:
    """Get a suitable filename from a URL.

    Args:
        url: The URL to parse, as a QUrl.

    Return:
        The suggested filename as a string, or None.
    """
    if not url.isValid():
        return None
    pathname = posixpath.basename(url.path())
    if pathname:
        return pathname
    elif url.host():
        return url.host() + '.html'
    else:
        return None
Example #10
Source File: mobile_survey.py From arches with GNU Affero General Public License v3.0
def serialize(self, fields=None, exclude=None):
    """
    serialize to a different form than used by the internal class structure

    used to append additional values (like parent ontology properties) that
    internal objects (like models.Nodes) don't support
    """
    serializer = JSONSerializer()
    serializer.geom_format = "geojson"
    obj = serializer.handle_model(self)
    ordered_cards = self.get_ordered_cards()
    ret = JSONSerializer().serializeToPython(obj)
    ret["cards"] = ordered_cards
    try:
        bounds = json.loads(ret["bounds"])
        ret["bounds"] = bounds
        if bounds["type"] == "MultiPolygon":
            singlepart = GeoUtils().convert_multipart_to_singlepart(bounds)
            ret["bounds"] = singlepart
    except TypeError as e:
        print("Could not parse", ret["bounds"], e)
    return ret
Example #11
Source File: CTD.py From dipper with BSD 3-Clause "New" or "Revised" License
def parse(self, limit=None):
    """
    Override Source.parse()
    Parses version and interaction information from CTD

    Args:
        :param limit (int, optional) limit the number of rows processed
    Returns:
        :return None
    """
    if limit is not None:
        LOG.info("Only parsing first %d rows", limit)

    LOG.info("Parsing files...")

    if self.test_only:
        self.test_mode = True

    self.geno = Genotype(self.graph)
    self.pathway = Pathway(self.graph)

    src_key = 'chemical_disease_associations'
    self._parse_ctd_file(limit, src_key)

    # self._parse_ctd_file(limit, 'gene_pathway')
    # self._parse_ctd_file(limit, 'gene_disease')
Example #12
Source File: client.py From aiocouchdb with BSD 2-Clause "Simplified" License
def extract_credentials(url):
    """Extract authentication (user name and password) credentials from the
    given URL.

    >>> extract_credentials('http://localhost:5984/_config/')
    ('http://localhost:5984/_config/', None)
    >>> extract_credentials('http://joe:secret@localhost:5984/_config/')
    ('http://localhost:5984/_config/', ('joe', 'secret'))
    >>> extract_credentials('http://joe%40example.com:secret@localhost:5984/_config/')
    ('http://localhost:5984/_config/', ('joe@example.com', 'secret'))
    """
    parts = urllib.parse.urlsplit(url)
    netloc = parts[1]
    if '@' in netloc:
        creds, netloc = netloc.split('@')
        credentials = tuple(urllib.parse.unquote(i) for i in creds.split(':'))
        parts = list(parts)
        parts[1] = netloc
    else:
        credentials = None
    return urllib.parse.urlunsplit(parts), credentials
Example #13
Source File: resolver.py From drydock with Apache License 2.0
def resolve_reference_http(cls, design_uri):
    """Retrieve design documents from http/https endpoints.

    Return a byte array of the response content. Supports unsecured
    endpoints or basic auth.

    :param design_uri: Tuple as returned by urllib.parse for the design reference
    """
    if design_uri.username is not None and design_uri.password is not None:
        response = requests.get(
            design_uri.geturl(),
            auth=(design_uri.username, design_uri.password),
            timeout=get_client_timeouts())
    else:
        response = requests.get(
            design_uri.geturl(), timeout=get_client_timeouts())

    return response.content
Example #14
Source File: SubmitOrderRequest.py From 12306 with MIT License
def __init__(self, selectObj, secretStr, from_station, to_station, train_no,
             set_type, passengerTicketStrList, oldPassengerStr, train_date,
             ticke_peoples):
    self.session = selectObj
    # self.secretStr = secretStr
    try:
        self.secretStr = urllib.unquote(secretStr)
    except AttributeError:
        self.secretStr = urllib.parse.unquote(secretStr)
    self.from_station = from_station
    self.to_station = to_station
    self.train_no = train_no
    self.set_type = set_type
    self.passengerTicketStrList = passengerTicketStrList
    self.oldPassengerStr = oldPassengerStr
    self.train_date = train_date
    self.ticke_peoples = ticke_peoples
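The try/except AttributeError above is a Python 2/3 compatibility shim: urllib.unquote existed in Python 2, while Python 3 moved it to urllib.parse.unquote. An equivalent import-time variant (a sketch, not this project's code) would be:

try:
    from urllib import unquote        # Python 2
except ImportError:
    from urllib.parse import unquote  # Python 3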
Example #15
Source File: lti_xblock.py From xblock-lti-consumer with GNU Affero General Public License v3.0
def lti_provider_key_secret(self):
    """
    Obtains client_key and client_secret credentials from current course.
    """
    for lti_passport in self.course.lti_passports:
        try:
            lti_id, key, secret = [i.strip() for i in lti_passport.split(':')]
        except ValueError:
            msg = 'Could not parse LTI passport: {lti_passport!r}. Should be "id:key:secret" string.'
            msg = self.ugettext(msg).format(lti_passport=lti_passport)
            raise LtiError(msg)
        if lti_id == self.lti_id.strip():
            return key, secret
    return '', ''
Example #16
Source File: lti_xblock.py From xblock-lti-consumer with GNU Affero General Public License v3.0
def parse_handler_suffix(suffix):
    """
    Parser function for HTTP request path suffixes.

    Parses the suffix argument (the trailing parts of the URL) of the LTI 2.0
    REST handler. The suffix must be of the form "user/<anon_id>". Returns
    anon_id if a match is found, otherwise raises LtiError.

    Arguments:
        suffix (unicode): suffix to parse

    Returns:
        unicode: anon_id if match found

    Raises:
        LtiError if suffix cannot be parsed or is not in its expected form
    """
    if suffix:
        match_obj = RESULT_SERVICE_SUFFIX_PARSER.match(suffix)
        if match_obj:
            return match_obj.group('anon_id')
    # fall-through handles all error cases
    msg = _("No valid user id found in endpoint URL")
    log.info("[LTI]: %s", msg)
    raise LtiError(msg)
Example #17
Source File: imageverify.py From ProjectFib with Open Software License 3.0
def main(link):
    # link = "http://i.imgur.com/walokrp.png"
    tokens = [urllib.parse.urlparse(url) for url in ("", link)]
    count = 0
    min_attributes = ('scheme', 'netloc')  # add attrs to your liking
    for token in tokens:
        if not all([getattr(token, attr) for attr in min_attributes]):
            if count > 0:
                print("no link")
            else:
                count += 1
        else:
            if ".jpg" in link or ".png" in link:
                if no_adult_content("{\"url\":\"" + link + "\"}") and twitter_present(link):
                    return "Verified"
                else:
                    return "Not Verified"
            else:
                return other_links(link)
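The scheme/netloc test above is a common minimal sanity check for URLs; distilled into a hypothetical standalone helper:

import urllib.parse

def is_probable_url(candidate):
    """True only if the string parses with both a scheme and a host."""
    token = urllib.parse.urlparse(candidate)
    return all((token.scheme, token.netloc))

is_probable_url('http://i.imgur.com/walokrp.png')  # True
is_probable_url('not a url')                       # False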
Example #18
Source File: __init__.py From benchexec with Apache License 2.0
def insert_logfile_names(resultFile, resultElem):
    # get folder of logfiles (truncate end of XML file name and append .logfiles instead)
    log_folder = resultFile[0 : resultFile.rfind(".results.")] + ".logfiles/"

    # append begin of filename
    runSetName = resultElem.get("name")
    if runSetName is not None:
        blockname = resultElem.get("block")
        if blockname is None:
            log_folder += runSetName + "."
        elif blockname == runSetName:
            pass  # real runSetName is empty
        else:
            assert runSetName.endswith("." + blockname)
            runSetName = runSetName[: -(1 + len(blockname))]  # remove last chars
            log_folder += runSetName + "."

    # for each file: append original filename and insert log_file_name into sourcefileElement
    for sourcefile in _get_run_tags_from_xml(resultElem):
        if "logfile" in sourcefile.attrib:
            log_file = urllib.parse.urljoin(resultFile, sourcefile.get("logfile"))
        else:
            log_file = log_folder + os.path.basename(sourcefile.get("name")) + ".log"
        sourcefile.set("logfile", log_file)
Example #19
Source File: __init__.py From benchexec with Apache License 2.0
def parse_table_definition_file(file):
    """
    Read and parse the XML of a table-definition file.
    @return: an ElementTree object for the table definition
    """
    logging.info("Reading table definition from '%s'...", file)
    if not os.path.isfile(file):
        handle_error("File '%s' does not exist.", file)

    try:
        tableGenFile = ElementTree.ElementTree().parse(file)
    except OSError as e:
        handle_error("Could not read result file %s: %s", file, e)
    except ElementTree.ParseError as e:
        handle_error("Table file %s is invalid: %s", file, e)

    if "table" != tableGenFile.tag:
        handle_error(
            "Table file %s is invalid: Its root element is not named 'table'.", file
        )

    return tableGenFile
Example #20
Source File: http.py From dionaea with GNU General Public License v2.0
def __init__(self, header):
    hlines = header.split(b'\n')
    req = hlines[0]
    reqparts = req.split(b" ")
    self.type = reqparts[0]
    self.path = urllib.parse.unquote(reqparts[1].decode('utf-8'))
    self.version = reqparts[2]
    r = self.version.find(b"\r")
    if r != -1:  # find() returns -1 when absent; a bare `if r:` would wrongly truncate
        self.version = self.version[:r]
    self.headers = {}
    for hline in hlines[1:]:
        if hline[len(hline)-1] == 13:  # \r
            hline = hline[:len(hline)-1]
        hset = hline.split(b":", 1)
        self.headers[hset[0].lower()] = hset[1].strip()
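The only urllib.parse call here is unquote, which turns the percent-encoded request path back into its literal form before any later checks run; a made-up request path as illustration:

import urllib.parse

urllib.parse.unquote('/cgi-bin/test%20dir/..%2F..%2Fetc')
# '/cgi-bin/test dir/../../etc'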
Example #21
Source File: rdfvalue.py From pyaff4 with Apache License 2.0
def _Parse(self, value):
    components = urllib.parse.urlparse(value)
    # don't normalise the path for http URIs
    if components.scheme and not components.scheme == "http":
        normalized_path = posixpath.normpath(components.path)
        if normalized_path == ".":
            normalized_path = ""
        components = components._replace(path=normalized_path)
    if not components.scheme:
        # For file:// URNs, we need to parse them from a filename.
        components = components._replace(
            netloc="",
            path=urllib.request.pathname2url(value),
            scheme="file")
        self.original_filename = value
    return components
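The _replace calls work because urlparse returns a ParseResult, a namedtuple subclass, so individual fields can be swapped without rebuilding the six-tuple by hand; a small demonstration with a made-up aff4 URI:

import posixpath
import urllib.parse

parts = urllib.parse.urlparse('aff4://volume/a/b/../c')
parts = parts._replace(path=posixpath.normpath(parts.path))
urllib.parse.urlunparse(parts)  # 'aff4://volume/a/c'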
Example #22
Source File: example_tools.py From buzzard with Apache License 2.0
def _url_status(url):
    parse_obj = urllib.parse.urlparse(url)
    timer = 1
    for i in range(6):
        try:
            connection = http.client.HTTPConnection(parse_obj.netloc)
            connection.request('HEAD', parse_obj.path)
            break
        except Exception as e:
            print(url, e, 'sleep', timer)
            time.sleep(timer)
            timer *= 2
    else:
        return e
    response = connection.getresponse()
    connection.close()
    return response.status
Example #23
Source File: __init__.py From benchexec with Apache License 2.0
def normalize_path(path, base_path_or_url):
    """Returns a normalized form of path, interpreted relative to base_path_or_url"""
    if util.is_url(base_path_or_url):
        return urllib.parse.urljoin(base_path_or_url, path)
    else:
        return os.path.normpath(os.path.join(os.path.dirname(base_path_or_url), path))
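Worth remembering when reading the URL branch: urljoin replaces the last path segment of the base unless the base ends in a slash, which mirrors what os.path.dirname does in the local-path branch (example URLs are hypothetical):

import urllib.parse

urllib.parse.urljoin('https://example.com/runs/table.xml', 'run.log')
# 'https://example.com/runs/run.log'  - last segment replaced
urllib.parse.urljoin('https://example.com/runs', 'run.log')
# 'https://example.com/run.log'       - without a trailing slash, 'runs' is replaced too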
Example #24
Source File: io_manager.py From rekall with GNU General Public License v2.0
def _GetURL(self, name):
    url = self.url._replace(path="%s/%s/%s" % (
        self.url.path, self.version, name))
    return urllib.parse.urlunparse(url)
Example #25
Source File: sublist3r.py From subtake with GNU General Public License v2.0
def parse_args():
    # parse the arguments
    parser = argparse.ArgumentParser(epilog='\tExample: \r\npython ' + sys.argv[0] + " -d google.com")
    parser.error = parser_error
    parser._optionals.title = "OPTIONS"
    parser.add_argument('-d', '--domain', help="Domain name to enumerate its subdomains", required=True)
    parser.add_argument('-b', '--bruteforce', help='Enable the subbrute bruteforce module', nargs='?', default=False)
    parser.add_argument('-p', '--ports', help='Scan the found subdomains against specified tcp ports')
    parser.add_argument('-v', '--verbose', help='Enable Verbosity and display results in realtime', nargs='?', default=False)
    parser.add_argument('-t', '--threads', help='Number of threads to use for subbrute bruteforce', type=int, default=30)
    parser.add_argument('-e', '--engines', help='Specify a comma-separated list of search engines')
    parser.add_argument('-o', '--output', help='Save the results to text file')
    return parser.parse_args()
Example #26
Source File: imageverify.py From ProjectFib with Open Software License 3.0
def other_links(url):
    """
    Uses Microsoft's Cognitive API to evaluate the quality of a webpage,
    and suggest better information if possible.
    """
    link_verified = verified_links(url)
    if link_verified == "not verified":
        st = url_title(url)
        import http.client, urllib.request, urllib.parse, urllib.error
        headers = {
            'Ocp-Apim-Subscription-Key': MICROSOFT_SEARCH_SUBSCRIPTION_KEY,
        }
        params = urllib.parse.urlencode({
            'q': st,
            'count': '10',
            'offset': '0',
            'mkt': 'en-us',
            'safesearch': 'Moderate',
        })
        try:
            conn = http.client.HTTPSConnection('api.cognitive.microsoft.com')
            conn.request("GET", "/bing/v5.0/search?%s" % params, "", headers)
            response = conn.getresponse()
            data = response.read()
            # print(data)
            data = json.loads(data.decode("utf-8"))
            for alt_url in data['webPages']['value']:
                if alt_url['displayUrl'] != url:
                    urlscores = verified_links(alt_url['displayUrl'])
                    if urlscores == "verified":
                        alternative_summary = "Non verified. Better Verified Info is : " + summarization(alt_url['displayUrl'])
                        return alternative_summary
            conn.close()
            return "no verified links"
        except Exception as e:
            print("[Errno {0}] {1}".format(e.errno, e.strerror))
    else:
        return link_verified
Example #27
Source File: imageverify.py From ProjectFib with Open Software License 3.0
def no_adult_content(body):
    """
    Use Microsoft's Project Oxford Computer Vision API to detect Adult/NSFW
    content in images. Returns True if content is Safe For Work (SFW), and
    False otherwise.
    """
    is_adult = False
    is_racy = False
    headers = {
        'Content-Type': 'application/json',
        'Ocp-Apim-Subscription-Key': MICROSOFT_CV_SUBSCRIPTION_KEY,
    }
    params = urllib.parse.urlencode({'visualFeatures': 'Adult', 'language': 'en',})
    # body = "{\"url\":\"http://www.gettyimages.ca/gi-resources/images/Homepage/Hero/UK/CMS_Creative_164657191_Kingfisher.jpg\"}"
    microsoft_project_oxford_endpoint = 'api.projectoxford.ai'
    try:
        conn = http.client.HTTPSConnection(microsoft_project_oxford_endpoint)
        conn.request("POST", "/vision/v1.0/analyze?%s" % params, body, headers)
        response = conn.getresponse()
        data = response.read()
        data = json.loads(data.decode("utf-8"))
        is_adult = data['adult']['isAdultContent']
        is_racy = data['adult']['isRacyContent']
        conn.close()
    except Exception as e:
        print("[Errno {0}] {1}".format(e.errno, e.strerror))
    return not is_adult and not is_racy

# no_adult_content("{\"url\":\"http://www.gettyimages.ca/gi-resources/images/Homepage/Hero/UK/CMS_Creative_164657191_Kingfisher.jpg\"}")

# TODO this function's variable names are pretty confusing, and there's not enough
# informative commenting for an 80-line function. The function is likely still too long.
Example #28
Source File: xmlbuilder.py From jawfish with MIT License
def parseURI(self, uri):
    if self.entityResolver:
        input = self.entityResolver.resolveEntity(None, uri)
    else:
        input = DOMEntityResolver().resolveEntity(None, uri)
    return self.parse(input)
Example #29
Source File: __init__.py From benchexec with Apache License 2.0
def load_result(
    result_file, options, run_set_id=None, columns=None, columns_relevant_for_diff=set()
):
    """
    Completely handle loading a single result file.
    @param result_file the file to parse
    @param options additional options
    @param run_set_id the identifier of the run set
    @param columns the list of columns
    @param columns_relevant_for_diff a set of columns that is relevant for
           the diff table
    @return a fully ready RunSetResult instance or None
    """
    xml = parse_results_file(
        result_file, run_set_id=run_set_id, ignore_errors=options.ignore_errors
    )
    if xml is None:
        return None

    result = RunSetResult.create_from_xml(
        result_file,
        xml,
        columns=columns,
        all_columns=options.all_columns,
        columns_relevant_for_diff=columns_relevant_for_diff,
    )
    result.collect_data(options.correct_only)
    return result
Example #30
Source File: pricing.py From thewarden with MIT License
def request_data(self, ticker):
    data = None
    if self.base_url is not None:
        ticker = ticker.upper()
        globalURL = (self.base_url + "?" + self.ticker_field + "=" +
                     ticker + self.url_args)
        # Some APIs use the ticker without a ticker field i.e. xx.xx./AAPL&...
        # in these cases, we pass the ticker field as empty
        if self.ticker_field == '':
            if self.url_args[0] == '&':
                self.url_args = self.url_args.replace('&', '?', 1)
            globalURL = (self.base_url + "/" + ticker + self.url_args)
        request = tor_request(globalURL)
        try:
            data = request.json()
        except Exception:
            try:
                # Try again - some APIs return a json already
                data = json.loads(request)
            except Exception as e:
                self.errors.append(e)
    return data

# PriceData Class Information
# Example on how to create a ticker class (PriceData)
# provider = PROVIDER_LIST['cc_digital']
# btc = PriceData("BTC", provider)
# btc.errors: Any error messages
# btc.provider: Provider being used for requests
# btc.filename: Local filename where historical prices are saved
# Other info:
# btc.ticker, btc.last_update, btc.first_update, btc.last_close
# btc.update_history(force=False)
# btc.df_fx(currency, fx_provider): returns a df with
#     prices and fx conversions
# btc.price_ondate(date)
# btc.price_parser(): do not use directly. This is used to parse
#     the requested data from the API provider
# btc.realtime(provider): returns realtime price (float)
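An aside on the string concatenation in request_data above: the same URL can be assembled with urllib.parse.urlencode, which also percent-escapes values; a minimal sketch assuming the same attributes (build_url is a hypothetical helper, not part of the class):

import urllib.parse

def build_url(base_url, ticker_field, ticker, field_dict):
    params = {ticker_field: ticker.upper(), **(field_dict or {})}
    return base_url + "?" + urllib.parse.urlencode(params)

build_url('https://www.alphavantage.co/query', 'symbol', 'btc',
          {'function': 'DIGITAL_CURRENCY_DAILY', 'apikey': 'demo'})
# 'https://www.alphavantage.co/query?symbol=BTC&function=DIGITAL_CURRENCY_DAILY&apikey=demo'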