Python urllib.parse.urldefrag() Examples
The following are 16 code examples of urllib.parse.urldefrag(). Each example is taken from the open-source project and source file named in its header.
You may also want to check out the other functions and classes available in the urllib.parse module.
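Before the examples, a minimal sketch of the function's behavior (the URLs below are arbitrary placeholders): urldefrag() splits a URL into the URL proper and its fragment identifier, returning a DefragResult named tuple that also unpacks as a plain (url, fragment) pair.

from urllib.parse import urldefrag

result = urldefrag("https://example.com/docs/page?q=1#section-2")
print(result.url)       # https://example.com/docs/page?q=1
print(result.fragment)  # section-2

# The result also unpacks like an ordinary 2-tuple; the fragment is an
# empty string when the URL has none.
pure_url, frag = urldefrag("https://example.com/docs/page")
print(repr(frag))       # ''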
Example #1
Source File: tornado-crawler-demo1.py From Python_Master_Courses with GNU General Public License v3.0
def remove_fragment(url):
    pure_url, frag = urldefrag(url)
    return pure_url
Example #2
Source File: expander.py From dspl with BSD 3-Clause "New" or "Revised" License
def _ExpandFootnotes(self):
    for result in self.graph.query(
        MakeSparqlSelectQuery(
            ('?ds', 'a', 'schema:StatisticalDataset'),
            ('?ds', 'schema:footnote', '?fn'),
            ns_manager=self.graph.namespace_manager)):
        if result['fn'] not in self.subjects:
            self.graph.remove((result['ds'], SCHEMA.footnote, result['fn']))
            id_prefix = urldefrag(str(result['ds'])).url
            with self.getter.Fetch(str(result['fn'])) as f:
                reader = DictReader(f)
                for row in reader:
                    row_id = rdflib.URIRef(
                        id_prefix + '#footnote=' + row['codeValue'])
                    self.graph.add((result['ds'], SCHEMA.footnote, row_id))
                    self.graph.add((row_id, rdflib.RDF.type,
                                    SCHEMA.StatisticalAnnotation))
                    for key, val in row.items():
                        fields = key.split('@')
                        if len(fields) > 1:
                            # A language code is specified
                            self.graph.add(
                                (row_id, getattr(SCHEMA, fields[0]),
                                 rdflib.Literal(val, language=fields[1])))
                        else:
                            self.graph.add((row_id, getattr(SCHEMA, key),
                                            rdflib.Literal(val)))
Example #3
Source File: expander.py From dspl with BSD 3-Clause "New" or "Revised" License
def _MakeSliceDataRowId(self, slice_id, dims, measures, row, tableMappings):
    ret = str(slice_id)
    if not urldefrag(slice_id).fragment:
        ret += '#'
    else:
        ret += '/'
    for dim in dims:
        dim_key = dim
        for tableMapping in tableMappings:
            if tableMapping['sourceEntity'] == dim:
                if tableMapping['columnIdentifier']:
                    dim_key = str(tableMapping['columnIdentifier'])
                break
        ret += dim + '=' + row[dim_key]
        ret += '/'
    for measure in measures:
        ret += measure
        ret += '/'
    return ret
Example #4
Source File: ref_resolver.py From python-fastjsonschema with BSD 3-Clause "New" or "Revised" License
def resolving(self, ref: str):
    """
    Context manager which resolves a JSON ``ref`` and enters the resolution
    scope of this ref.
    """
    new_uri = urlparse.urljoin(self.resolution_scope, ref)
    uri, fragment = urlparse.urldefrag(new_uri)

    if normalize(uri) in self.store:
        schema = self.store[normalize(uri)]
    elif not uri or uri == self.base_uri:
        schema = self.schema
    else:
        schema = resolve_remote(uri, self.handlers)
        if self.cache:
            self.store[normalize(uri)] = schema

    old_base_uri, old_schema = self.base_uri, self.schema
    self.base_uri, self.schema = uri, schema
    try:
        with self.in_scope(uri):
            yield resolve_path(schema, fragment)
    finally:
        self.base_uri, self.schema = old_base_uri, old_schema
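The resolver above leans on two stdlib primitives (here urlparse is simply the urllib.parse module imported under an alias): urljoin() resolves the ref against the current scope, and urldefrag() separates the document URI from the JSON pointer that follows '#'. A minimal trace of that pairing, with made-up URIs:

from urllib.parse import urljoin, urldefrag

scope = "http://example.com/schemas/root.json"
new_uri = urljoin(scope, "definitions.json#/address")
uri, fragment = urldefrag(new_uri)
print(uri)       # http://example.com/schemas/definitions.json
print(fragment)  # /address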
Example #5
Source File: webspider.py From tornado-zh with MIT License
def remove_fragment(url):
    pure_url, frag = urldefrag(url)
    return pure_url
Example #6
Source File: tornado-crawler-demo2.py From Python_Master_Courses with GNU General Public License v3.0
def remove_fragment(self, url):
    pure_url, frag = urldefrag(url)
    return pure_url

# Use HTMLParser to parse the HTML and extract the URLs in it;
# BeautifulSoup or similar libraries can be used as well.
Example #7
Source File: expander.py From dspl with BSD 3-Clause "New" or "Revised" License
def _GetDimensionDataForSlice(self, slice_id, tableMappings):
    ret = {}
    dims = sorted(
        self.graph.objects(
            subject=slice_id,
            predicate=SCHEMA.dimension))
    for dim_id in dims:
        dim_type = list(self.graph.objects(
            subject=dim_id, predicate=rdflib.RDF.type))
        dim_equiv_types = list(self.graph.objects(
            subject=dim_id, predicate=SCHEMA.equivalentType))
        csv_id = urldefrag(dim_id).fragment
        for tableMapping in tableMappings:
            if tableMapping['sourceEntity'] == dim_id:
                csv_id = str(tableMapping['columnIdentifier'])
                break
        if not csv_id:
            print("Unable to determine CSV ID for dimension", dim_id,
                  file=sys.stderr)
            exit(1)
        ret[csv_id] = {
            'id': dim_id,
            'type': dim_type,
            'types': dim_equiv_types
        }
    return ret
Example #8
Source File: expander.py From dspl with BSD 3-Clause "New" or "Revised" License
def _GetMeasureDataForSlice(self, slice_id, tableMappings):
    ret = {}
    measures = sorted(
        self.graph.objects(
            subject=slice_id,
            predicate=SCHEMA.measure))
    for measure_id in measures:
        unit_codes = list(self.graph.objects(
            subject=measure_id, predicate=SCHEMA.unitCode))
        unit_texts = list(self.graph.objects(
            subject=measure_id, predicate=SCHEMA.unitText))
        csv_id = urldefrag(measure_id).fragment
        for tableMapping in tableMappings:
            if tableMapping['sourceEntity'] == measure_id:
                csv_id = str(tableMapping['columnIdentifier'])
                break
        if not csv_id:
            print("Unable to determine CSV ID for metric", measure_id,
                  file=sys.stderr)
            exit(1)
        ret[csv_id] = {
            'id': measure_id,
            'unit_code': unit_codes,
            'unit_text': unit_texts,
        }
    return ret
Example #9
Source File: crawlerino.py From crawlerino with MIT License
def getlinks(pageurl, domain, soup):
    """Returns a list of links from this page to be crawled.

    pageurl = URL of this page
    domain = domain being crawled (None to return links to *any* domain)
    soup = BeautifulSoup object for this page
    """
    # get target URLs for all links on the page
    links = [a.attrs.get("href") for a in soup.select("a[href]")]
    # remove fragment identifiers
    links = [urldefrag(link)[0] for link in links]
    # remove any empty strings
    links = [link for link in links if link]
    # if it's a relative link, change to absolute
    links = [link if bool(urlparse(link).netloc) else urljoin(pageurl, link)
             for link in links]
    # if only crawling a single domain, remove links to other domains
    if domain:
        links = [link for link in links
                 if samedomain(urlparse(link).netloc, domain)]
    return links
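A hedged usage sketch of getlinks(), assuming the function above and its urllib.parse imports are in scope, bs4 is installed, and domain=None so the samedomain() helper (defined elsewhere in crawlerino) is never reached; the HTML snippet is invented:

from bs4 import BeautifulSoup

html = '<a href="/about#team">About</a> <a href="https://other.example/">Other</a>'
soup = BeautifulSoup(html, "html.parser")
print(getlinks("https://example.com/", None, soup))
# ['https://example.com/about', 'https://other.example/']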
Example #10
Source File: storage.py From bioforum with MIT License
def _url(self, hashed_name_func, name, force=False, hashed_files=None):
    """
    Return the non-hashed URL in DEBUG mode.
    """
    if settings.DEBUG and not force:
        hashed_name, fragment = name, ''
    else:
        clean_name, fragment = urldefrag(name)
        if urlsplit(clean_name).path.endswith('/'):  # don't hash paths
            hashed_name = name
        else:
            args = (clean_name,)
            if hashed_files is not None:
                args += (hashed_files,)
            hashed_name = hashed_name_func(*args)

    final_url = super().url(hashed_name)

    # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
    # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
    query_fragment = '?#' in name  # [sic!]
    if fragment or query_fragment:
        urlparts = list(urlsplit(final_url))
        if fragment and not urlparts[4]:
            urlparts[4] = fragment
        if query_fragment and not urlparts[3]:
            urlparts[2] += '?'
        final_url = urlunsplit(urlparts)

    return unquote(final_url)
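The query_fragment special case above exists because of a well-known @font-face workaround: CSS references like url(myfont.eot?#iefix) carry an empty query plus a fragment, and urldefrag() silently drops the lone '?' when it reassembles the URL, so the original name has to be checked for a literal '?#'. A quick illustration (the file name is invented):

from urllib.parse import urldefrag

clean_name, fragment = urldefrag("myfont.eot?#iefix")
print(clean_name)                   # myfont.eot  (the empty '?' is lost)
print(fragment)                     # iefix
print('?#' in "myfont.eot?#iefix")  # True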
Example #11
Source File: client.py From Safejumper-for-Desktop with GNU General Public License v2.0
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to
    provide missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}
    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag))
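A short trace of the body with concrete bytes values (invented here; note the helper operates on bytes, and it appears again verbatim in the next example) shows how the base URL's fragment is carried over:

from urllib.parse import urldefrag, urljoin

base, baseFrag = urldefrag(b"http://example.com/a#one")
url, urlFrag = urldefrag(urljoin(base, b"/b"))
print(url, urlFrag)  # b'http://example.com/b' b''
# The relative URL had no fragment of its own, so the base fragment wins:
print(urljoin(url, b"#" + (urlFrag or baseFrag)))  # b'http://example.com/b#one'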
Example #12
Source File: client.py From learn_python3_spider with MIT License
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to
    provide missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}
    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag))
Example #13
Source File: storage.py From Hands-On-Application-Development-with-PyCharm with MIT License
def _url(self, hashed_name_func, name, force=False, hashed_files=None):
    """
    Return the non-hashed URL in DEBUG mode.
    """
    if settings.DEBUG and not force:
        hashed_name, fragment = name, ''
    else:
        clean_name, fragment = urldefrag(name)
        if urlsplit(clean_name).path.endswith('/'):  # don't hash paths
            hashed_name = name
        else:
            args = (clean_name,)
            if hashed_files is not None:
                args += (hashed_files,)
            hashed_name = hashed_name_func(*args)

    final_url = super().url(hashed_name)

    # Special casing for a @font-face hack, like url(myfont.eot?#iefix")
    # http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
    query_fragment = '?#' in name  # [sic!]
    if fragment or query_fragment:
        urlparts = list(urlsplit(final_url))
        if fragment and not urlparts[4]:
            urlparts[4] = fragment
        if query_fragment and not urlparts[3]:
            urlparts[2] += '?'
        final_url = urlunsplit(urlparts)

    return unquote(final_url)
Example #14
Source File: storage.py From bioforum with MIT License
def url_converter(self, name, hashed_files, template=None):
    """
    Return the custom URL converter for the given file name.
    """
    if template is None:
        template = self.default_template

    def converter(matchobj):
        """
        Convert the matched URL to a normalized and hashed URL.

        This requires figuring out which files the matched URL resolves
        to and calling the url() method of the storage.
        """
        matched, url = matchobj.groups()

        # Ignore absolute/protocol-relative and data-uri URLs.
        if re.match(r'^[a-z]+:', url):
            return matched

        # Ignore absolute URLs that don't point to a static file (dynamic
        # CSS / JS?). Note that STATIC_URL cannot be empty.
        if url.startswith('/') and not url.startswith(settings.STATIC_URL):
            return matched

        # Strip off the fragment so a path-like fragment won't interfere.
        url_path, fragment = urldefrag(url)

        if url_path.startswith('/'):
            # Otherwise the condition above would have returned prematurely.
            assert url_path.startswith(settings.STATIC_URL)
            target_name = url_path[len(settings.STATIC_URL):]
        else:
            # We're using the posixpath module to mix paths and URLs conveniently.
            source_name = name if os.sep == '/' else name.replace(os.sep, '/')
            target_name = posixpath.join(posixpath.dirname(source_name), url_path)

        # Determine the hashed name of the target file with the storage backend.
        hashed_url = self._url(
            self._stored_name, unquote(target_name),
            force=True, hashed_files=hashed_files,
        )

        transformed_url = '/'.join(url_path.split('/')[:-1] + hashed_url.split('/')[-1:])

        # Restore the fragment that was stripped off earlier.
        if fragment:
            transformed_url += ('?#' if '?#' in url else '#') + fragment

        # Return the hashed version to the file
        return template % unquote(transformed_url)

    return converter
Example #15
Source File: storage.py From Hands-On-Application-Development-with-PyCharm with MIT License
def url_converter(self, name, hashed_files, template=None):
    """
    Return the custom URL converter for the given file name.
    """
    if template is None:
        template = self.default_template

    def converter(matchobj):
        """
        Convert the matched URL to a normalized and hashed URL.

        This requires figuring out which files the matched URL resolves
        to and calling the url() method of the storage.
        """
        matched, url = matchobj.groups()

        # Ignore absolute/protocol-relative and data-uri URLs.
        if re.match(r'^[a-z]+:', url):
            return matched

        # Ignore absolute URLs that don't point to a static file (dynamic
        # CSS / JS?). Note that STATIC_URL cannot be empty.
        if url.startswith('/') and not url.startswith(settings.STATIC_URL):
            return matched

        # Strip off the fragment so a path-like fragment won't interfere.
        url_path, fragment = urldefrag(url)

        if url_path.startswith('/'):
            # Otherwise the condition above would have returned prematurely.
            assert url_path.startswith(settings.STATIC_URL)
            target_name = url_path[len(settings.STATIC_URL):]
        else:
            # We're using the posixpath module to mix paths and URLs conveniently.
            source_name = name if os.sep == '/' else name.replace(os.sep, '/')
            target_name = posixpath.join(posixpath.dirname(source_name), url_path)

        # Determine the hashed name of the target file with the storage backend.
        hashed_url = self._url(
            self._stored_name, unquote(target_name),
            force=True, hashed_files=hashed_files,
        )

        transformed_url = '/'.join(url_path.split('/')[:-1] + hashed_url.split('/')[-1:])

        # Restore the fragment that was stripped off earlier.
        if fragment:
            transformed_url += ('?#' if '?#' in url else '#') + fragment

        # Return the hashed version to the file
        return template % unquote(transformed_url)

    return converter
Example #16
Source File: namespace.py From ontology-visualization with MIT License
def absolutize(self, uri, defrag=1):
    base = urljoin("file:", pathname2url(os.getcwd()))
    result = urljoin("%s/" % base, uri, allow_fragments=not defrag)
    if defrag:
        result = urldefrag(result)[0]
    if not defrag:
        if uri and uri[-1] == "#" and result[-1] != "#":
            result = "%s#" % result
    return URIRef(result)

# From: http://www.w3.org/TR/REC-xml#NT-CombiningChar
#
# * Name start characters must have one of the categories Ll, Lu, Lo,
#   Lt, Nl.
#
# * Name characters other than Name-start characters must have one of
#   the categories Mc, Me, Mn, Lm, or Nd.
#
# * Characters in the compatibility area (i.e. with character code
#   greater than #xF900 and less than #xFFFE) are not allowed in XML
#   names.
#
# * Characters which have a font or compatibility decomposition
#   (i.e. those with a "compatibility formatting tag" in field 5 of the
#   database -- marked by field 5 beginning with a "<") are not allowed.
#
# * The following characters are treated as name-start characters rather
#   than name characters, because the property file classifies them as
#   Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
#
# * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode
#   2.0, section 5.14).
#
# * Character #x00B7 is classified as an extender, because the property
#   list so identifies it.
#
# * Character #x0387 is added as a name character, because #x00B7 is its
#   canonical equivalent.
#
# * Characters ':' and '_' are allowed as name-start characters.
#
# * Characters '-' and '.' are allowed as name characters.