Python urlparse.html() Examples
The following are 30
code examples of urlparse.html().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
urlparse
, or try the search function
.
Example #1
Source File: feedparser.py From RSSNewsGAE with Apache License 2.0 | 6 votes |
def _start_link(self, attrsD): attrsD.setdefault('rel', u'alternate') if attrsD['rel'] == u'self': attrsD.setdefault('type', u'application/atom+xml') else: attrsD.setdefault('type', u'text/html') context = self._getContext() attrsD = self._itsAnHrefDamnIt(attrsD) if 'href' in attrsD: attrsD['href'] = self.resolveURI(attrsD['href']) expectingText = self.infeed or self.inentry or self.insource context.setdefault('links', []) if not (self.inentry and self.inimage): context['links'].append(FeedParserDict(attrsD)) if 'href' in attrsD: expectingText = 0 if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): context['link'] = attrsD['href'] else: self.push('link', expectingText)
Example #2
Source File: feedparser.py From data with GNU General Public License v3.0 | 6 votes |
def _start_link(self, attrsD): attrsD.setdefault('rel', u'alternate') if attrsD['rel'] == u'self': attrsD.setdefault('type', u'application/atom+xml') else: attrsD.setdefault('type', u'text/html') context = self._getContext() attrsD = self._itsAnHrefDamnIt(attrsD) if 'href' in attrsD: attrsD['href'] = self.resolveURI(attrsD['href']) expectingText = self.infeed or self.inentry or self.insource context.setdefault('links', []) if not (self.inentry and self.inimage): context['links'].append(FeedParserDict(attrsD)) if 'href' in attrsD: expectingText = 0 if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): context['link'] = attrsD['href'] else: self.push('link', expectingText)
Example #3
Source File: feedparser.py From telegram-robot-rss with Mozilla Public License 2.0 | 6 votes |
def _start_link(self, attrsD): attrsD.setdefault('rel', u'alternate') if attrsD['rel'] == u'self': attrsD.setdefault('type', u'application/atom+xml') else: attrsD.setdefault('type', u'text/html') context = self._getContext() attrsD = self._itsAnHrefDamnIt(attrsD) if 'href' in attrsD: attrsD['href'] = self.resolveURI(attrsD['href']) expectingText = self.infeed or self.inentry or self.insource context.setdefault('links', []) if not (self.inentry and self.inimage): context['links'].append(FeedParserDict(attrsD)) if 'href' in attrsD: expectingText = 0 if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): context['link'] = attrsD['href'] else: self.push('link', expectingText)
Example #4
Source File: feedparser.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _start_link(self, attrsD): attrsD.setdefault('rel', u'alternate') if attrsD['rel'] == u'self': attrsD.setdefault('type', u'application/atom+xml') else: attrsD.setdefault('type', u'text/html') context = self._getContext() attrsD = self._itsAnHrefDamnIt(attrsD) if 'href' in attrsD: attrsD['href'] = self.resolveURI(attrsD['href']) expectingText = self.infeed or self.inentry or self.insource context.setdefault('links', []) if not (self.inentry and self.inimage): context['links'].append(FeedParserDict(attrsD)) if 'href' in attrsD: expectingText = 0 if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): context['link'] = attrsD['href'] else: self.push('link', expectingText)
Example #5
Source File: feedparser.py From xbmc-addons-chinese with GNU General Public License v2.0 | 6 votes |
def _start_link(self, attrsD): attrsD.setdefault('rel', u'alternate') if attrsD['rel'] == u'self': attrsD.setdefault('type', u'application/atom+xml') else: attrsD.setdefault('type', u'text/html') context = self._getContext() attrsD = self._itsAnHrefDamnIt(attrsD) if 'href' in attrsD: attrsD['href'] = self.resolveURI(attrsD['href']) expectingText = self.infeed or self.inentry or self.insource context.setdefault('links', []) if not (self.inentry and self.inimage): context['links'].append(FeedParserDict(attrsD)) if 'href' in attrsD: expectingText = 0 if (attrsD.get('rel') == u'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): context['link'] = attrsD['href'] else: self.push('link', expectingText)
Example #6
Source File: feedparser.py From RSSNewsGAE with Apache License 2.0 | 5 votes |
def mapContentType(self, contentType): contentType = contentType.lower() if contentType == 'text' or contentType == 'plain': contentType = u'text/plain' elif contentType == 'html': contentType = u'text/html' elif contentType == 'xhtml': contentType = u'application/xhtml+xml' return contentType
Example #7
Source File: feedparser.py From data with GNU General Public License v3.0 | 5 votes |
def _start_content_encoded(self, attrsD): self.pushContent('content', attrsD, u'text/html', 1)
Example #8
Source File: feedparser.py From data with GNU General Public License v3.0 | 5 votes |
def _start_description(self, attrsD): context = self._getContext() if 'summary' in context: self._summaryKey = 'content' self._start_content(attrsD) else: self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)
Example #9
Source File: feedparser.py From data with GNU General Public License v3.0 | 5 votes |
def mapContentType(self, contentType): contentType = contentType.lower() if contentType == 'text' or contentType == 'plain': contentType = u'text/plain' elif contentType == 'html': contentType = u'text/html' elif contentType == 'xhtml': contentType = u'application/xhtml+xml' return contentType
Example #10
Source File: feedparser.py From data with GNU General Public License v3.0 | 5 votes |
def _l2bytes(l): return bytes(l) # If you want feedparser to allow all URL schemes, set this to () # List culled from Python's urlparse documentation at: # http://docs.python.org/library/urlparse.html # as well as from "URI scheme" at Wikipedia: # https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme # Many more will likely need to be added!
Example #11
Source File: feedparser.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _parse_date_rfc822(dt): """Parse RFC 822 dates and times, with one minor difference: years may be 4DIGIT or 2DIGIT. http://tools.ietf.org/html/rfc822#section-5""" try: m = _rfc822_match(dt.lower()).groupdict(0) except AttributeError: return None # Calculate a date and timestamp for k in ('year', 'day', 'hour', 'minute', 'second'): m[k] = int(m[k]) m['month'] = _rfc822_months.index(m['month']) + 1 # If the year is 2 digits, assume everything in the 90's is the 1990's if m['year'] < 100: m['year'] += (1900, 2000)[m['year'] < 90] stamp = datetime.datetime(*[m[i] for i in ('year', 'month', 'day', 'hour', 'minute', 'second')]) # Use the timezone information to calculate the difference between # the given date and timestamp and Universal Coordinated Time tzhour = 0 tzmin = 0 if m['tz'] and m['tz'].startswith('gmt'): # Handle GMT and GMT+hh:mm timezone syntax (the trailing # timezone info will be handled by the next `if` block) m['tz'] = ''.join(m['tz'][3:].split(':')) or 'gmt' if not m['tz']: pass elif m['tz'].startswith('+'): tzhour = int(m['tz'][1:3]) tzmin = int(m['tz'][3:]) elif m['tz'].startswith('-'): tzhour = int(m['tz'][1:3]) * -1 tzmin = int(m['tz'][3:]) * -1 else: tzhour = _rfc822_tznames[m['tz']] delta = datetime.timedelta(0, 0, 0, 0, tzmin, tzhour) # Return the date and timestamp in UTC return (stamp - delta).utctimetuple()
Example #12
Source File: feedparser.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _start_content_encoded(self, attrsD): self.pushContent('content', attrsD, u'text/html', 1)
Example #13
Source File: feedparser.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _start_description(self, attrsD): context = self._getContext() if 'summary' in context: self._summaryKey = 'content' self._start_content(attrsD) else: self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)
Example #14
Source File: feedparser.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 5 votes |
def mapContentType(self, contentType): contentType = contentType.lower() if contentType == 'text' or contentType == 'plain': contentType = u'text/plain' elif contentType == 'html': contentType = u'text/html' elif contentType == 'xhtml': contentType = u'application/xhtml+xml' return contentType
Example #15
Source File: feedparser.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _l2bytes(l): return bytes(l) # If you want feedparser to allow all URL schemes, set this to () # List culled from Python's urlparse documentation at: # http://docs.python.org/library/urlparse.html # as well as from "URI scheme" at Wikipedia: # https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme # Many more will likely need to be added!
Example #16
Source File: feedparser.py From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def _parse_date_rfc822(dt): """Parse RFC 822 dates and times, with one minor difference: years may be 4DIGIT or 2DIGIT. http://tools.ietf.org/html/rfc822#section-5""" try: m = _rfc822_match(dt.lower()).groupdict(0) except AttributeError: return None return _parse_date_group_rfc822(m)
Example #17
Source File: feedparser.py From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def _start_content_encoded(self, attrsD): self.pushContent('content', attrsD, u'text/html', 1)
Example #18
Source File: feedparser.py From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def _start_description(self, attrsD): context = self._getContext() if 'summary' in context: self._summaryKey = 'content' self._start_content(attrsD) else: self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)
Example #19
Source File: feedparser.py From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def mapContentType(self, contentType): contentType = contentType.lower() if contentType == 'text' or contentType == 'plain': contentType = u'text/plain' elif contentType == 'html': contentType = u'text/html' elif contentType == 'xhtml': contentType = u'application/xhtml+xml' return contentType
Example #20
Source File: feedparser.py From xbmc-addons-chinese with GNU General Public License v2.0 | 5 votes |
def _l2bytes(l): return bytes(l) # If you want feedparser to allow all URL schemes, set this to () # List culled from Python's urlparse documentation at: # http://docs.python.org/library/urlparse.html # as well as from "URI scheme" at Wikipedia: # https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme # Many more will likely need to be added!
Example #21
Source File: feedparser.py From telegram-robot-rss with Mozilla Public License 2.0 | 5 votes |
def _start_content_encoded(self, attrsD): self.pushContent('content', attrsD, u'text/html', 1)
Example #22
Source File: feedparser.py From telegram-robot-rss with Mozilla Public License 2.0 | 5 votes |
def _start_description(self, attrsD): context = self._getContext() if 'summary' in context: self._summaryKey = 'content' self._start_content(attrsD) else: self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)
Example #23
Source File: feedparser.py From telegram-robot-rss with Mozilla Public License 2.0 | 5 votes |
def mapContentType(self, contentType): contentType = contentType.lower() if contentType == 'text' or contentType == 'plain': contentType = u'text/plain' elif contentType == 'html': contentType = u'text/html' elif contentType == 'xhtml': contentType = u'application/xhtml+xml' return contentType
Example #24
Source File: feedparser.py From telegram-robot-rss with Mozilla Public License 2.0 | 5 votes |
def _l2bytes(l): return bytes(l) # If you want feedparser to allow all URL schemes, set this to () # List culled from Python's urlparse documentation at: # http://docs.python.org/library/urlparse.html # as well as from "URI scheme" at Wikipedia: # https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme # Many more will likely need to be added!
Example #25
Source File: feedparser.py From RSSNewsGAE with Apache License 2.0 | 5 votes |
def _start_content_encoded(self, attrsD): self.pushContent('content', attrsD, u'text/html', 1)
Example #26
Source File: feedparser.py From RSSNewsGAE with Apache License 2.0 | 5 votes |
def _start_description(self, attrsD): context = self._getContext() if 'summary' in context: self._summaryKey = 'content' self._start_content(attrsD) else: self.pushContent('description', attrsD, u'text/html', self.infeed or self.inentry or self.insource)
Example #27
Source File: feedparser.py From RSSNewsGAE with Apache License 2.0 | 5 votes |
def _l2bytes(l): return bytes(l) # If you want feedparser to allow all URL schemes, set this to () # List culled from Python's urlparse documentation at: # http://docs.python.org/library/urlparse.html # as well as from "URI scheme" at Wikipedia: # https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme # Many more will likely need to be added!
Example #28
Source File: feedparser.py From xbmc-addons-chinese with GNU General Public License v2.0 | 4 votes |
def unknown_starttag(self, tag, attrs): acceptable_attributes = self.acceptable_attributes keymap = {} if not tag in self.acceptable_elements or self.svgOK: if tag in self.unacceptable_elements_with_end_tag: self.unacceptablestack += 1 # add implicit namespaces to html5 inline svg/mathml if self._type.endswith('html'): if not dict(attrs).get('xmlns'): if tag=='svg': attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) if tag=='math': attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') ) # not otherwise acceptable, perhaps it is MathML or SVG? if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs: self.mathmlOK += 1 if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs: self.svgOK += 1 # chose acceptable attributes based on tag class, else bail if self.mathmlOK and tag in self.mathml_elements: acceptable_attributes = self.mathml_attributes elif self.svgOK and tag in self.svg_elements: # for most vocabularies, lowercasing is a good idea. Many # svg elements, however, are camel case if not self.svg_attr_map: lower=[attr.lower() for attr in self.svg_attributes] mix=[a for a in self.svg_attributes if a not in lower] self.svg_attributes = lower self.svg_attr_map = dict([(a.lower(),a) for a in mix]) lower=[attr.lower() for attr in self.svg_elements] mix=[a for a in self.svg_elements if a not in lower] self.svg_elements = lower self.svg_elem_map = dict([(a.lower(),a) for a in mix]) acceptable_attributes = self.svg_attributes tag = self.svg_elem_map.get(tag,tag) keymap = self.svg_attr_map elif not tag in self.acceptable_elements: return # declare xlink namespace, if needed if self.mathmlOK or self.svgOK: if filter(lambda (n,v): n.startswith('xlink:'),attrs): if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs: attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink'))
Example #29
Source File: feedparser.py From telegram-robot-rss with Mozilla Public License 2.0 | 4 votes |
def unknown_starttag(self, tag, attrs): acceptable_attributes = self.acceptable_attributes keymap = {} if not tag in self.acceptable_elements or self.svgOK: if tag in self.unacceptable_elements_with_end_tag: self.unacceptablestack += 1 # add implicit namespaces to html5 inline svg/mathml if self._type.endswith('html'): if not dict(attrs).get('xmlns'): if tag=='svg': attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) if tag=='math': attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') ) # not otherwise acceptable, perhaps it is MathML or SVG? if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs: self.mathmlOK += 1 if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs: self.svgOK += 1 # chose acceptable attributes based on tag class, else bail if self.mathmlOK and tag in self.mathml_elements: acceptable_attributes = self.mathml_attributes elif self.svgOK and tag in self.svg_elements: # for most vocabularies, lowercasing is a good idea. Many # svg elements, however, are camel case if not self.svg_attr_map: lower=[attr.lower() for attr in self.svg_attributes] mix=[a for a in self.svg_attributes if a not in lower] self.svg_attributes = lower self.svg_attr_map = dict([(a.lower(),a) for a in mix]) lower=[attr.lower() for attr in self.svg_elements] mix=[a for a in self.svg_elements if a not in lower] self.svg_elements = lower self.svg_elem_map = dict([(a.lower(),a) for a in mix]) acceptable_attributes = self.svg_attributes tag = self.svg_elem_map.get(tag,tag) keymap = self.svg_attr_map elif not tag in self.acceptable_elements: return # declare xlink namespace, if needed if self.mathmlOK or self.svgOK: if filter(lambda (n,v): n.startswith('xlink:'),attrs): if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs: attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink'))
Example #30
Source File: feedparser.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 4 votes |
def unknown_starttag(self, tag, attrs): acceptable_attributes = self.acceptable_attributes keymap = {} if not tag in self.acceptable_elements or self.svgOK: if tag in self.unacceptable_elements_with_end_tag: self.unacceptablestack += 1 # add implicit namespaces to html5 inline svg/mathml if self._type.endswith('html'): if not dict(attrs).get('xmlns'): if tag=='svg': attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) if tag=='math': attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') ) # not otherwise acceptable, perhaps it is MathML or SVG? if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs: self.mathmlOK += 1 if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs: self.svgOK += 1 # chose acceptable attributes based on tag class, else bail if self.mathmlOK and tag in self.mathml_elements: acceptable_attributes = self.mathml_attributes elif self.svgOK and tag in self.svg_elements: # for most vocabularies, lowercasing is a good idea. Many # svg elements, however, are camel case if not self.svg_attr_map: lower=[attr.lower() for attr in self.svg_attributes] mix=[a for a in self.svg_attributes if a not in lower] self.svg_attributes = lower self.svg_attr_map = dict([(a.lower(),a) for a in mix]) lower=[attr.lower() for attr in self.svg_elements] mix=[a for a in self.svg_elements if a not in lower] self.svg_elements = lower self.svg_elem_map = dict([(a.lower(),a) for a in mix]) acceptable_attributes = self.svg_attributes tag = self.svg_elem_map.get(tag,tag) keymap = self.svg_attr_map elif not tag in self.acceptable_elements: return # declare xlink namespace, if needed if self.mathmlOK or self.svgOK: if filter(lambda (n,v): n.startswith('xlink:'),attrs): if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs: attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink'))