Python idna.decode() Examples
The following are 23 code examples of idna.decode().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module idna, or try the search function.
Example #1
Source File: tldextract.py From CrawlBox with The Unlicense | 6 votes |
def _cache_tlds(self, tlds):
    '''Log a diff of the new TLDs and cache them on disk.

    When DEBUG logging is enabled, the packaged ``.tld_set_snapshot``
    resource is loaded and a unified diff between it and ``tlds`` is logged.
    When ``self.cache_file`` is set, ``tlds`` is written to that path as
    JSON; an ``IOError`` while writing is logged as a warning and not
    re-raised, so caching stays best-effort.

    :param tlds: iterable of TLD strings; must be JSON-serialisable for the
        cache write.
    '''
    if LOG.isEnabledFor(logging.DEBUG):
        # Only pay for loading the snapshot and diffing when it will be shown.
        import difflib
        snapshot_stream = pkg_resources.resource_stream(
            __name__, '.tld_set_snapshot'
        )
        with closing(snapshot_stream) as snapshot_file:
            snapshot = sorted(
                json.loads(snapshot_file.read().decode('utf-8'))
            )
        new = sorted(tlds)
        LOG.debug('computed TLD diff:\n' + '\n'.join(difflib.unified_diff(
            snapshot,
            new,
            fromfile=".tld_set_snapshot",
            tofile=self.cache_file
        )))

    if self.cache_file:
        try:
            with open(self.cache_file, 'w') as cache_file:
                json.dump(tlds, cache_file)
        except IOError as ioe:
            # Logger.warn is a deprecated alias; Logger.warning is the
            # supported spelling and emits the identical record.
            LOG.warning(
                "unable to cache TLDs in file %s: %s", self.cache_file, ioe
            )
Example #2
Source File: __init__.py From Galaxy_Plugin_Bethesda with MIT License | 6 votes |
def _encode_host(cls, host): try: ip, sep, zone = host.partition("%") ip = ip_address(ip) except ValueError: try: host = idna.encode(host, uts46=True).decode("ascii") except UnicodeError: host = host.encode("idna").decode("ascii") else: host = ip.compressed if sep: host += "%" + zone if ip.version == 6: host = "[" + host + "]" return host
Example #3
Source File: __init__.py From Galaxy_Plugin_Bethesda with MIT License | 6 votes |
def host(self):
    """Decoded host part of URL.

    None for relative URLs.

    """
    raw_value = self.raw_host
    # A missing host stays None. A '%' can only come from a scoped IPv6
    # address (e.g. fe80::2%eth0), for which IDNA decoding is pointless,
    # so that value is handed back untouched as well.
    if raw_value is None or "%" in raw_value:
        return raw_value
    try:
        return idna.decode(raw_value.encode("ascii"))
    except UnicodeError:
        # The idna package rejects some inputs (e.g. '::1'); fall back to
        # the built-in codec.
        return raw_value.encode("ascii").decode("idna")
Example #4
Source File: _idna.py From learn_python3_spider with MIT License | 6 votes |
def _idnaText(octets): """ Convert some IDNA-encoded octets into some human-readable text. Currently only used by the tests. @param octets: Some bytes representing a hostname. @type octets: L{bytes} @return: A human-readable domain name. @rtype: L{unicode} """ try: import idna except ImportError: return octets.decode("idna") else: return idna.decode(octets)
Example #5
Source File: __init__.py From yarl with Apache License 2.0 | 6 votes |
def _encode_host(cls, host): try: ip, sep, zone = host.partition("%") ip = ip_address(ip) except ValueError: for char in host: if char > "\x7f": break else: return host try: host = idna.encode(host, uts46=True).decode("ascii") except UnicodeError: host = host.encode("idna").decode("ascii") else: host = ip.compressed if sep: host += "%" + zone if ip.version == 6: host = "[" + host + "]" return host
Example #6
Source File: __init__.py From yarl with Apache License 2.0 | 6 votes |
def _encode_host(cls, host): try: ip, sep, zone = host.partition("%") ip = ip_address(ip) except ValueError: # IDNA encoding is slow, # skip it for ASCII-only strings if host.isascii(): return host try: host = idna.encode(host, uts46=True).decode("ascii") except UnicodeError: host = host.encode("idna").decode("ascii") else: host = ip.compressed if sep: host += "%" + zone if ip.version == 6: host = "[" + host + "]" return host
Example #7
Source File: __init__.py From yarl with Apache License 2.0 | 6 votes |
def host(self):
    """Decoded host part of URL.

    None for relative URLs.

    """
    raw = self.raw_host
    if raw is None:
        return None
    if "%" not in raw:
        try:
            return idna.decode(raw.encode("ascii"))
        except UnicodeError:
            # e.g. '::1' is rejected by the idna package; use the codec.
            return raw.encode("ascii").decode("idna")
    # A '%' sign only occurs in scoped IPv6 addresses such as fe80::2%eth0,
    # where IDNA decoding is useless, so the raw value is returned as is.
    return raw
Example #8
Source File: _idna.py From Safejumper-for-Desktop with GNU General Public License v2.0 | 6 votes |
def _idnaText(octets): """ Convert some IDNA-encoded octets into some human-readable text. Currently only used by the tests. @param octets: Some bytes representing a hostname. @type octets: L{bytes} @return: A human-readable domain name. @rtype: L{unicode} """ try: import idna except ImportError: return octets.decode("idna") else: return idna.decode(octets)
Example #9
Source File: decode_asn1.py From Safejumper-for-Desktop with GNU General Public License v2.0 | 5 votes |
def _decode_certificate_policies(backend, cp):
    """Build an ``x509.CertificatePolicies`` from a POLICYINFO stack.

    ``cp`` is cast to ``Cryptography_STACK_OF_POLICYINFO *`` and registered
    with the FFI garbage collector so that ``sk_POLICYINFO_free`` runs when
    it is released. Each entry becomes an ``x509.PolicyInformation`` whose
    qualifiers are ``None`` when the entry has no qualifier stack, otherwise
    a list of CPS URI strings and decoded user notices.
    """
    ffi = backend._ffi
    lib = backend._lib

    cp = ffi.cast("Cryptography_STACK_OF_POLICYINFO *", cp)
    cp = ffi.gc(cp, lib.sk_POLICYINFO_free)

    policies = []
    policy_count = lib.sk_POLICYINFO_num(cp)
    for policy_index in range(policy_count):
        policy_info = lib.sk_POLICYINFO_value(cp, policy_index)
        policy_oid = x509.ObjectIdentifier(
            _obj2txt(backend, policy_info.policyid)
        )

        qualifiers = None
        if policy_info.qualifiers != ffi.NULL:
            qualifier_count = lib.sk_POLICYQUALINFO_num(policy_info.qualifiers)
            qualifiers = []
            for qualifier_index in range(qualifier_count):
                qualifier_info = lib.sk_POLICYQUALINFO_value(
                    policy_info.qualifiers, qualifier_index
                )
                qualifier_oid = x509.ObjectIdentifier(
                    _obj2txt(backend, qualifier_info.pqualid)
                )
                if qualifier_oid == CertificatePoliciesOID.CPS_QUALIFIER:
                    # CPS qualifier: the URI bytes are decoded as ASCII.
                    raw_uri = ffi.buffer(
                        qualifier_info.d.cpsuri.data,
                        qualifier_info.d.cpsuri.length,
                    )[:]
                    qualifiers.append(raw_uri.decode('ascii'))
                else:
                    # The only other qualifier handled here is a user notice.
                    assert qualifier_oid == CertificatePoliciesOID.CPS_USER_NOTICE
                    qualifiers.append(
                        _decode_user_notice(backend, qualifier_info.d.usernotice)
                    )

        policies.append(x509.PolicyInformation(policy_oid, qualifiers))

    return x509.CertificatePolicies(policies)
Example #10
Source File: cachefile.py From URLExtract with MIT License | 5 votes |
def _load_cached_tlds(self):
    """
    Read the cached TLD list file into a set.

    Blank lines and lines starting with ``#`` are ignored. Every remaining
    entry is lower-cased and stored twice, each time with a leading dot:
    once exactly as written in the file and once IDNA-decoded.

    :return: Set of current TLDs
    :rtype: set
    :raises CacheFileError: when the cache file is not readable
    """
    cache_path = self._tld_list_path

    # Refuse early when the current user cannot read the cache file.
    if not os.access(cache_path, os.R_OK):
        self._logger.error("Cached file is not readable for current "
                           "user. ({})".format(cache_path))
        raise CacheFileError(
            "Cached file is not readable for current user."
        )

    loaded = set()
    with filelock.FileLock(self._get_cache_lock_file_path()), \
            open(cache_path, 'r') as cache_handle:
        for raw_line in cache_handle:
            entry = raw_line.strip().lower()
            # Skip empty lines and comments.
            if not entry or entry.startswith('#'):
                continue
            loaded.add("." + entry)
            loaded.add("." + idna.decode(entry))

    return loaded
Example #11
Source File: decode_asn1.py From Safejumper-for-Desktop with GNU General Public License v2.0 | 5 votes |
def _asn1_string_to_utf8(backend, asn1_string):
    """Convert an ASN1 string to text via ``ASN1_STRING_to_UTF8``.

    Raises ``ValueError`` when the conversion call returns ``-1``. The
    output buffer is registered with the FFI garbage collector so that
    ``OPENSSL_free`` is called on it when it is released.
    """
    out_ptr = backend._ffi.new("unsigned char **")
    length = backend._lib.ASN1_STRING_to_UTF8(out_ptr, asn1_string)
    if length == -1:
        message = "Unsupported ASN1 string type. Type: {0}".format(
            asn1_string.type
        )
        raise ValueError(message)

    backend.openssl_assert(out_ptr[0] != backend._ffi.NULL)

    def _release(holder):
        return backend._lib.OPENSSL_free(holder[0])

    out_ptr = backend._ffi.gc(out_ptr, _release)
    raw = backend._ffi.buffer(out_ptr[0], length)[:]
    return raw.decode('utf8')
Example #12
Source File: decode_asn1.py From Safejumper-for-Desktop with GNU General Public License v2.0 | 5 votes |
def _asn1_string_to_ascii(backend, asn1_string):
    """Return the ASN1 string's bytes decoded as ASCII text."""
    raw = _asn1_string_to_bytes(backend, asn1_string)
    return raw.decode("ascii")
Example #13
Source File: decode_asn1.py From Safejumper-for-Desktop with GNU General Public License v2.0 | 5 votes |
def _obj2txt(backend, obj):
    """Return the text produced by ``OBJ_obj2txt`` for ``obj``.

    The call is made with its last argument set to 1 and an 80-byte output
    buffer; a non-positive result trips ``backend.openssl_assert``.
    """
    # 80 bytes follows the recommendation in
    # https://www.openssl.org/docs/crypto/OBJ_nid2ln.html#return_values
    capacity = 80
    text_buf = backend._ffi.new("char[]", capacity)
    written = backend._lib.OBJ_obj2txt(text_buf, capacity, obj, 1)
    backend.openssl_assert(written > 0)
    raw = backend._ffi.buffer(text_buf, written)[:]
    return raw.decode()
Example #14
Source File: address.py From isthislegit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _lift_parse_list_result(parse_list_rs):
    """Lift a sequence of parse results into address objects.

    Returns a ``(AddressList, list)`` pair. The list holds a
    ``local_part@domain`` string for every ``Mailbox`` result that could
    not be lifted; other results that could not be lifted are dropped.
    """
    lifted = AddressList()
    rejected = []
    for item in parse_list_rs:
        address = _lift_parse_result(item)
        if address:
            lifted.append(address)
        elif isinstance(item, Mailbox):
            local_text = item.local_part.decode('utf-8')
            domain_text = item.domain.decode('utf-8')
            rejected.append(u'%s@%s' % (local_text, domain_text))
    return lifted, rejected
Example #15
Source File: address.py From isthislegit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _lift_parse_result(parse_rs):
    """Turn one parse result into an address object.

    A ``Mailbox`` becomes an ``EmailAddress`` (or ``None`` when building it
    raises ``UnicodeError`` or ``IDNAError``), a ``Url`` becomes a
    ``UrlAddress``, and anything else yields ``None``.
    """
    if isinstance(parse_rs, Url):
        return UrlAddress(address=parse_rs.address.decode('utf-8'))

    if not isinstance(parse_rs, Mailbox):
        return None

    try:
        shown_name = smart_unquote(parse_rs.display_name.decode('utf-8'))
        local_part = parse_rs.local_part.decode('utf-8')
        domain = parse_rs.domain.decode('utf-8')
        return EmailAddress(
            display_name=shown_name,
            mailbox=local_part,
            hostname=domain,
        )
    except (UnicodeError, IDNAError):
        return None
Example #16
Source File: x509.py From oss-ftp with MIT License | 5 votes |
def _decode_invalidity_date(backend, inv_date):
    """Parse an ASN1 GENERALIZEDTIME value into a ``datetime``.

    The pointer is cast, registered with the FFI garbage collector so that
    ``ASN1_GENERALIZEDTIME_free`` runs on release, and its string data is
    parsed with the ``%Y%m%d%H%M%SZ`` format.
    """
    ffi = backend._ffi
    lib = backend._lib

    gen_time = ffi.cast("ASN1_GENERALIZEDTIME *", inv_date)
    gen_time = ffi.gc(gen_time, lib.ASN1_GENERALIZEDTIME_free)

    as_string = ffi.cast("ASN1_STRING *", gen_time)
    raw_stamp = ffi.string(lib.ASN1_STRING_data(as_string))
    stamp = raw_stamp.decode("ascii")
    return datetime.datetime.strptime(stamp, "%Y%m%d%H%M%SZ")
Example #17
Source File: x509.py From oss-ftp with MIT License | 5 votes |
def _decode_certificate_policies(backend, cp):
    """Build an ``x509.CertificatePolicies`` from a POLICYINFO stack.

    ``cp`` is cast to ``Cryptography_STACK_OF_POLICYINFO *`` and registered
    with the FFI garbage collector (``sk_POLICYINFO_free``). For each policy
    the qualifiers are ``None`` when no qualifier stack is present,
    otherwise a list of CPS URI strings and decoded user notices.
    """

    def _qualifier_value(pqi):
        # Decode a single POLICYQUALINFO entry into its Python value.
        pqualid = x509.ObjectIdentifier(_obj2txt(backend, pqi.pqualid))
        if pqualid == CertificatePoliciesOID.CPS_QUALIFIER:
            return backend._ffi.buffer(
                pqi.d.cpsuri.data, pqi.d.cpsuri.length
            )[:].decode('ascii')
        assert pqualid == CertificatePoliciesOID.CPS_USER_NOTICE
        return _decode_user_notice(backend, pqi.d.usernotice)

    cp = backend._ffi.cast("Cryptography_STACK_OF_POLICYINFO *", cp)
    cp = backend._ffi.gc(cp, backend._lib.sk_POLICYINFO_free)

    total = backend._lib.sk_POLICYINFO_num(cp)
    decoded = []
    for index in range(total):
        info = backend._lib.sk_POLICYINFO_value(cp, index)
        oid = x509.ObjectIdentifier(_obj2txt(backend, info.policyid))

        if info.qualifiers != backend._ffi.NULL:
            count = backend._lib.sk_POLICYQUALINFO_num(info.qualifiers)
            qualifiers = []
            for position in range(count):
                entry = backend._lib.sk_POLICYQUALINFO_value(
                    info.qualifiers, position
                )
                qualifiers.append(_qualifier_value(entry))
        else:
            qualifiers = None

        decoded.append(x509.PolicyInformation(oid, qualifiers))

    return x509.CertificatePolicies(decoded)
Example #18
Source File: x509.py From oss-ftp with MIT License | 5 votes |
def _obj2txt(backend, obj):
    """Return the text that ``OBJ_obj2txt`` writes for ``obj``.

    Uses an 80-byte output buffer and passes 1 as the final argument of
    ``OBJ_obj2txt``; ``backend.openssl_assert`` checks that the reported
    length is positive.
    """
    # Buffer size of 80 per the recommendation at
    # https://www.openssl.org/docs/crypto/OBJ_nid2ln.html#return_values
    size = 80
    out = backend._ffi.new("char[]", size)
    length = backend._lib.OBJ_obj2txt(out, size, obj, 1)
    backend.openssl_assert(length > 0)
    return backend._ffi.buffer(out, length)[:].decode()
Example #19
Source File: tldextract.py From CrawlBox with The Unlicense | 5 votes |
def _get_snapshot_tld_extractor():
    """Load the packaged ``.tld_set_snapshot`` resource.

    The resource is read as bytes, decoded as UTF-8 and parsed as JSON; the
    parsed value is returned.
    """
    stream = pkg_resources.resource_stream(__name__, '.tld_set_snapshot')
    with closing(stream) as snapshot_file:
        payload = snapshot_file.read()
        text = payload.decode('utf-8')
        return json.loads(text)
Example #20
Source File: tldextract.py From CrawlBox with The Unlicense | 4 votes |
def __call__(self, url):
    """
    Split a string URL into its subdomain, domain, and suffix
    (effective TLD, gTLD, ccTLD, etc.) components.

    >>> extract = TLDExtract()
    >>> extract('http://forums.news.cnn.com/')
    ExtractResult(subdomain='forums.news', domain='cnn', suffix='com')
    >>> extract('http://forums.bbc.co.uk/')
    ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk')
    """
    # Strip the scheme, then cut at the first path, query or fragment
    # delimiter, in that order.
    remainder = SCHEME_RE.sub("", url)
    for delimiter in ("/", "?", "#"):
        remainder = remainder.partition(delimiter)[0]

    # Drop credentials and port, surrounding whitespace and trailing dots.
    host_and_port = remainder.split("@")[-1]
    netloc = host_and_port.partition(":")[0].strip().rstrip(".")

    labels = netloc.split(".")

    def _comparable(label):
        # Punycode labels are decoded for the suffix lookup; a label that
        # fails to decode is used as written.
        if label.startswith("xn--"):
            try:
                label = idna.decode(label.encode('ascii'))
            except UnicodeError:
                pass
        return label.lower()

    translations = [_comparable(label) for label in labels]
    suffix_index = self._get_tld_extractor().suffix_index(translations)

    suffix = ".".join(labels[suffix_index:])
    if not suffix and netloc and looks_like_ip(netloc):
        return ExtractResult('', netloc, '')

    registered_domain = ".".join(labels[:suffix_index])
    subdomain, _, domain = registered_domain.rpartition('.')
    return ExtractResult(subdomain, domain, suffix)
Example #21
Source File: address.py From isthislegit with BSD 3-Clause "New" or "Revised" License | 4 votes |
def __init__(self, raw_display_name=None, raw_addr_spec=None, display_name=None, mailbox=None, hostname=None):
    """Create an address from raw text or from already-parsed parts.

    The first matching combination of arguments decides how the parts are
    obtained:

    * ``raw_display_name`` and ``raw_addr_spec``: the addr-spec is parsed
      and the display name is taken as given;
    * ``raw_display_name`` only: it is parsed as a full mailbox;
    * ``raw_addr_spec`` only: it is parsed and the display name is empty;
    * ``mailbox`` and ``hostname``: used as given, with ``display_name``
      defaulting to an empty string.

    The parts are then converted to unicode, and the hostname is
    lower-cased and IDNA-decoded when it contains a punycode label.

    NOTE(review): the use of the ``unicode`` builtin and of ``str.decode``
    suggests this targets Python 2 -- confirm before running on Python 3.

    :raises SyntaxError: when none of the combinations above is supplied.
    """
    # Raw inputs given as unicode are encoded to UTF-8 bytes before parsing.
    if isinstance(raw_display_name, unicode):
        raw_display_name = raw_display_name.encode('utf-8')
    if isinstance(raw_addr_spec, unicode):
        raw_addr_spec = raw_addr_spec.encode('utf-8')

    if raw_display_name and raw_addr_spec:
        # Both given: only the addr-spec is parsed; the display name is
        # stored as supplied.
        parser = addr_spec_parser
        mailbox = parser.parse(raw_addr_spec.strip(), lexer=lexer.clone())

        self._display_name = raw_display_name
        self._mailbox = mailbox.local_part
        self._hostname = mailbox.domain

    elif raw_display_name:
        # Display name only: parse it as a complete mailbox.
        parser = mailbox_parser
        mailbox = parser.parse(raw_display_name.strip(), lexer=lexer.clone())

        self._display_name = mailbox.display_name
        self._mailbox = mailbox.local_part
        self._hostname = mailbox.domain

    elif raw_addr_spec:
        # Addr-spec only: there is no display name.
        parser = addr_spec_parser
        mailbox = parser.parse(raw_addr_spec.strip(), lexer=lexer.clone())

        self._display_name = ''
        self._mailbox = mailbox.local_part
        self._hostname = mailbox.domain

    elif mailbox and hostname:
        # Already-split parts supplied by the caller.
        self._display_name = display_name or ''
        self._mailbox = mailbox
        self._hostname = hostname

    else:
        raise SyntaxError('failed to create EmailAddress: bad parameters')

    # Convert display name to decoded unicode string.
    # A value wrapped in '=?' ... '?=' is passed through mime_to_unicode.
    if (self._display_name.startswith('=?') and
            self._display_name.endswith('?=')):
        self._display_name = mime_to_unicode(self._display_name)
    # A value wrapped in double quotes (and longer than the two quotes)
    # is unquoted.
    if (self._display_name.startswith('"') and
            self._display_name.endswith('"') and
            len(self._display_name) > 2):
        self._display_name = smart_unquote(self._display_name)
    if isinstance(self._display_name, str):
        self._display_name = self._display_name.decode('utf-8')

    # Convert localpart to unicode string.
    if isinstance(self._mailbox, str):
        self._mailbox = self._mailbox.decode('utf-8')

    # Convert hostname to lowercase unicode string.
    self._hostname = self._hostname.lower()
    # Decode punycode only when some label starts with the 'xn--' prefix.
    if self._hostname.startswith('xn--') or '.xn--' in self._hostname:
        self._hostname = idna.decode(self._hostname)
    if isinstance(self._hostname, str):
        self._hostname = self._hostname.decode('utf-8')
    if not is_pure_ascii(self._hostname):
        # The result is discarded; presumably this call exists only so that
        # a hostname that cannot be IDNA-encoded raises here -- confirm.
        idna.encode(self._hostname)
Example #22
Source File: _url.py From learn_python3_spider with MIT License | 4 votes |
def _decode_host(host): """Decode a host from ASCII-encodable text to IDNA-decoded text. If the host text is not ASCII, it is returned unchanged, as it is presumed that it is already IDNA-decoded. Some technical details: _decode_host is built on top of the "idna" package, which has some quirks: Capital letters are not valid IDNA2008. The idna package will raise an exception like this on capital letters: > idna.core.InvalidCodepoint: Codepoint U+004B at position 1 ... not allowed However, if a segment of a host (i.e., something in url.host.split('.')) is already ASCII, idna doesn't perform its usual checks. In fact, for capital letters it automatically lowercases them. This check and some other functionality can be bypassed by passing uts46=True to idna.encode/decode. This allows a more permissive and convenient interface. So far it seems like the balanced approach. Example output (from idna==2.6): >> idna.encode(u'mahmöud.io') 'xn--mahmud-zxa.io' >> idna.encode(u'Mahmöud.io') Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 355, in encode result.append(alabel(label)) File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 276, in alabel check_label(label) File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 253, in check_label raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label))) idna.core.InvalidCodepoint: Codepoint U+004D at position 1 of u'Mahm\xf6ud' not allowed >> idna.encode(u'Mahmoud.io') 'Mahmoud.io' # Similar behavior for decodes below >> idna.decode(u'Mahmoud.io') u'mahmoud.io >> idna.decode(u'Méhmoud.io', uts46=True) u'm\xe9hmoud.io' """ if not host: return u'' try: host_bytes = host.encode("ascii") except UnicodeEncodeError: host_text = host else: try: host_text = idna_decode(host_bytes, 
uts46=True) except ValueError: # only reached on "narrow" (UCS-2) Python builds <3.4, see #7 # NOTE: not going to raise here, because there's no # ambiguity in the IDNA, and the host is still # technically usable host_text = host return host_text
Example #23
Source File: cachefile.py From URLExtract with MIT License | 4 votes |
def _download_tlds_list(self):
    """
    Download the list of TLDs from IANA and store it in the cache file.

    LINK: https://data.iana.org/TLD/tlds-alpha-by-domain.txt

    When the default cache file is in use but not writable, the cache path
    is first switched to the one returned by ``_get_cache_file_path()``.

    :return: True if list was downloaded, False in case of an error
    :rtype: bool
    """
    source_url = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'
    log = self._logger

    # The default cache file is selected (flagged by _default_cache_file)
    # but cannot be written: fall back to another cache location.
    default_unwritable = (
        self._default_cache_file
        and not os.access(self._tld_list_path, os.W_OK)
    )
    if default_unwritable:
        log.info("Default cache file is not writable.")
        self._tld_list_path = self._get_cache_file_path()
        log.info("Changed path of cache file to: %s", self._tld_list_path)

    # An existing cache file that cannot be overwritten is a hard failure.
    exists = os.access(self._tld_list_path, os.F_OK)
    if exists and not os.access(self._tld_list_path, os.W_OK):
        log.error("ERROR: Cache file is not writable for current "
                  "user. ({})".format(self._tld_list_path))
        return False

    request = urllib.request.Request(source_url)
    user_agent = ('Mozilla/5.0 (Windows NT 6.0; '
                  'WOW64; rv:24.0) Gecko/20100101 '
                  'Firefox/24.0')
    request.add_header('User-Agent', user_agent)

    try:
        with urllib.request.urlopen(request) as response:
            page = response.read().decode('utf-8')
    except HTTPError as err:
        # Handled before URLError so HTTP failures get their own message.
        log.error("ERROR: Can not download list ot TLDs. "
                  "(HTTPError: {})".format(err.reason))
        return False
    except URLError as err:
        log.error("ERROR: Can not download list ot TLDs. "
                  "(URLError: {})".format(err.reason))
        return False

    # Write the cache while holding the lock file.
    with filelock.FileLock(self._get_cache_lock_file_path()):
        with open(self._tld_list_path, 'w') as cache_out:
            cache_out.write(page)

    return True