Python codecs.html() Examples
The following are 21 code examples of codecs.html().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module codecs, or try the search function.
Example #1
Source File: main.py From cvt2utf with MIT License | 6 votes |
def normalize_codec_name(chardet_name):
    """
    Translate a codec name reported by chardet into Python's canonical name.

    The name is lower-cased, ``iso-`` is rewritten to ``iso`` and remaining
    hyphens become underscores; the result is then resolved through
    ``codecs.lookup`` so that aliases collapse to a single spelling.

    :param chardet_name: codec name as reported by chardet
    :return: the Python codec name; ``'gb18030'`` is returned in place of
        ``'gb2312'``.
    :raises LookupError: propagated from ``codecs.lookup`` for unknown names.

    See: https://docs.python.org/3.7/library/codecs.html#standard-encodings
    """
    candidate = chardet_name.lower()
    candidate = candidate.replace('iso-', 'iso')
    candidate = candidate.replace('-', '_')
    canonical = codecs.lookup(candidate).name

    # chardet reports every GB-based encoding as 'gb2312', and decoding then
    # fails when the text contains certain special characters. gb18030 is a
    # larger character set than gb2312, so upgrade to it for better tolerance.
    return 'gb18030' if canonical == 'gb2312' else canonical
Example #2
Source File: shell.py From supersqlite with MIT License | 6 votes |
def pop_output(self):
    """Restore the most recently pushed output settings.

    Output is governed by many parameters (nullvalue, mode such as
    list/tcl/html/insert, column widths, header, ...).  To change some of
    them temporarily, call :meth:`push_output`, adjust the settings, and
    call this method to put the previous ones back.

    A typical use is a command like .dump: push the current output, switch
    the mode to insert so SQL inserts are printed, then pop to return to
    whatever was in effect before.

    The bottom entry of the stack is never removed: when only one entry is
    left it is re-applied in place instead of being popped.
    """
    stack = self._output_stack
    # first item should always be present
    assert len(stack)
    saved = stack[0] if len(stack) == 1 else stack.pop()
    for name, value in saved.items():
        setattr(self, name, value)
Example #3
Source File: shell.py From supersqlite with MIT License | 6 votes |
def process_command(self, cmd):
    """Run a single dot command.

    The command text is tokenised with the `shlex.split
    <http://docs.python.org/library/shlex.html#shlex.split>`__ function,
    which is roughly the same method used by Unix/POSIX shells.  The first
    token must begin with ``.``; the remainder of that token selects the
    ``command_<name>`` attribute on ``self``, which is called with the list
    of remaining tokens.

    Raises ``self.Error`` when no such ``command_<name>`` attribute exists.
    """
    if self.echo:
        self.write(self.stderr, cmd+"\n")

    if sys.version_info >= (3, 0):
        parts = shlex.split(cmd)
    else:
        # shlex is broken with unicode on Python 2, so split the utf8 bytes
        # and decode each token afterwards
        parts = [token.decode("utf8") for token in shlex.split(cmd.encode("utf8"))]

    assert parts[0][0] == "."
    name = parts[0][1:]
    handler = getattr(self, "command_" + name, None)
    if not handler:
        raise self.Error("Unknown command \"%s\". Enter \".help\" for help" % (name,))
    handler(parts[1:])

###
### Commands start here
###
Example #4
Source File: surrogateescape.py From caterpillar with Apache License 2.0 | 6 votes |
def error_handler(error):
    """Error handler for surrogateescape decoding.

    Should be used with an ASCII-compatible encoding (e.g., 'latin-1' or
    'utf-8'). Replaces any invalid byte sequences with surrogate code points
    (each offending byte ``b`` becomes the code point ``0xDC00 + b``).

    As specified in
    https://docs.python.org/2/library/codecs.html#codecs.register_error.

    Works on both Python 2 and Python 3: the offending slice is read through
    ``bytearray`` so every item is an int regardless of interpreter version.

    :param error: the exception instance handed over by the codec machinery.
    :return: ``(replacement_text, resume_position)`` where the resume
        position is ``error.end``.
    :raises: ``error`` itself when it is not a ``UnicodeDecodeError`` or when
        one of the offending bytes is below 128.
    """
    # We can't use this with UnicodeEncodeError; the UTF-8 encoder doesn't
    # raise an error for surrogates. Instead, use encode.
    if not isinstance(error, UnicodeDecodeError):
        raise error

    try:
        to_char = unichr  # Python 2 spelling
    except NameError:
        to_char = chr  # Python 3: chr() already covers the full range

    result = []
    # bytearray() yields ints on both Python 2 (str) and Python 3 (bytes),
    # which avoids calling ord() on something that is already an int.
    for byte in bytearray(error.object[error.start:error.end]):
        if byte < 128:
            # Only bytes >= 128 are mapped to surrogates; anything lower is
            # reported as a genuine decoding error.
            raise error
        result.append(to_char(0xdc00 + byte))
    return u''.join(result), error.end
Example #5
Source File: shell.py From magnitude with MIT License | 6 votes |
def pop_output(self):
    """Put back the output settings that were pushed most recently.

    Many parameters control output: nullvalue, mode (list/tcl/html/insert
    etc), column widths, header and so on.  When some of them need to be
    changed only for a while, use :meth:`push_output`, alter the settings,
    then call this to reinstate the earlier values.

    For example, a command like .dump pushes the current output, switches
    the mode to insert so that SQL inserts get printed, and finally pops to
    get back what was there before.

    When the stack holds a single entry, that entry is applied again but
    left on the stack.
    """
    # first item should always be present
    assert len(self._output_stack)

    if len(self._output_stack) == 1:
        settings = self._output_stack[0]
    else:
        settings = self._output_stack.pop()

    for attribute in settings.items():
        setattr(self, *attribute)
Example #6
Source File: shell.py From magnitude with MIT License | 6 votes |
def process_command(self, cmd):
    """Dispatch one dot command to its ``command_*`` handler.

    Splitting into parts is done with the `shlex.split
    <http://docs.python.org/library/shlex.html#shlex.split>`__ function,
    which is roughly the same method used by Unix/POSIX shells.  The leading
    ``.`` is stripped from the first part and the rest of it names the
    handler: ``.foo a b`` calls ``self.command_foo(["a", "b"])``.

    ``self.Error`` is raised if ``self`` has no matching handler.
    """
    if self.echo:
        self.write(self.stderr, cmd+"\n")

    on_python2 = sys.version_info < (3, 0)
    if on_python2:
        # broken with unicode on Python 2!!!  Feed shlex utf8 bytes and
        # turn every resulting piece back into unicode.
        pieces = [piece.decode("utf8") for piece in shlex.split(cmd.encode("utf8"))]
    else:
        pieces = shlex.split(cmd)

    assert pieces[0][0] == "."
    command, arguments = pieces[0][1:], pieces[1:]

    fn = getattr(self, "command_" + command, None)
    if not fn:
        raise self.Error("Unknown command \"%s\". Enter \".help\" for help" % (command,))
    fn(arguments)

###
### Commands start here
###
Example #7
Source File: unit_tests.py From roberteldersoftwarediff with Apache License 2.0 | 6 votes |
def get_special_case_params():
    """Return one randomly selected argument list from a fixed set of cases.

    Each case is a list of command-line style arguments: two file paths,
    optionally followed by option flags and their values.  The list of cases
    is rebuilt on every call, so the returned list is not shared between
    calls.
    """
    # The windows and unix specific tests should be tested on both unix and
    # Windows to detect crashes.
    special_cases = [
        [u"noexist", u"noexist"],
        [u"tests/ascii/ex1", u"noexist"],
        [u"noexist", u"tests/ascii/ex1"],
        [u"tests/ascii/ex1", u"tests/ascii/ex1", "--outfile", "/dev/null"],
        [u"tests/ascii/ex1", u"tests/ascii/ex2"],
        [u"tests/utf_8/ex3", u"tests/utf_8/ex4"],
        [
            u"tests/utf_8/ex3", u"tests/utf_8/ex4",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/ex3", u"tests/utf_8/ex4",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
        ],
        [u"tests/ascii/ex5", u"tests/ascii/ex6"],
        [u"tests/ascii/ex7", u"tests/ascii/ex8"],
        [u"tests/ascii/a.json", u"tests/ascii/b.json"],
        [
            u"tests/ascii/a.json", u"tests/ascii/b.json",
            u"--push-delimiters", u"\"{\"", u"\"[\"",
            u"--pop-delimiters", u"\"}\"", u"\"]\"",
            u"--include-delimiters",
        ],
        [
            u"tests/utf_8/fancy1", u"tests/utf_8/fancy2",
            u"--delimiters", u"日本国",
            u"--include-delimiters",
            u"--parameters-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--oldfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/fancy1", u"tests/utf_8/fancy2",
            u"--delimiters", u"\"\\u65e5\\u672c\\u56fd\"",
            u"--include-delimiters",
            u"--parameters-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--oldfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/this-is-encoded-in-utf-8",
            u"tests/utf_16/this-is-encoded-in-utf-16",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-16\"",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--enable-mark",
        ],
        [u"tests/ascii/a.html", u"tests/ascii/b.html", u"-m", u"html"],
    ]
    return random.choice(special_cases)
Example #8
Source File: codec.py From naz with MIT License | 5 votes |
def decode(input: bytes, errors: str = "strict") -> typing.Tuple[str, int]:
    """
    Decode big-endian UTF-16 bytes into a string.

    Parameters:
        input: the bytes to decode
        errors: same meaning as the errors argument to pythons' `encode <https://docs.python.org/3/library/codecs.html#codecs.encode>`_ method

    Returns:
        the tuple produced by ``codecs.utf_16_be_decode``: the decoded string
        and the number of input bytes consumed.

    Raises:
        UnicodeDecodeError: under ``errors="strict"`` when ``input`` is not
        valid UTF-16BE, including input that ends in the middle of a code unit.
    """
    # This is a one-shot (stateless) decode, so the input must be treated as
    # complete: pass final=True. With the default final=False a dangling
    # trailing byte is silently left unconsumed instead of being routed
    # through the ``errors`` handler.
    return codecs.utf_16_be_decode(input, errors, True)
Example #9
Source File: codec.py From naz with MIT License | 5 votes |
def register_codecs(custom_codecs: typing.Union[None, typing.Dict[str, codecs.CodecInfo]] = None):
    """
    Install a codec search function covering both custom and naz inbuilt codecs.

    A custom codec registered under the same encoding name as an inbuilt one
    takes precedence over it.
    Users should never have to use this directly, instead; use `naz.Client(custom_codecs={"my_encoding": codecs.CodecInfo(name="my_encoding", encode=..., decode=...)})`

    Parameters:
        custom_codecs: a mapping of encoding name to the ``codecs.CodecInfo`` to register for it.
    """
    user_codecs = {} if custom_codecs is None else custom_codecs

    # Note: registering a search function cannot currently be undone, which
    # may cause problems in some cases, such as unit testing or module
    # reloading.
    # https://docs.python.org/3.7/library/codecs.html#codecs.register
    #
    # Note: encodings are first looked up in the registry's cache, so calling
    # `register_codecs` a second time with different codecs may not take
    # effect; codecs.lookup(encoding) will keep returning the first codecs
    # because they were stored in the cache.
    # There doesn't appear to be a way to clear the codec cache at runtime.
    # see: https://docs.python.org/3/library/codecs.html#codecs.lookup

    def _codec_search_function(_encoding):
        """
        Look in the custom codecs first and fall back to the inbuilt ones.
        That way an overridden inbuilt codec resolves to the caller's
        implementation, which is then the one that gets cached.
        """
        return user_codecs.get(_encoding) or _INBUILT_CODECS.get(_encoding)

    codecs.register(_codec_search_function)
Example #10
Source File: shell.py From magnitude with MIT License | 5 votes |
# usage(): returns the command-line help text stored in ``msg`` with leading
# whitespace removed via ``msg.lstrip()``. The text names the positional
# arguments (FILENAME, then SQL or dot commands) and lists the supported
# options.
# NOTE(review): in this copy the whole definition -- including the
# triple-quoted help text -- sits on a single physical line, so the literal's
# original line breaks and indentation cannot be seen here. Confirm the layout
# against the upstream shell.py before editing the string; the docstring asks
# for the message to be newline terminated.
def usage(self): "Returns the usage message. Make sure it is newline terminated" msg=""" Usage: program [OPTIONS] FILENAME [SQL|CMD] [SQL|CMD]... FILENAME is the name of a SQLite database. A new database is created if the file does not exist. OPTIONS include: -init filename read/process named file -echo print commands before execution -[no]header turn headers on or off -bail stop after hitting an error -interactive force interactive I/O -batch force batch I/O -column set output mode to 'column' -csv set output mode to 'csv' -html set output mode to 'html' -line set output mode to 'line' -list set output mode to 'list' -python set output mode to 'python' -separator 'x' set output field separator (|) -nullvalue 'text' set text string for NULL values -version show SQLite version -encoding 'name' the encoding to use for files opened via .import, .read & .output -nocolour disables colour output to screen """ return msg.lstrip() ### ### Value formatting routines. They take a value and return a ### text formatting of them. Mostly used by the various output's ### but also by random other pieces of code. ###
Example #11
Source File: codec.py From naz with MIT License | 5 votes |
def encode(input: str, errors: str = "strict") -> typing.Tuple[bytes, int]:
    """
    Encode a string as big-endian UTF-16.

    Parameters:
        input: the string to encode
        errors: same meaning as the errors argument to pythons' `encode <https://docs.python.org/3/library/codecs.html#codecs.encode>`_ method

    Returns:
        the tuple produced by ``codecs.utf_16_be_encode``: the encoded bytes
        object and a length.
    """
    # https://github.com/google/pytype/issues/348
    encoded = codecs.utf_16_be_encode(input, errors)
    return encoded
Example #12
Source File: codec.py From naz with MIT License | 5 votes |
def decode(input: bytes, errors: str = "strict") -> typing.Tuple[str, int]:
    """
    Decode bytes by looking each one up in the ``GSM7BitCodec`` tables.

    A byte equal to 27 is an escape: the byte that follows it is looked up in
    ``GSM7BitCodec.gsm_extension``; every other byte is looked up in
    ``GSM7BitCodec.gsm_basic_charset``.  A lookup that raises ``IndexError``
    is delegated to ``GSM7BitCodec._handle_decode_error`` and whatever that
    returns is used in place of the character.

    Parameters:
        input: the bytes to decode
        errors: same meaning as the errors argument to pythons' `encode <https://docs.python.org/3/library/codecs.html#codecs.encode>`_ method

    Returns:
        a tuple of the decoded string and the length of that string.
    """
    # A single shared iterator: the loop and the escape handling below both
    # advance it, so the byte after an escape is not visited by the loop.
    byte_stream = iter(input)
    chars = []
    # NOTE(review): ``position`` counts loop iterations, so after an escape
    # pair it no longer equals the byte offset into ``input`` -- confirm this
    # is what _handle_decode_error expects.
    for position, code in enumerate(byte_stream):
        try:
            if code == 27:
                # NOTE(review): next() raises StopIteration if the escape is
                # the final byte; only IndexError is handled here.
                code = next(byte_stream)
                table = GSM7BitCodec.gsm_extension
            else:
                table = GSM7BitCodec.gsm_basic_charset
            chars.append(table[code])
        except IndexError as indexErrorException:
            chars.append(
                GSM7BitCodec._handle_decode_error(
                    code, errors, position, input, indexErrorException
                )
            )

    text = "".join(chars)
    return (text, len(text))
Example #13
Source File: _util.py From shadowsocks with Apache License 2.0 | 5 votes |
def find_encodings(enc=None, system=False):
    """Find functions for encoding translations for a specific codec.

    :param str enc: The codec to find translation functions for. It will be
                    normalized by converting to lowercase and passing it
                    through ``encodings.normalize_encoding``. Defaults to
                    ``'utf-8'`` when empty or ``None``.

    :param bool system: If True, use the encoding reported by ``sys.stdin``
                        (falling back to ``'ascii'`` when stdin reports
                        none); this overrides ``enc``.

    :raises: :exc:LookupError if the normalized codec, ``enc``, cannot be
             found in Python's encoding translation map.

    :returns: the ``codecs.CodecInfo`` returned by ``codecs.lookup``.
    """
    if not enc:
        enc = 'utf-8'

    if system:
        # Use stdin's encoding only when it actually reports one; stdin may
        # be missing, replaced, or have ``encoding`` set to None.
        stdin_encoding = getattr(sys.stdin, 'encoding', None)
        if stdin_encoding is not None:
            enc = stdin_encoding
            log.debug("Obtained encoding from stdin: %s", enc)
        else:
            enc = 'ascii'

    ## have to have lowercase to work, see
    ## http://docs.python.org/dev/library/codecs.html#standard-encodings
    enc = enc.lower()
    codec_alias = encodings.normalize_encoding(enc)

    # The ``encodings`` package registers ``encodings.search_function`` itself
    # when it is imported, so it is not registered again here: doing so on
    # every call would only add duplicate entries to the codec search path.
    return codecs.lookup(codec_alias)
Example #14
Source File: _util.py From shadowsocks with Apache License 2.0 | 5 votes |
def b(x):
    """Encode ``x`` with the codec returned by ``find_encodings()``.

    A ``bytes`` argument is first decoded with that same codec and then
    encoded again; anything else is encoded directly.

    See http://python3porting.com/problems.html#nicer-solutions
    """
    coder = find_encodings()
    text = x.decode(coder.name) if isinstance(x, bytes) else x
    return coder.encode(text)[0]
Example #15
Source File: _util.py From shadowsocks with Apache License 2.0 | 5 votes |
def b(x):
    """Return ``x`` unchanged.

    See http://python3porting.com/problems.html#nicer-solutions
    """
    return x
Example #16
Source File: codec.py From naz with MIT License | 5 votes |
def encode(input: str, errors: str = "strict") -> typing.Tuple[bytes, int]:
    """
    Encode a string using the ``GSM7BitCodec`` lookup maps.

    Each character is looked up in ``GSM7BitCodec.gsm_basic_charset_map``
    first.  If it is absent there, ``GSM7BitCodec.gsm_extension_map`` is
    tried and a hit is emitted as the escape code 27 followed by the mapped
    code.  A character found in neither map is passed to
    ``GSM7BitCodec._handle_encode_error`` and its return value is used.

    Parameters:
        input: the string to encode
        errors: same meaning as the errors argument to pythons' `encode <https://docs.python.org/3/library/codecs.html#codecs.encode>`_ method

    Returns:
        a tuple of the encoded bytes object and the length of those bytes.
    """
    # for the types of this method,
    # see: https://github.com/python/typeshed/blob/f7d240f06e5608a20b2daac4e96fe085c0577239/stdlib/2and3/codecs.pyi#L21-L22
    escape = chr(27)
    pieces = []
    for position, char in enumerate(input):
        code = GSM7BitCodec.gsm_basic_charset_map.get(char)
        if code is not None:
            pieces.append(chr(code))
            continue

        code = GSM7BitCodec.gsm_extension_map.get(char)
        if code is None:
            pieces.append(GSM7BitCodec._handle_encode_error(char, errors, position, input))
        else:
            pieces.append(escape + chr(code))

    # Turning the text into bytes through latin-1 is equivalent to
    #   import six; six.b('someString')
    # see:
    # https://github.com/benjaminp/six/blob/68112f3193c7d4bef5ad86ed1b6ed528edd9093d/six.py#L625
    payload = "".join(pieces).encode("latin-1")
    return (payload, len(payload))
Example #17
Source File: shell.py From supersqlite with MIT License | 5 votes |
# usage(): returns the command-line help text stored in ``msg`` with leading
# whitespace removed via ``msg.lstrip()``. The text names the positional
# arguments (FILENAME, then SQL or dot commands) and lists the supported
# options.
# NOTE(review): in this copy the whole definition -- including the
# triple-quoted help text -- sits on a single physical line, so the literal's
# original line breaks and indentation cannot be seen here. Confirm the layout
# against the upstream shell.py before editing the string; the docstring asks
# for the message to be newline terminated.
def usage(self): "Returns the usage message. Make sure it is newline terminated" msg=""" Usage: program [OPTIONS] FILENAME [SQL|CMD] [SQL|CMD]... FILENAME is the name of a SQLite database. A new database is created if the file does not exist. OPTIONS include: -init filename read/process named file -echo print commands before execution -[no]header turn headers on or off -bail stop after hitting an error -interactive force interactive I/O -batch force batch I/O -column set output mode to 'column' -csv set output mode to 'csv' -html set output mode to 'html' -line set output mode to 'line' -list set output mode to 'list' -python set output mode to 'python' -separator 'x' set output field separator (|) -nullvalue 'text' set text string for NULL values -version show SQLite version -encoding 'name' the encoding to use for files opened via .import, .read & .output -nocolour disables colour output to screen """ return msg.lstrip() ### ### Value formatting routines. They take a value and return a ### text formatting of them. Mostly used by the various output's ### but also by random other pieces of code. ###
Example #18
Source File: jproperties.py From community-edition-setup with MIT License | 5 votes |
def _jbackslashreplace_error_handler(err):
    """
    Encoding error handler that substitutes the unencodable characters with
    the result of ``_escape_non_ascii`` (Java-compliant Unicode escape
    sequences).

    :param err: An `:exc:UnicodeEncodeError` instance; any other exception is
        re-raised unchanged.
    :return: A tuple of the replacement and the position at which encoding
        should resume (``err.end``). See
        https://docs.python.org/2/library/codecs.html?highlight=codecs#codecs.register_error
    """
    if isinstance(err, UnicodeEncodeError):
        offending = err.object[err.start:err.end]
        return _escape_non_ascii(offending), err.end
    raise err
Example #19
Source File: formats.py From fuel with MIT License | 5 votes |
def open_(filename, mode='r', encoding=None):
    """Open a text file with encoding and optional gzip compression.

    Note that on legacy Python any encoding other than ``None`` or opening
    GZipped files will return an unpicklable file-like object.

    For ``.gz`` files the compressed stream is always opened without an
    encoding and wrapped in a buffered reader; when ``encoding`` is given, a
    ``codecs`` stream reader for it is layered on top. Without an encoding
    the buffered reader itself is returned.

    Parameters
    ----------
    filename : str
        The filename to read.
    mode : str, optional
        The mode with which to open the file. Defaults to `r`.
    encoding : str, optional
        The encoding to use (see the codecs documentation_ for supported
        values). Defaults to ``None``.

    Returns
    -------
    file-like object
        The opened file.

    .. _documentation:
    https://docs.python.org/3/library/codecs.html#standard-encodings

    """
    if filename.endswith('.gz'):
        # gzip.open() rejects an ``encoding`` argument in binary mode on
        # Python 3, so never pass it along. Decoding is done on top of the
        # buffered binary stream instead, which works on every version.
        zf = io.BufferedReader(gzip.open(filename, mode))
        if encoding:
            return codecs.getreader(encoding)(zf)
        return zf

    if six.PY2:
        # The legacy built-in open() has no ``encoding`` parameter.
        if encoding:
            return codecs.open(filename, mode, encoding=encoding)
        return open(filename, mode)
    return open(filename, mode, encoding=encoding)
Example #20
Source File: codecs.py From Emoji-Tools with GNU General Public License v3.0 | 5 votes |
def encode(self, input, errors='strict'): assert errors == 'strict' #return codecs.encode(input, self.base_encoding, self.name), len(input) # The above line could totally be all we needed, relying on the error # handling to replace the unencodable Unicode characters with our extended # byte sequences. # # However, there seems to be a design bug in Python (probably intentional): # the error handler for encoding is supposed to return a **Unicode** character, # that then needs to be encodable itself... Ugh. # # So we implement what codecs.encode() should have been doing: which is expect # error handler to return bytes() to be added to the output. # # This seems to have been fixed in Python 3.3. We should try using that and # use fallback only if that failed. # https://docs.python.org/3.3/library/codecs.html#codecs.register_error length = len(input) out = b'' while input: try: part = codecs.encode(input, self.base_encoding) out += part input = '' # All converted except UnicodeEncodeError as e: # Convert the correct part out += codecs.encode(input[:e.start], self.base_encoding) replacement, pos = self.error(e) out += replacement input = input[pos:] return out, length
Example #21
Source File: basic.py From EasY_HaCk with Apache License 2.0 | 4 votes |
def decodePage(page, contentEncoding, contentType): """ Decode compressed/charset HTTP response """ if not page or (conf.nullConnection and len(page) < 2): return getUnicode(page) if isinstance(contentEncoding, basestring) and contentEncoding: contentEncoding = contentEncoding.lower() else: contentEncoding = "" if isinstance(contentType, basestring) and contentType: contentType = contentType.lower() else: contentType = "" if contentEncoding in ("gzip", "x-gzip", "deflate"): if not kb.pageCompress: return None try: if contentEncoding == "deflate": data = StringIO.StringIO(zlib.decompress(page, -15)) # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations else: data = gzip.GzipFile("", "rb", 9, StringIO.StringIO(page)) size = struct.unpack("<l", page[-4:])[0] # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py if size > MAX_CONNECTION_TOTAL_SIZE: raise Exception("size too large") page = data.read() except Exception, msg: if "<html" not in page: # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored) errMsg = "detected invalid data for declared content " errMsg += "encoding '%s' ('%s')" % (contentEncoding, msg) singleTimeLogMessage(errMsg, logging.ERROR) warnMsg = "turning off page compression" singleTimeWarnMessage(warnMsg) kb.pageCompress = False raise SqlmapCompressionException