Python codecs.utf_8_decode() Examples
The following are 30 code examples of codecs.utf_8_decode().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module codecs, or try the search function.
Example #1
Source File: test_codecs.py From Fluid-Designer with GNU General Public License v3.0 | 6 votes |
def test_decode_unicode(self):
    """Verify that the raw codec decoders reject text (``str``) input.

    Each decoder named below is called with the string ``"xxx"`` and is
    expected to raise ``TypeError``.
    """
    # Most decoders don't accept unicode input
    decoder_names = (
        "utf_7_decode",
        "utf_8_decode",
        "utf_16_le_decode",
        "utf_16_be_decode",
        "utf_16_ex_decode",
        "utf_32_decode",
        "utf_32_le_decode",
        "utf_32_be_decode",
        "utf_32_ex_decode",
        "latin_1_decode",
        "ascii_decode",
        "charmap_decode",
    )
    decoders = [getattr(codecs, name) for name in decoder_names]
    # mbcs_decode is only exercised when this codecs module provides it.
    if hasattr(codecs, "mbcs_decode"):
        decoders.append(codecs.mbcs_decode)
    for rejects_text in decoders:
        self.assertRaises(TypeError, rejects_text, "xxx")
Example #2
Source File: utf_8_sig.py From Fluid-Designer with GNU General Public License v3.0 | 6 votes |
def _buffer_decode(self, input, errors, final):
    """Decode a chunk of UTF-8, skipping a leading BOM on the first chunk.

    While ``self.first`` is truthy the start of the stream has not been
    examined yet.  Once enough bytes are available to decide whether a BOM
    is present, ``self.first`` is reset to ``0``.

    Returns:
        A ``(text, consumed)`` tuple; ``("", 0)`` when fewer than three bytes
        are available and they could still be the start of a BOM.
    """
    if not self.first:
        return codecs.utf_8_decode(input, errors, final)

    if len(input) < 3:
        if codecs.BOM_UTF8.startswith(input):
            # not enough data to decide if this really is a BOM
            # => try again on the next call
            return ("", 0)
        self.first = 0
        return codecs.utf_8_decode(input, errors, final)

    self.first = 0
    if input[:3] != codecs.BOM_UTF8:
        return codecs.utf_8_decode(input, errors, final)

    # Decode what follows the BOM, but report the BOM bytes as consumed too.
    text, used = codecs.utf_8_decode(input[3:], errors, final)
    return (text, used + 3)
Example #3
Source File: __init__.py From nocolon with BSD 3-Clause "New" or "Revised" License | 6 votes |
def decode(input, errors='strict', *args):
    """Decode UTF-8 source text and add the colon that opens each indented block.

    The bytes are decoded with ``codecs.utf_8_decode`` (``final=True``).  The
    text is then scanned line by line: whenever a non-blank line is indented
    deeper than the previous non-blank line, the nearest preceding non-blank
    line gets a ``:`` appended unless it already ends with one.

    Args:
        input: Bytes-like object holding UTF-8 encoded text.
        errors: Error-handler name forwarded to the UTF-8 decoder.
        *args: Accepted and ignored.

    Returns:
        A ``(text, consumed)`` tuple: the rewritten text and the number of
        input bytes the UTF-8 decoder consumed.
    """
    text, consumed = codecs.utf_8_decode(input, errors, True)
    out = []
    offset = 0
    for line in text.split('\n'):
        if line.strip():
            indent = len(line) - len(line.lstrip())
            if indent > offset:
                # Walk back over blank lines to the line that opens the block.
                opener = next(
                    (i for i in range(len(out) - 1, -1, -1) if out[i].strip()),
                    None,
                )
                # The text may begin with an indented line, or have only blank
                # lines before it; then there is no line to put a colon on.
                if opener is not None and not out[opener].rstrip().endswith(':'):
                    out[opener] += ':'
            offset = indent
        out.append(line)
    return '\n'.join(out), consumed
Example #4
Source File: utf_8_sig.py From kobo-predict with BSD 2-Clause "Simplified" License | 6 votes |
def _buffer_decode(self, input, errors, final):
    """Decode buffered UTF-8 input, dropping a BOM found at the stream start.

    ``self.first`` stays truthy until the first bytes have been classified
    as "BOM" or "not BOM"; it is then set to ``0``.

    Returns:
        ``(text, consumed)``.  ``("", 0)`` is returned when the data is
        shorter than three bytes and is still a possible BOM prefix.
    """
    bom = codecs.BOM_UTF8
    if self.first:
        undecided = len(input) < 3 and bom.startswith(input)
        if undecided:
            # not enough data to decide if this really is a BOM
            # => try again on the next call
            return ("", 0)
        self.first = 0
        if len(input) >= 3 and input[:3] == bom:
            # The three BOM bytes count as consumed although they yield no text.
            text, used = codecs.utf_8_decode(input[3:], errors, final)
            return (text, used + 3)
    return codecs.utf_8_decode(input, errors, final)
Example #5
Source File: utf_8_sig.py From Imogen with MIT License | 6 votes |
def _buffer_decode(self, input, errors, final):
    """Decode UTF-8 bytes, removing a byte order mark seen on the first chunk.

    The BOM check runs only while ``self.first`` is truthy; the attribute is
    set to ``0`` as soon as the check has been decided either way.

    Returns:
        ``(text, consumed)``; ``("", 0)`` if the chunk is a proper prefix of
        the BOM and more data is needed to decide.
    """
    skip = 0
    if self.first:
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this really is a BOM
                # => try again on the next call
                return ("", 0)
            self.first = 0
        else:
            self.first = 0
            if input[:3] == codecs.BOM_UTF8:
                skip = 3
    if skip:
        # Skipped BOM bytes are added back into the consumed count.
        decoded, count = codecs.utf_8_decode(input[skip:], errors, final)
        return (decoded, count + skip)
    return codecs.utf_8_decode(input, errors, final)
Example #6
Source File: utf_8_sig.py From scylla with Apache License 2.0 | 6 votes |
def _buffer_decode(self, input, errors, final):
    """Decode one buffer of UTF-8 data, ignoring a BOM at the very beginning.

    ``self.first`` marks that the opening bytes are still unchecked.  It is
    cleared (set to ``0``) once the presence or absence of a BOM is known.

    Returns:
        The ``(text, consumed)`` pair; ``("", 0)`` when fewer than three
        bytes were given and they match the beginning of the BOM.
    """
    if not self.first:
        return codecs.utf_8_decode(input, errors, final)

    if len(input) < 3:
        if codecs.BOM_UTF8.startswith(input):
            # not enough data to decide if this really is a BOM
            # => try again on the next call
            return ("", 0)
        self.first = 0
        return codecs.utf_8_decode(input, errors, final)

    self.first = 0
    if input[:3] != codecs.BOM_UTF8:
        return codecs.utf_8_decode(input, errors, final)

    # Account for the three BOM bytes in the consumed count.
    body_text, body_used = codecs.utf_8_decode(input[3:], errors, final)
    return (body_text, body_used + 3)
Example #7
Source File: utf_8_sig.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def _buffer_decode(self, input, errors, final):
    """Decode UTF-8 input and strip a BOM if it opens the stream.

    The BOM test is performed only while ``self.first`` is truthy and the
    attribute is set to ``0`` once the test has an answer.

    Returns:
        ``(text, consumed)``, or ``("", 0)`` when the input is too short to
        tell whether it begins with a BOM.
    """
    bom = codecs.BOM_UTF8
    if self.first:
        needs_more = len(input) < 3 and bom.startswith(input)
        if needs_more:
            # not enough data to decide if this really is a BOM
            # => try again on the next call
            return ("", 0)
        self.first = 0
        if len(input) >= 3 and input[:3] == bom:
            # The BOM produces no text but its bytes are still consumed.
            decoded, used = codecs.utf_8_decode(input[3:], errors, final)
            return (decoded, used + 3)
    return codecs.utf_8_decode(input, errors, final)
Example #8
Source File: utf_8_sig.py From ironpython3 with Apache License 2.0 | 6 votes |
def _buffer_decode(self, input, errors, final):
    """Decode a UTF-8 buffer; a BOM at the start of the stream is skipped.

    ``self.first`` is truthy until the leading bytes have been inspected and
    is set to ``0`` afterwards.

    Returns:
        A ``(text, consumed)`` tuple.  ``("", 0)`` signals that the short
        input may be an incomplete BOM and nothing was consumed.
    """
    bom_bytes = 0
    if self.first:
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this really is a BOM
                # => try again on the next call
                return ("", 0)
            self.first = 0
        else:
            self.first = 0
            if input[:3] == codecs.BOM_UTF8:
                bom_bytes = 3
    if bom_bytes:
        # Report the BOM as consumed in addition to the decoded payload.
        text, used = codecs.utf_8_decode(input[bom_bytes:], errors, final)
        return (text, used + bom_bytes)
    return codecs.utf_8_decode(input, errors, final)
Example #9
Source File: utf_8_sig.py From telegram-robot-rss with Mozilla Public License 2.0 | 5 votes |
def _buffer_decode(self, input, errors, final):
    """Decode a chunk of UTF-8, skipping a leading BOM on the first chunk.

    While ``self.first`` is truthy the start of the stream has not been
    examined yet; it is set to ``None`` once the BOM question is settled.

    Returns:
        A ``(text, consumed)`` tuple; ``(u"", 0)`` when fewer than three
        bytes are available and they could still be the start of a BOM.
    """
    if not self.first:
        return codecs.utf_8_decode(input, errors, final)

    if len(input) < 3:
        if codecs.BOM_UTF8.startswith(input):
            # not enough data to decide if this really is a BOM
            # => try again on the next call
            return (u"", 0)
        self.first = None
        return codecs.utf_8_decode(input, errors, final)

    self.first = None
    if input[:3] != codecs.BOM_UTF8:
        return codecs.utf_8_decode(input, errors, final)

    # Decode what follows the BOM, but report the BOM bytes as consumed too.
    text, used = codecs.utf_8_decode(input[3:], errors, final)
    return (text, used + 3)
Example #10
Source File: reader.py From bash-lambda-layer with MIT License | 5 votes |
def determine_encoding(self):
    """Pick the decoder for the raw input based on a UTF-16 byte order mark.

    Raw data is pulled with ``self.update_raw()`` until at least two items
    are buffered or ``self.eof`` is set.  If the buffer is ``bytes``,
    ``self.raw_decode`` and ``self.encoding`` are set to UTF-16-LE or
    UTF-16-BE when the matching BOM is present, otherwise to UTF-8.
    ``self.update(1)`` is called last in every case.
    """
    while not self.eof:
        buffered = self.raw_buffer
        if buffered is not None and len(buffered) >= 2:
            break
        self.update_raw()

    raw = self.raw_buffer
    if isinstance(raw, bytes):
        if raw.startswith(codecs.BOM_UTF16_LE):
            decoder, name = codecs.utf_16_le_decode, 'utf-16-le'
        elif raw.startswith(codecs.BOM_UTF16_BE):
            decoder, name = codecs.utf_16_be_decode, 'utf-16-be'
        else:
            # No UTF-16 BOM: fall back to UTF-8.
            decoder, name = codecs.utf_8_decode, 'utf-8'
        self.raw_decode = decoder
        self.encoding = name
    self.update(1)
Example #11
Source File: utf_8.py From ironpython3 with Apache License 2.0 | 5 votes |
def decode(input, errors='strict'):
    """Decode a complete UTF-8 byte string.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        The ``(text, consumed)`` tuple produced by ``codecs.utf_8_decode``.
    """
    # The third argument is the decoder's ``final`` flag: the input is
    # treated as the whole payload rather than one chunk of a stream.
    is_final = True
    return codecs.utf_8_decode(input, errors, is_final)
Example #12
Source File: utf_8_sig.py From telegram-robot-rss with Mozilla Public License 2.0 | 5 votes |
def decode(self, input, errors='strict'):
    """Decode UTF-8 input, skipping a BOM at the start of the stream.

    Once the BOM question is settled, ``self.decode`` is replaced with
    ``codecs.utf_8_decode`` so later calls go straight to the plain decoder.

    Returns:
        ``(text, consumed)``; ``(u"", 0)`` when the input is shorter than
        three bytes and could still be the beginning of a BOM.
    """
    bom = codecs.BOM_UTF8
    short = len(input) < 3
    if short and bom.startswith(input):
        # not enough data to decide if this is a BOM
        # => try again on the next call
        return (u"", 0)

    # Decided either way: bypass this method from now on.
    self.decode = codecs.utf_8_decode
    if not short and input[:3] == bom:
        text, used = codecs.utf_8_decode(input[3:], errors)
        return (text, used + 3)
    # (else) no BOM present
    return codecs.utf_8_decode(input, errors)

### encodings module API
Example #13
Source File: utf_8.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def decode(input, errors='strict'):
    """Decode UTF-8 bytes in a single, non-incremental call.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        The ``(text, consumed)`` tuple from ``codecs.utf_8_decode``.
    """
    # ``True`` is the decoder's ``final`` flag (no further data will follow).
    result = codecs.utf_8_decode(input, errors, True)
    return result
Example #14
Source File: utf_8_sig.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def decode(self, input, errors='strict'):
    """Decode UTF-8 input and drop a byte order mark that opens the stream.

    After the first call that can decide whether a BOM is present, the
    instance attribute ``decode`` is rebound to ``codecs.utf_8_decode``.

    Returns:
        ``(text, consumed)``; ``("", 0)`` if fewer than three bytes were
        given and they match the start of the BOM.
    """
    if len(input) < 3:
        if codecs.BOM_UTF8.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return ("", 0)
        has_bom = False
    else:
        has_bom = input[:3] == codecs.BOM_UTF8

    # The check is done; later calls use the plain UTF-8 decoder directly.
    self.decode = codecs.utf_8_decode
    if not has_bom:
        # (else) no BOM present
        return codecs.utf_8_decode(input, errors)
    output, consumed = codecs.utf_8_decode(input[3:], errors)
    return (output, consumed + 3)

### encodings module API
Example #15
Source File: utf_8.py From telegram-robot-rss with Mozilla Public License 2.0 | 5 votes |
def decode(input, errors='strict'):
    """Decode a whole UTF-8 byte string at once.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        A ``(text, consumed)`` tuple.
    """
    # final=True: the decoder is told that ``input`` is complete.
    text, consumed = codecs.utf_8_decode(input, errors, True)
    return text, consumed
Example #16
Source File: reader.py From cronyo with MIT License | 5 votes |
def determine_encoding(self):
    """Choose ``raw_decode``/``encoding`` from the first bytes of the input.

    ``self.update_raw()`` is called until two or more items are buffered or
    ``self.eof`` is true.  For a ``bytes`` buffer, a UTF-16 LE/BE byte order
    mark selects the matching UTF-16 decoder; anything else selects UTF-8.
    The method always finishes with ``self.update(1)``.
    """
    while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
        self.update_raw()

    if isinstance(self.raw_buffer, bytes):
        utf16_boms = (
            (codecs.BOM_UTF16_LE, codecs.utf_16_le_decode, 'utf-16-le'),
            (codecs.BOM_UTF16_BE, codecs.utf_16_be_decode, 'utf-16-be'),
        )
        for bom, decoder, label in utf16_boms:
            if self.raw_buffer.startswith(bom):
                break
        else:
            # Neither UTF-16 BOM matched, so the data is read as UTF-8.
            decoder, label = codecs.utf_8_decode, 'utf-8'
        self.raw_decode = decoder
        self.encoding = label
    self.update(1)
Example #17
Source File: utf_8_sig.py From ironpython3 with Apache License 2.0 | 5 votes |
def decode(self, input, errors='strict'):
    """Decode UTF-8 input, ignoring a BOM found at the start of the stream.

    As soon as the presence of a BOM is known, ``self.decode`` is replaced
    by ``codecs.utf_8_decode`` so this check is not repeated.

    Returns:
        ``(text, consumed)``; ``("", 0)`` while the short input could still
        turn out to be a BOM.
    """
    bom = codecs.BOM_UTF8
    short = len(input) < 3
    if short and bom.startswith(input):
        # not enough data to decide if this is a BOM
        # => try again on the next call
        return ("", 0)

    # Decided either way: hand later calls to the plain decoder.
    self.decode = codecs.utf_8_decode
    if not short and input[:3] == bom:
        decoded, count = codecs.utf_8_decode(input[3:], errors)
        return (decoded, count + 3)
    # (else) no BOM present
    return codecs.utf_8_decode(input, errors)

### encodings module API
Example #18
Source File: utf_8_sig.py From Imogen with MIT License | 5 votes |
def decode(input, errors='strict'):
    """Decode complete UTF-8 data, discarding a leading byte order mark.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        ``(text, consumed)`` where ``consumed`` includes the three BOM bytes
        when a BOM was present.
    """
    bom_len = 3 if input[:3] == codecs.BOM_UTF8 else 0
    payload = input[bom_len:] if bom_len else input
    text, used = codecs.utf_8_decode(payload, errors, True)
    return (text, used + bom_len)
Example #19
Source File: utf_8_sig.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def decode(input, errors='strict'):
    """Decode a whole UTF-8 byte string and strip a BOM at its start.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        ``(text, consumed)``; the BOM's three bytes are counted as consumed.
    """
    if input[:3] != codecs.BOM_UTF8:
        # No BOM: the decoder's own result already has the right count.
        output, consumed = codecs.utf_8_decode(input, errors, True)
        return (output, consumed)
    output, consumed = codecs.utf_8_decode(input[3:], errors, True)
    return (output, consumed + 3)
Example #20
Source File: reader.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def determine_encoding(self):
    """Select the raw decoder by looking for a UTF-16 byte order mark.

    Data is fetched through ``self.update_raw()`` while ``self.eof`` is
    false and fewer than two items are buffered.  A ``bytes`` buffer then
    sets ``self.raw_decode`` and ``self.encoding`` (UTF-16-LE, UTF-16-BE, or
    UTF-8 when no UTF-16 BOM is found).  ``self.update(1)`` runs at the end.
    """
    while True:
        if self.eof:
            break
        if self.raw_buffer is not None and len(self.raw_buffer) >= 2:
            break
        self.update_raw()

    head = self.raw_buffer
    if isinstance(head, bytes):
        if head.startswith(codecs.BOM_UTF16_LE):
            chosen = (codecs.utf_16_le_decode, 'utf-16-le')
        elif head.startswith(codecs.BOM_UTF16_BE):
            chosen = (codecs.utf_16_be_decode, 'utf-16-be')
        else:
            # Default when no UTF-16 BOM is present.
            chosen = (codecs.utf_8_decode, 'utf-8')
        self.raw_decode, self.encoding = chosen
    self.update(1)
Example #21
Source File: utf_8.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def decode(input, errors='strict'):
    """Decode UTF-8 bytes as one complete unit.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        The ``(text, consumed)`` tuple returned by ``codecs.utf_8_decode``.
    """
    utf8_decoder = codecs.utf_8_decode
    # Third argument: the ``final`` flag, always set for one-shot decoding.
    return utf8_decoder(input, errors, True)
Example #22
Source File: utf_8_sig.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def decode(self, input, errors='strict'):
    """Decode UTF-8 input; a byte order mark at the stream start is skipped.

    When the BOM check has an answer, the instance's ``decode`` attribute is
    set to ``codecs.utf_8_decode`` so the check runs only once.

    Returns:
        ``(text, consumed)``; ``("", 0)`` if the input is under three bytes
        long and matches the beginning of the BOM.
    """
    if len(input) < 3:
        if codecs.BOM_UTF8.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return ("", 0)
        starts_with_bom = False
    else:
        starts_with_bom = input[:3] == codecs.BOM_UTF8

    # From now on the plain decoder handles every call.
    self.decode = codecs.utf_8_decode
    if starts_with_bom:
        text, used = codecs.utf_8_decode(input[3:], errors)
        return (text, used + 3)
    # (else) no BOM present
    return codecs.utf_8_decode(input, errors)

### encodings module API
Example #23
Source File: utf_8_sig.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def decode(input, errors='strict'):
    """Decode complete UTF-8 data after removing a byte order mark, if any.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        ``(text, consumed)``; ``consumed`` covers the BOM bytes as well.
    """
    has_bom = input[:3] == codecs.BOM_UTF8
    if has_bom:
        input = input[3:]
    decoded = codecs.utf_8_decode(input, errors, True)
    # Add back the bytes that were sliced off before decoding.
    return (decoded[0], decoded[1] + (3 if has_bom else 0))
Example #24
Source File: __init__.py From mongo-mockup-db with Apache License 2.0 | 5 votes |
def _get_c_string(data, position):
    """Decode a BSON 'C' string to python unicode string.

    Reads from ``position`` up to the next NUL byte in ``data``.

    Returns:
        ``(text, next_position)`` where ``next_position`` is the index just
        past the terminating NUL.

    Raises:
        ValueError: if no NUL byte is found at or after ``position``
            (raised by ``data.index``).
    """
    terminator = data.index(b"\x00", position)
    raw = data[position:terminator]
    text, _ = _utf_8_decode(raw, None, True)
    return text, terminator + 1
Example #25
Source File: utf_8.py From pmatic with GNU General Public License v2.0 | 5 votes |
def decode(input, errors='strict'):
    """Decode UTF-8 data that is known to be complete.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        The ``(text, consumed)`` tuple from ``codecs.utf_8_decode``.
    """
    # ``final`` is fixed to True because no further input will be supplied.
    final = True
    outcome = codecs.utf_8_decode(input, errors, final)
    return outcome
Example #26
Source File: utf_8_sig.py From pmatic with GNU General Public License v2.0 | 5 votes |
def decode(self, input, errors='strict'):
    """Decode UTF-8 input and skip a BOM that appears at the stream start.

    When the BOM check is resolved, ``self.decode`` is rebound to
    ``codecs.utf_8_decode`` so that following calls skip this method.

    Returns:
        ``(text, consumed)``; ``(u"", 0)`` when the input has fewer than
        three bytes and might be an incomplete BOM.
    """
    bom = codecs.BOM_UTF8
    short = len(input) < 3
    if short and bom.startswith(input):
        # not enough data to decide if this is a BOM
        # => try again on the next call
        return (u"", 0)

    # Resolved: use the plain decoder for all later calls.
    self.decode = codecs.utf_8_decode
    if not short and input[:3] == bom:
        output, consumed = codecs.utf_8_decode(input[3:], errors)
        return (output, consumed + 3)
    # (else) no BOM present
    return codecs.utf_8_decode(input, errors)

### encodings module API
Example #27
Source File: utf_8_sig.py From pmatic with GNU General Public License v2.0 | 5 votes |
def _buffer_decode(self, input, errors, final):
    """Decode buffered UTF-8 input, dropping a BOM found at the stream start.

    ``self.first`` stays truthy until the first bytes have been classified
    as "BOM" or "not BOM"; it is then set to ``None``.

    Returns:
        ``(text, consumed)``.  ``(u"", 0)`` is returned when the data is
        shorter than three bytes and is still a possible BOM prefix.
    """
    bom = codecs.BOM_UTF8
    if self.first:
        undecided = len(input) < 3 and bom.startswith(input)
        if undecided:
            # not enough data to decide if this really is a BOM
            # => try again on the next call
            return (u"", 0)
        self.first = None
        if len(input) >= 3 and input[:3] == bom:
            # The three BOM bytes count as consumed although they yield no text.
            text, used = codecs.utf_8_decode(input[3:], errors, final)
            return (text, used + 3)
    return codecs.utf_8_decode(input, errors, final)
Example #28
Source File: utf_8_sig.py From pmatic with GNU General Public License v2.0 | 5 votes |
def decode(input, errors='strict'):
    """Decode a full UTF-8 byte string, leaving out a BOM at the beginning.

    Args:
        input: Bytes-like object to decode.
        errors: Error-handler name passed to the decoder.

    Returns:
        ``(text, consumed)``; a stripped BOM adds three to ``consumed``.
    """
    skipped = 0
    body = input
    if input[:3] == codecs.BOM_UTF8:
        skipped = 3
        body = input[3:]
    text, used = codecs.utf_8_decode(body, errors, True)
    return (text, used + skipped)
Example #29
Source File: helper.py From OpenDoor with GNU General Public License v3.0 | 5 votes |
def decode(str, errors='strict'):
    """
    Decode bytes to text on a best-effort basis.

    The input is decoded as UTF-8 (a leading UTF-8 BOM is dropped) or, when
    it starts with a UTF-16 byte order mark, as UTF-16.  If that fails, the
    data is decoded as cp1251; if that fails too, an empty string is
    returned.  This function never raises.

    :param bytes str: input data (the name shadows the builtin ``str`` and
        is kept only for compatibility with existing callers)
    :param str errors: error handler passed to the UTF-8 decoder
    :return: decoded text, or "" when nothing could be decoded
    :rtype: str
    """

    # Work on a clearly named alias; the builtin ``str`` is not reachable
    # here, so it must never be called inside this function.
    data = str
    try:
        if data[:3] == codecs.BOM_UTF8:
            # final=True so that truncated trailing bytes are reported
            # instead of being silently left undecoded.
            return codecs.utf_8_decode(data[3:], errors, True)[0]
        if data[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
            # The 'utf-16' codec consumes the BOM and picks the byte order.
            return data.decode('utf-16')
        # no BOM present
        return codecs.utf_8_decode(data, errors, True)[0]
    except Exception:
        # Deliberate best-effort: the input is probably not UTF text
        # (images, files, etc.), so fall through to the legacy code page.
        pass

    try:
        return data.decode('cp1251')
    except Exception:
        return ""
Example #30
Source File: reader.py From pipenv with MIT License | 5 votes |
def determine_encoding(self):
    """Set the raw decoder and encoding name from the buffered input.

    ``self.update_raw()`` is called repeatedly until ``self.eof`` is true or
    at least two items are buffered.  When the buffer is ``bytes``, a UTF-16
    LE or BE byte order mark selects that codec and anything else selects
    UTF-8.  Finally ``self.update(1)`` is called.
    """
    while not self.eof:
        pending = self.raw_buffer
        if pending is not None and len(pending) >= 2:
            break
        self.update_raw()

    if isinstance(self.raw_buffer, bytes):
        candidates = (
            (codecs.BOM_UTF16_LE, codecs.utf_16_le_decode, 'utf-16-le'),
            (codecs.BOM_UTF16_BE, codecs.utf_16_be_decode, 'utf-16-be'),
        )
        for mark, decode_fn, codec_name in candidates:
            if self.raw_buffer.startswith(mark):
                break
        else:
            # No UTF-16 BOM found: treat the bytes as UTF-8.
            decode_fn, codec_name = codecs.utf_8_decode, 'utf-8'
        self.raw_decode = decode_fn
        self.encoding = codec_name
    self.update(1)