Python codecs.BOM_UTF16 Examples
The following are 10
code examples of codecs.BOM_UTF16 (a module-level bytes constant, not a callable).
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
codecs
, or try the search function
.
Example #1
Source File: encoding.py From codimension with GNU General Public License v3.0 | 5 votes |
def detectFileEncodingToRead(fName, text=None):
    """Detects the encoding a file should be read with.

    :param fName: path of the file on disk
    :param text: optional leading bytes of the file; when None the first
                 1024 bytes are read from disk
    :return: encoding name; 'bom-utf-8'/'bom-utf-16'/'bom-utf-32' for BOM
             files, otherwise the first applicable configured encoding
    """
    if text is None:
        with open(fName, 'rb') as diskfile:
            text = diskfile.read(1024)

    # Step 1: check for BOM.
    # BUGFIX: the UTF-32 LE BOM (\xff\xfe\x00\x00) starts with the
    # UTF-16 LE BOM (\xff\xfe), so UTF-32 must be tested before UTF-16
    # or UTF-32 files are misdetected as UTF-16.
    if text.startswith(BOM_UTF8):
        return 'bom-utf-8'
    if text.startswith(BOM_UTF32):
        return 'bom-utf-32'
    if text.startswith(BOM_UTF16):
        return 'bom-utf-16'

    # Step 2: check if it was a user assigned encoding
    userAssignedEncoding = getFileEncoding(fName)
    if userAssignedEncoding:
        return userAssignedEncoding

    # Step 3: extract encoding from the file
    encFromFile = getCodingFromBytes(text)
    if encFromFile:
        return encFromFile

    # Step 4: check the project default encoding
    project = GlobalData().project
    if project.isLoaded():
        projectEncoding = project.props['encoding']
        if projectEncoding:
            return projectEncoding

    # Step 5: checks the IDE encoding
    ideEncoding = Settings()['encoding']
    if ideEncoding:
        return ideEncoding

    # Step 6: default
    return DEFAULT_ENCODING
Example #2
Source File: encoding.py From codimension with GNU General Public License v3.0 | 5 votes |
def writeEncodedFile(fName, content, encoding):
    """Writes into a file taking care of encoding"""
    normEnc = getNormalizedEncoding(encoding)
    try:
        if normEnc.startswith('bom_'):
            # 'bom_*' encodings: strip the prefix and prepend the BOM bytes
            enc = normEnc[4:]
            if enc == 'utf_8':
                bom = BOM_UTF8
            elif enc == 'utf_16':
                bom = BOM_UTF16
            else:
                bom = BOM_UTF32
            encContent = bom + content.encode(enc)
        else:
            encContent = content.encode(normEnc)

        # Workaround for empty files: if there is no visible content and
        # the file is saved then the editor reports precisely \n which is
        # saved on disk and then detected as octet-stream. If there are
        # more than one \n then the file is detected as plain text.
        # The octet stream files are not openable in Codimension
        if encContent == b'\n':
            encContent = b''
    except (UnicodeError, LookupError) as exc:
        raise Exception('Error encoding the buffer content with ' +
                        encoding + ': ' + str(exc))

    try:
        with open(fName, 'wb') as diskfile:
            diskfile.write(encContent)
    except Exception as exc:
        raise Exception('Error writing encoded buffer content into ' +
                        fName + ': ' + str(exc))
Example #3
Source File: helper.py From OpenDoor with GNU General Public License v3.0 | 5 votes |
def decode(str, errors='strict'):
    """ Decode strings
    :param str str: input string
    :param str errors: error level
    :return: str
    """
    output = ''
    try:
        if len(str) < 3:
            if codecs.BOM_UTF8.startswith(str):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                output = ""
        elif str[:3] == codecs.BOM_UTF8:
            (output, sizes) = codecs.utf_8_decode(str[3:], errors)
        # BUGFIX: the UTF-16 BOM is 2 bytes, not 3 -- the old
        # str[:3] == codecs.BOM_UTF16 comparison could never match
        elif str[:2] == codecs.BOM_UTF16:
            # the utf-16 codec consumes the leading BOM itself,
            # which also handles either endianness correctly
            output = str.decode('utf16')
        else:
            # (else) no BOM present
            (output, sizes) = codecs.utf_8_decode(str, errors)
        # BUGFIX: the parameter shadows builtins.str, so the old
        # `return str(output)` called the bytes argument, raised
        # TypeError and silently fell through to the cp1251 fallback
        return output
    except (UnicodeDecodeError, Exception):
        # seems, its getting not a content (images, file, etc)
        try:
            return str.decode('cp1251')
        except (UnicodeDecodeError, Exception):
            return ""
Example #4
Source File: stresstest.py From concurrent-log-handler with Apache License 2.0 | 5 votes |
def combine_logs(combinedlog, iterable, mode="wb"):
    """ write all lines (iterable) into a single log file. """
    # BUGFIX: the file object was closed manually with no try/finally,
    # leaking the handle if any write raised; a context manager closes
    # it on every path
    with io.open(combinedlog, mode) as fp:
        if ENCODING == 'utf-16':
            # UTF-16 output needs an explicit BOM at the start of the file
            import codecs
            fp.write(codecs.BOM_UTF16)
        for chunk in iterable:
            fp.write(chunk)
Example #5
Source File: test_codecs.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_utf_16_encode(self): # On little-endian systems, UTF-16 encodes in UTF-16-LE prefixed with BOM data, num_processed = codecs.utf_16_encode("abc") self.assertEqual(data, codecs.BOM_UTF16 + b'a\0b\0c\0') self.assertEqual(num_processed, 3) self.assertRaises(TypeError, codecs.utf_16_encode, b"abc") self.assertRaises(TypeError, codecs.utf_16_encode, None) self.assertRaises(TypeError, codecs.utf_16_encode, None, None) self.assertEquals(codecs.utf_16_encode("", None), (codecs.BOM_UTF16, 0))
Example #6
Source File: test_surrogateescape.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_utf_16(self): b_dabcd = b'\xda\xdb\xdc\xdd' s_dabcd = b_dabcd.decode("utf_16", errors="surrogateescape") self.assertEqual(s_dabcd, '\U001069dc') encoded = s_dabcd.encode("utf_16", errors="surrogateescape") # encoded will have BOM added self.assertEqual(encoded, codecs.BOM_UTF16 + b_dabcd)
Example #7
Source File: encoding.py From conda-manager with MIT License | 5 votes |
def decode(text):
    """
    Function to decode a text.
    @param text text to decode (string)
    @return decoded text and encoding
    """
    try:
        # BUGFIX: the UTF-32 LE BOM (\xff\xfe\x00\x00) begins with the
        # UTF-16 LE BOM (\xff\xfe), so UTF-32 must be checked before
        # UTF-16 or UTF-32 files are misdetected as UTF-16
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return to_text_string(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        elif text.startswith(BOM_UTF32):
            # UTF-32 with BOM
            return to_text_string(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
        elif text.startswith(BOM_UTF16):
            # UTF-16 with BOM
            return to_text_string(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return to_text_string(text, coding), coding
    except (UnicodeError, LookupError):
        pass
    # Assume UTF-8
    try:
        return to_text_string(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass
    # Assume Latin-1 (behaviour before 3.7.1)
    return to_text_string(text, "latin-1"), 'latin-1-guessed'
Example #8
Source File: encoding.py From conda-manager with MIT License | 5 votes |
def is_text_file(filename):
    """
    Test if the given path is a text-like file.

    Adapted from: http://stackoverflow.com/a/3002505

    Original Authors: Trent Mick <TrentM@ActiveState.com>
                      Jorge Orpinel <jorge@orpinel.com>
    """
    # Probe that the file can be opened at all.
    # BUGFIX: the bare open(filename) leaked the handle; the context
    # manager closes it immediately after the probe
    try:
        with open(filename):
            pass
    except Exception:
        return False
    with open(filename, 'rb') as fid:
        try:
            CHUNKSIZE = 1024
            chunk = fid.read(CHUNKSIZE)
            # check for a UTF BOM
            for bom in [BOM_UTF8, BOM_UTF16, BOM_UTF32]:
                if chunk.startswith(bom):
                    return True
            chunk = chunk.decode('utf-8')
            while 1:
                if '\0' in chunk:
                    # found null byte
                    return False
                if len(chunk) < CHUNKSIZE:
                    break  # done
                # NOTE(review): a multi-byte UTF-8 char split across chunk
                # boundaries would raise here and report False -- kept to
                # preserve the original behaviour
                chunk = fid.read(CHUNKSIZE).decode('utf-8')
        except UnicodeDecodeError:
            return False
        except Exception:
            pass
    return True
Example #9
Source File: sqlhtml.py From termite-visualizations with BSD 3-Clause "New" or "Revised" License | 5 votes |
def export(self):
    """Export self.rows as a tab-separated, UTF-16 encoded table string.

    Python 2 code (cStringIO, unicode) -- NOTE(review): not portable to
    Python 3 as written; verify before reuse.
    """
    # 'out' collects UTF-8 csv output; 'final' accumulates the UTF-16 result
    out = cStringIO.StringIO()
    final = cStringIO.StringIO()
    import csv
    writer = csv.writer(out, delimiter='\t')
    if self.rows:
        import codecs
        # Write a single BOM up front; per-chunk BOMs are stripped below
        final.write(codecs.BOM_UTF16)
        writer.writerow(
            [unicode(col).encode("utf8") for col in self.rows.colnames])
        # Re-encode the csv chunk as UTF-16 and drop its 2-byte BOM,
        # since the BOM was already written once above
        data = out.getvalue().decode("utf8")
        data = data.encode("utf-16")
        data = data[2:]
        final.write(data)
        out.truncate(0)
    records = self.represented()
    for row in records:
        writer.writerow(
            [str(col).decode('utf8').encode("utf-8") for col in row])
        # Same per-chunk re-encode + BOM strip as for the header row
        data = out.getvalue().decode("utf8")
        data = data.encode("utf-16")
        data = data[2:]
        final.write(data)
        out.truncate(0)
    return str(final.getvalue())
Example #10
Source File: encoding.py From codimension with GNU General Public License v3.0 | 4 votes |
def detectEncodingOnClearExplicit(fName, content):
    """Provides the reading encoding as a file would be read"""
    # The function is used in case the user reset the explicit encoding
    # so the current encoding needs to be set as if the file would be
    # read again
    try:
        with open(fName, 'rb') as diskfile:
            text = diskfile.read(1024)

        # BUGFIX: the UTF-32 LE BOM (\xff\xfe\x00\x00) starts with the
        # UTF-16 LE BOM (\xff\xfe); UTF-32 must be checked before UTF-16
        # or UTF-32 files are misdetected as UTF-16
        if text.startswith(BOM_UTF8):
            return 'bom-utf-8'
        if text.startswith(BOM_UTF32):
            return 'bom-utf-32'
        if text.startswith(BOM_UTF16):
            return 'bom-utf-16'

        # The function is called when an explicit encoding is reset so
        # there is no need to check for it
        encFromBuffer = getCodingFromText(content)
        if encFromBuffer:
            if isValidEncoding(encFromBuffer):
                return encFromBuffer

        project = GlobalData().project
        if project.isLoaded():
            projectEncoding = project.props['encoding']
            if projectEncoding:
                if isValidEncoding(projectEncoding):
                    return projectEncoding

        ideEncoding = Settings()['encoding']
        if ideEncoding:
            if isValidEncoding(ideEncoding):
                return ideEncoding
        return DEFAULT_ENCODING
    except Exception as exc:
        logging.warning('Error while guessing encoding for reading %s: %s',
                        fName, str(exc))
        logging.warning('The default encoding %s will be used',
                        DEFAULT_ENCODING)
        return DEFAULT_ENCODING