Python tokenize.detect_encoding() Examples
The following are 30 code examples of tokenize.detect_encoding(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tokenize, or try the search function.
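Before the project examples, a minimal sketch of the API they all build on may help: tokenize.detect_encoding() takes a readline callable that yields bytes and returns an (encoding, consumed_lines) tuple, applying the PEP 263 rules (a UTF-8 BOM first, then a coding cookie in the first two lines, defaulting to 'utf-8'). The sample source below is illustrative only.

import io
import tokenize

# Feed detect_encoding() a readline callable over an in-memory byte buffer.
source = b"# -*- coding: latin-1 -*-\nx = 1\n"
encoding, consumed_lines = tokenize.detect_encoding(io.BytesIO(source).readline)
print(encoding)        # 'iso-8859-1' (the normalized name for latin-1)
print(consumed_lines)  # [b'# -*- coding: latin-1 -*-\n'] -- the header lines read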
Example #1
Source File: _bootstrap.py From jawfish with MIT License | 7 votes |
def get_source(self, fullname):
    """Concrete implementation of InspectLoader.get_source."""
    import tokenize
    path = self.get_filename(fullname)
    try:
        source_bytes = self.get_data(path)
    except IOError as exc:
        raise ImportError("source not available through get_data()",
                          name=fullname) from exc
    readsource = _io.BytesIO(source_bytes).readline
    try:
        encoding = tokenize.detect_encoding(readsource)
    except SyntaxError as exc:
        raise ImportError("Failed to detect encoding",
                          name=fullname) from exc
    newline_decoder = _io.IncrementalNewlineDecoder(None, True)
    try:
        return newline_decoder.decode(source_bytes.decode(encoding[0]))
    except UnicodeDecodeError as exc:
        raise ImportError("Failed to decode source file",
                          name=fullname) from exc
Example #2
Source File: imp.py From scylla with Apache License 2.0 | 6 votes |
def get_data(self, path):
    """Gross hack to contort loader to deal w/ load_*()'s bad API."""
    if self.file and path == self.path:
        if not self.file.closed:
            file = self.file
        else:
            self.file = file = open(self.path, 'r')

        with file:
            # Technically should be returning bytes, but
            # SourceLoader.get_code() just passed what is returned to
            # compile() which can handle str. And converting to bytes would
            # require figuring out the encoding to decode to and
            # tokenize.detect_encoding() only accepts bytes.
            return file.read()
    else:
        return super().get_data(path)
Example #3
Source File: imp.py From jawfish with MIT License | 6 votes |
def get_data(self, path):
    """Gross hack to contort loader to deal w/ load_*()'s bad API."""
    if self.file and path == self.path:
        if not self.file.closed:
            file = self.file
        else:
            self.file = file = open(self.path, 'r')

        with file:
            # Technically should be returning bytes, but
            # SourceLoader.get_code() just passed what is returned to
            # compile() which can handle str. And converting to bytes would
            # require figuring out the encoding to decode to and
            # tokenize.detect_encoding() only accepts bytes.
            return file.read()
    else:
        return super().get_data(path)
Example #4
Source File: imp.py From python with Apache License 2.0 | 6 votes |
def get_data(self, path):
    """Gross hack to contort loader to deal w/ load_*()'s bad API."""
    if self.file and path == self.path:
        if not self.file.closed:
            file = self.file
        else:
            self.file = file = open(self.path, 'r')

        with file:
            # Technically should be returning bytes, but
            # SourceLoader.get_code() just passed what is returned to
            # compile() which can handle str. And converting to bytes would
            # require figuring out the encoding to decode to and
            # tokenize.detect_encoding() only accepts bytes.
            return file.read()
    else:
        return super().get_data(path)
Example #5
Source File: imp.py From kobo-predict with BSD 2-Clause "Simplified" License | 6 votes |
def get_data(self, path):
    """Gross hack to contort loader to deal w/ load_*()'s bad API."""
    if self.file and path == self.path:
        if not self.file.closed:
            file = self.file
        else:
            self.file = file = open(self.path, 'r')

        with file:
            # Technically should be returning bytes, but
            # SourceLoader.get_code() just passed what is returned to
            # compile() which can handle str. And converting to bytes would
            # require figuring out the encoding to decode to and
            # tokenize.detect_encoding() only accepts bytes.
            return file.read()
    else:
        return super().get_data(path)
Example #6
Source File: openpy.py From Computable with MIT License | 6 votes |
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the python source file encoding cookie to determine encoding.
    txt can be either a bytes buffer or a string containing the source code.
    """
    if isinstance(txt, unicode):
        return txt
    if isinstance(txt, bytes):
        buffer = BytesIO(txt)
    else:
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        encoding = "ascii"
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
    text.mode = 'r'
    if skip_encoding_cookie:
        return u"".join(strip_encoding_cookie(text))
    else:
        return text.read()
Example #7
Source File: _pydev_execfile.py From PyDev.Debugger with Eclipse Public License 1.0 | 6 votes |
def execfile(file, glob=None, loc=None):
    if glob is None:
        import sys
        glob = sys._getframe().f_back.f_globals
    if loc is None:
        loc = glob

    # It seems that the best way is using tokenize.open():
    # http://code.activestate.com/lists/python-dev/131251/
    # (but tokenize.open() is only available for python 3.2)
    import tokenize
    if hasattr(tokenize, 'open'):  # version 3.2
        stream = tokenize.open(file)  # @UndefinedVariable
    else:  # version 3.0 or 3.1
        detect_encoding = tokenize.detect_encoding(open(file, mode="rb").readline)
        stream = open(file, encoding=detect_encoding[0])
    try:
        contents = stream.read()
    finally:
        stream.close()

    # execute the script (note: it's important to compile first to have the
    # filename set in debug mode)
    exec(compile(contents + "\n", file, 'exec'), glob, loc)
Example #8
Source File: imp.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def get_data(self, path):
    """Gross hack to contort loader to deal w/ load_*()'s bad API."""
    if self.file and path == self.path:
        if not self.file.closed:
            file = self.file
        else:
            self.file = file = open(self.path, 'r')

        with file:
            # Technically should be returning bytes, but
            # SourceLoader.get_code() just passed what is returned to
            # compile() which can handle str. And converting to bytes would
            # require figuring out the encoding to decode to and
            # tokenize.detect_encoding() only accepts bytes.
            return file.read()
    else:
        return super().get_data(path)
Example #9
Source File: imp.py From Fluid-Designer with GNU General Public License v3.0 | 6 votes |
def get_data(self, path):
    """Gross hack to contort loader to deal w/ load_*()'s bad API."""
    if self.file and path == self.path:
        if not self.file.closed:
            file = self.file
        else:
            self.file = file = open(self.path, 'r')

        with file:
            # Technically should be returning bytes, but
            # SourceLoader.get_code() just passed what is returned to
            # compile() which can handle str. And converting to bytes would
            # require figuring out the encoding to decode to and
            # tokenize.detect_encoding() only accepts bytes.
            return file.read()
    else:
        return super().get_data(path)
Example #10
Source File: imp.py From ironpython3 with Apache License 2.0 | 6 votes |
def get_data(self, path):
    """Gross hack to contort loader to deal w/ load_*()'s bad API."""
    if self.file and path == self.path:
        if not self.file.closed:
            file = self.file
        else:
            self.file = file = open(self.path, 'r')

        with file:
            # Technically should be returning bytes, but
            # SourceLoader.get_code() just passed what is returned to
            # compile() which can handle str. And converting to bytes would
            # require figuring out the encoding to decode to and
            # tokenize.detect_encoding() only accepts bytes.
            return file.read()
    else:
        return super().get_data(path)
Example #11
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_tokenize(self):
    import tokenize as tokenize_module
    encoding = object()
    encoding_used = None

    def mock_detect_encoding(readline):
        return encoding, [b'first', b'second']

    def mock__tokenize(readline, encoding):
        nonlocal encoding_used
        encoding_used = encoding
        out = []
        while True:
            next_line = readline()
            if next_line:
                out.append(next_line)
                continue
            return out

    counter = 0

    def mock_readline():
        nonlocal counter
        counter += 1
        if counter == 5:
            return b''
        return str(counter).encode()

    orig_detect_encoding = tokenize_module.detect_encoding
    orig__tokenize = tokenize_module._tokenize
    tokenize_module.detect_encoding = mock_detect_encoding
    tokenize_module._tokenize = mock__tokenize
    try:
        results = tokenize(mock_readline)
        self.assertEqual(list(results),
                         [b'first', b'second', b'1', b'2', b'3', b'4'])
    finally:
        tokenize_module.detect_encoding = orig_detect_encoding
        tokenize_module._tokenize = orig__tokenize
    self.assertTrue(encoding_used, encoding)
Example #12
Source File: phystokens.py From coveragepy with Apache License 2.0 | 5 votes |
def _source_encoding_py3(source):
    """Determine the encoding for `source`, according to PEP 263.

    `source` is a byte string: the text of the program.

    Returns a string, the name of the encoding.
    """
    readline = iternext(source.splitlines(True))
    return tokenize.detect_encoding(readline)[0]
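The iternext helper above is a coverage.py utility; a stdlib-only sketch of the same idea builds the readline callable directly from the split lines (the sample bytes are illustrative):

import tokenize

# detect_encoding() only needs a callable that returns one bytes line per call.
source = b"# coding: ascii\nprint('hi')\n"
readline = iter(source.splitlines(True)).__next__
print(tokenize.detect_encoding(readline)[0])  # 'ascii'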
Example #13
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_utf8_normalization(self):
    # See get_normal_name() in tokenizer.c.
    encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
    for encoding in encodings:
        for rep in ("-", "_"):
            enc = encoding.replace("-", rep)
            lines = (b"#!/usr/bin/python\n",
                     b"# coding: " + enc.encode("ascii") + b"\n",
                     b"1 + 3\n")
            rl = self.get_readline(lines)
            found, consumed_lines = detect_encoding(rl)
            self.assertEqual(found, "utf-8")
Example #14
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_syntaxerror_latin1(self):
    # Issue 14629: need to raise SyntaxError if the first
    # line(s) have non-UTF-8 characters
    lines = (
        b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
    )
    readline = self.get_readline(lines)
    self.assertRaises(SyntaxError, detect_encoding, readline)
Example #15
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_false_encoding(self):
    # Issue 18873: "Encoding" detected in non-comment lines
    readline = self.get_readline((b'print("#coding=fake")',))
    encoding, consumed_lines = detect_encoding(readline)
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
Example #16
Source File: pycodestyle.py From blackmamba with MIT License | 5 votes |
def readlines(filename):
    """Read the source code."""
    try:
        with open(filename, 'rb') as f:
            (coding, lines) = tokenize.detect_encoding(f.readline)
            f = TextIOWrapper(f, coding, line_buffering=True)
            return [line.decode(coding) for line in lines] + f.readlines()
    except (LookupError, SyntaxError, UnicodeError):
        # Fall back if file encoding is improperly declared
        with open(filename, encoding='latin-1') as f:
            return f.readlines()
Example #17
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_cookie_second_line_no_bom(self):
    lines = (
        b'#! something\n',
        b'# vim: set fileencoding=ascii :\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'ascii')
    expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
    self.assertEqual(consumed_lines, expected)
Example #18
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
    lines = (
        b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    readline = self.get_readline(lines)
    self.assertRaises(SyntaxError, detect_encoding, readline)
Example #19
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_matched_bom_and_cookie_first_line(self):
    lines = (
        b'\xef\xbb\xbf# coding=utf-8\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8-sig')
    self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
Example #20
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_bom_no_cookie(self):
    lines = (
        b'\xef\xbb\xbf# something\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8-sig')
    self.assertEqual(consumed_lines,
                     [b'# something\n', b'print(something)\n'])
Example #21
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_no_bom_no_encoding_cookie(self):
    lines = (
        b'# something\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines, list(lines[:2]))
Example #22
Source File: test_unparse.py From ironpython3 with Apache License 2.0 | 5 votes |
def read_pyfile(filename):
    """Read and return the contents of a Python source file (as a
    string), taking into account the file encoding."""
    with open(filename, "rb") as pyfile:
        encoding = tokenize.detect_encoding(pyfile.readline)[0]
    with open(filename, "r", encoding=encoding) as pyfile:
        source = pyfile.read()
    return source
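Worth noting alongside this example: since Python 3.2 the standard library wraps this detect-then-reopen pattern in tokenize.open(), which opens a source file in text mode using the detected encoding. A short sketch (the filename is hypothetical):

import tokenize

# tokenize.open() runs detect_encoding() internally and returns a text-mode file.
with tokenize.open("some_module.py") as f:  # hypothetical path
    source = f.read()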
Example #23
Source File: executing.py From executing with MIT License | 5 votes |
def decode_source(source):
    if isinstance(source, bytes):
        encoding, _ = detect_encoding(io.BytesIO(source).readline)
        source = source.decode(encoding)
    return source
Example #24
Source File: executing.py From executing with MIT License | 5 votes |
def decode_source(source):
    if isinstance(source, bytes):
        encoding = Source.detect_encoding(source)
        source = source.decode(encoding)
    return source
Example #25
Source File: builder.py From pySINDy with MIT License | 5 votes |
def file_build(self, path, modname=None):
    """Build astroid from a source code file (i.e. from an ast)

    *path* is expected to be a python source file
    """
    try:
        stream, encoding, data = open_source_file(path)
    except IOError as exc:
        raise exceptions.AstroidBuildingError(
            "Unable to load file {path}:\n{error}",
            modname=modname,
            path=path,
            error=exc,
        ) from exc
    except (SyntaxError, LookupError) as exc:
        raise exceptions.AstroidSyntaxError(
            "Python 3 encoding specification error or unknown encoding:\n"
            "{error}",
            modname=modname,
            path=path,
            error=exc,
        ) from exc
    except UnicodeError as exc:  # wrong encoding
        # detect_encoding returns utf-8 if no encoding specified
        raise exceptions.AstroidBuildingError(
            "Wrong or no encoding specified for {filename}.", filename=path
        ) from exc
    with stream:
        # get module name if necessary
        if modname is None:
            try:
                modname = ".".join(modutils.modpath_from_file(path))
            except ImportError:
                modname = os.path.splitext(os.path.basename(path))[0]
        # build astroid representation
        module = self._data_build(data, modname, path)
        return self._post_build(module, encoding)
Example #26
Source File: builder.py From pySINDy with MIT License | 5 votes |
def open_source_file(filename):
    with open(filename, "rb") as byte_stream:
        encoding = detect_encoding(byte_stream.readline)[0]
    stream = open(filename, "r", newline=None, encoding=encoding)
    data = stream.read()
    return stream, encoding, data
Example #27
Source File: test_tokenize.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def test_tokenize(self):
    import tokenize as tokenize_module
    encoding = object()
    encoding_used = None

    def mock_detect_encoding(readline):
        return encoding, [b'first', b'second']

    def mock__tokenize(readline, encoding):
        nonlocal encoding_used
        encoding_used = encoding
        out = []
        while True:
            next_line = readline()
            if next_line:
                out.append(next_line)
                continue
            return out

    counter = 0

    def mock_readline():
        nonlocal counter
        counter += 1
        if counter == 5:
            return b''
        return str(counter).encode()

    orig_detect_encoding = tokenize_module.detect_encoding
    orig__tokenize = tokenize_module._tokenize
    tokenize_module.detect_encoding = mock_detect_encoding
    tokenize_module._tokenize = mock__tokenize
    try:
        results = tokenize(mock_readline)
        self.assertEqual(list(results),
                         [b'first', b'second', b'1', b'2', b'3', b'4'])
    finally:
        tokenize_module.detect_encoding = orig_detect_encoding
        tokenize_module._tokenize = orig__tokenize
    self.assertTrue(encoding_used, encoding)
Example #28
Source File: test_tokenize.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def test_filename_in_exception(self):
    # When possible, include the file name in the exception.
    path = 'some_file_path'
    lines = (
        b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
    )

    class Bunk:
        def __init__(self, lines, path):
            self.name = path
            self._lines = lines
            self._index = 0

        def readline(self):
            if self._index == len(lines):
                raise StopIteration
            line = lines[self._index]
            self._index += 1
            return line

    with self.assertRaises(SyntaxError):
        ins = Bunk(lines, path)
        # Make sure lacking a name isn't an issue.
        del ins.name
        detect_encoding(ins.readline)
    with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
        ins = Bunk(lines, path)
        detect_encoding(ins.readline)
Example #29
Source File: test_tokenize.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def test_false_encoding(self):
    # Issue 18873: "Encoding" detected in non-comment lines
    readline = self.get_readline((b'print("#coding=fake")',))
    encoding, consumed_lines = detect_encoding(readline)
    self.assertEqual(encoding, 'utf-8')
    self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
Example #30
Source File: test_tokenize.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_matched_bom_and_cookie_second_line(self):
    lines = (
        b'\xef\xbb\xbf#! something\n',
        b'f# coding=utf-8\n',
        b'print(something)\n',
        b'do_something(else)\n'
    )
    encoding, consumed_lines = detect_encoding(self.get_readline(lines))
    self.assertEqual(encoding, 'utf-8-sig')
    self.assertEqual(consumed_lines,
                     [b'#! something\n', b'f# coding=utf-8\n'])