Python Examples of pygments.lexers.get_lexer_for_mimetype

Source File: textfmts.py From pygments with BSD 2-Clause "Simplified" License

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: textfmts.py From syntax-highlighting with GNU Affero General Public License v3.0

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: textfmts.py From android_universal with MIT License

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: textfmts.py From diaphora with GNU Affero General Public License v3.0

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: textfmts.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: textfmts.py From Carnets with BSD 3-Clause "New" or "Revised" License

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: textfmts.py From komodo-wakatime with BSD 3-Clause "New" or "Revised" License

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: pygments_sh.py From Turing with MIT License

6 votes

def set_lexer_from_filename(self, filename):
        """
        Change the lexer based on the filename (actually only the extension is
        needed)

        A single trailing ``~`` is removed from the name before the lookup.
        Lookup order: lexer for the filename, then lexer for the mimetype
        guessed from the filename, then the ``text/plain`` lexer, and
        finally a bare ``TextLexer`` if every lookup failed.

        :param filename: Filename or extension
        """
        self._lexer = None
        if filename.endswith("~"):
            filename = filename[:-1]
        try:
            self._lexer = get_lexer_for_filename(filename)
        except (ClassNotFound, ImportError):
            # Diagnostics go through the module logger rather than stdout.
            _logger().debug('no lexer found for filename: %s', filename)
            try:
                mime_type = mimetypes.guess_type(filename)[0]
                _logger().debug('guessed mime type: %r', mime_type)
                self._lexer = get_lexer_for_mimetype(mime_type)
            except (ClassNotFound, IndexError, ImportError):
                try:
                    self._lexer = get_lexer_for_mimetype('text/plain')
                except (ClassNotFound, ImportError):
                    # Leave the lexer unset so the TextLexer fallback below
                    # is actually reachable instead of propagating.
                    self._lexer = None
        if self._lexer is None:
            _logger().warning('failed to get lexer from filename: %s, using '
                              'plain text instead...', filename)
            self._lexer = TextLexer()

Source File: httpdomain.py From couchdb-documentation with Apache License 2.0

6 votes

def content_callback(self, match):
        """Tokenize the matched content with the lexer for its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  If it is
        set and a lexer is registered for it, that lexer tokenizes the
        content and token positions are shifted by the match start.
        Otherwise the whole content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, "content_type", None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype

        try:
            delegate = get_lexer_for_mimetype(mimetype)
        except ClassNotFound:
            # No lexer for this type: emit the content untouched.
            yield base, Text, body
            return

        # Re-base the delegate's offsets onto the enclosing text.
        for pos, ttype, text in delegate.get_tokens_unprocessed(body):
            yield base + pos, ttype, text

Source File: textfmts.py From pySINDy with MIT License

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: textfmts.py From pigaios with GNU General Public License v3.0

6 votes

def content_callback(self, match):
        """Tokenize the matched content with a lexer picked from its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  When it is
        set, the lexer registered for that mimetype is tried first; for a
        type containing ``+`` such as ``application/calendar+xml`` the
        generic ``application/xml`` is tried next.  The first lexer found
        tokenizes the content, with token positions shifted by the match
        start.  If there is no content type, or no lexer is found, the whole
        content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        candidates = [mimetype]
        if '+' in mimetype:
            # application/calendar+xml can be treated as application/xml
            # if there's not a better match.
            candidates.append(
                re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', mimetype))

        for candidate in candidates:
            try:
                delegate = get_lexer_for_mimetype(candidate)
            except ClassNotFound:
                continue
            # Re-base the delegate's offsets onto the enclosing text.
            for pos, ttype, text in delegate.get_tokens_unprocessed(body):
                yield base + pos, ttype, text
            return
        yield base, Text, body

Source File: hexteditor.py From guppy-proxy with MIT License

5 votes

def fill_highlighted(self):
        """Fill the highlighted widget with syntax-highlighted ``self.data``.

        The widget is cleared first and left empty when there is no data.
        The lexer is chosen from the ``Content-Type`` header with any
        ``;``-separated parameters removed.  If highlighting fails for any
        reason, the output of ``printable_data`` is shown instead.
        """
        # NOTE(review): updates are disabled on htmlxml_widg while
        # highlighted_widg is the one modified -- confirm this is intended.
        with DisableUpdates(self.htmlxml_widg):
            self.highlighted_widg.setPlainText("")
            if not self.data:
                return
            # Tolerate a missing header (if ``get`` returns None for it) so
            # the lookup below fails cleanly and we fall back to plain data.
            ct = (self.headers.get('Content-Type') or "").lower()
            # Drop parameters such as "; charset=utf-8" and stray whitespace.
            ct = ct.split(";")[0].strip()
            try:
                lexer = get_lexer_for_mimetype(ct)
                highlighted = textedit_highlight(self.data, lexer)
            except Exception:
                # Best effort: any highlighting failure degrades to raw
                # printable data, but KeyboardInterrupt/SystemExit propagate.
                highlighted = printable_data(self.data)
            self.highlighted_widg.setHtml(highlighted)

Source File: reqview.py From guppy-proxy with MIT License

5 votes

def get_tokens_unprocessed(self, text):
        """Yield tokens for ``text``, split into a header part and a body.

        The text is split once at the first blank line (``\\n`` or ``\\r\\n``
        line endings).  The header part is tokenized by ``self.hl``.  The
        remainder (starting at the separator) is tokenized by the lexer
        registered for the mimetype in the ``Content-Type`` header when the
        body is no longer than ``self.max_len`` (a negative ``max_len``
        disables the limit); otherwise it is yielded as one ``Token.Text``.

        :param text: text to tokenize
        :return: generator of ``(index, tokentype, value)`` tuples
        """
        try:
            split = re.split(r"(?:\r\n|\n)(?:\r\n|\n)", text, 1)
            h = split[0]
            body = split[1] if len(split) == 2 else ''
        except Exception as e:
            # Emit whatever self.tl produces for the text, then re-raise.
            for v in self.tl.get_tokens_unprocessed(text):
                yield v
            raise e

        for token in self.hl.get_tokens_unprocessed(h):
            yield token

        if not body:
            return

        second_parser = None
        within_limit = len(body) <= self.max_len or self.max_len < 0
        if within_limit and "Content-Type" in h:
            try:
                ct = re.search("Content-Type: (.*)", h)
                if ct is not None:
                    # ``.`` also matches "\r", so with CRLF line endings the
                    # captured value ends in "\r"; strip it (and any spaces
                    # before ";") or the mimetype lookup can never succeed.
                    mime = ct.group(1).split(";")[0].strip()
                    second_parser = get_lexer_for_mimetype(mime)
            except ClassNotFound:
                pass

        # Everything after the headers, including the blank-line separator.
        rest = text[len(h):]
        if second_parser is None:
            yield (len(h), Token.Text, rest)
        else:
            for index, tokentype, value in second_parser.get_tokens_unprocessed(rest):
                yield (index + len(h), tokentype, value)

Source File: mime.py From pygments with BSD 2-Clause "Simplified" License

5 votes

def get_bodypart_tokens(self, text):
        """Return tokens for one body part, lexed according to its mimetype.

        The text is returned unlexed as a single ``Other`` token when:
          * it is empty or only whitespace,
          * no content type is set,
          * the content transfer encoding is set and is not one of
            ``8bit``, ``7bit`` or ``quoted-printable``,
          * ``max_nested_level`` has reached 0,
          * no lexer is registered for the content type.
        Otherwise the result of the selected lexer's
        ``get_tokens_unprocessed`` is returned.
        """
        if not text.strip() or not self.content_type:
            return [(0, Other, text)]

        encoding = self.content_transfer_encoding
        readable_encodings = {"8bit", "7bit", "quoted-printable"}
        unreadable = encoding and encoding not in readable_encodings
        if unreadable or self.max_nested_level == 0:
            return [(0, Other, text)]

        try:
            sub_lexer = get_lexer_for_mimetype(self.content_type)
        except ClassNotFound:
            return [(0, Other, text)]

        # A nested lexer of our own type gets one level less to recurse.
        if isinstance(sub_lexer, type(self)):
            sub_lexer.max_nested_level = self.max_nested_level - 1

        return sub_lexer.get_tokens_unprocessed(text)

Source File: md.py From codimension with GNU General Public License v3.0

5 votes

def get_lexer(text, lang):
    """Tries to get lexer for the text whether the lang is provided or not

    When ``lang`` is given only a lookup by name is attempted.  Otherwise
    the mimetype detected from the stripped text is used, first as is and
    then, for ``text/*`` types, with ``text/`` replaced by ``application/``.

    Returns the lexer, or None if none could be found.
    """
    if lang:
        try:
            return get_lexer_by_name(lang, stripall=False)
        except Exception:
            # Best effort: an unknown language simply means "no lexer".
            # Unlike a bare except, this lets KeyboardInterrupt and
            # SystemExit propagate.
            return None

    # No language provided, try to guess
    mime = getMagicMimeFromBuffer(text.strip())
    if mime:
        try:
            return get_lexer_for_mimetype(mime, stripall=False)
        except Exception:
            pass

        # The pygments data sometimes miss mime options provided by python magic
        # library
        if mime.startswith('text/'):
            try:
                return get_lexer_for_mimetype(mime.replace('text/',
                                                           'application/'),
                                              stripall=False)
            except Exception:
                pass

    return None


# The start/end tags could be:
# @startuml / @enduml
# @startgantt / @endgantt
# @startsalt / @endsalt
# @startmindmap / @endmindmap
# @startwbs / @endwbs
# @startditaa / @endditaa
# @startjcckit / @endjcckit

Source File: test_basic_api.py From pygments with BSD 2-Clause "Simplified" License

5 votes

def test_get_lexers():
    """Check the lexer lookup functions and the LEXERS registry mapping."""
    # Every lookup function must return a PythonLexer and pass options on.
    lookups = [
        (lexers.get_lexer_by_name, ("python",)),
        (lexers.get_lexer_for_filename, ("test.py",)),
        (lexers.get_lexer_for_mimetype, ("text/x-python",)),
        (lexers.guess_lexer, ("#!/usr/bin/python3 -O\nprint",)),
        (lexers.guess_lexer_for_filename, ("a.py", "<%= @foo %>")),
    ]
    for lookup, positional in lookups:
        found = lookup(*positional, opt='val')
        assert isinstance(found, lexers.PythonLexer)
        assert found.options["opt"] == "val"

    # Each registered name, alias and mimetype must resolve to its class.
    for cls, entry in lexers.LEXERS.items():
        _, lname, aliases, _, mimetypes = entry
        assert cls == lexers.find_lexer_class(lname).__name__

        for alias in aliases:
            assert cls == lexers.get_lexer_by_name(alias).__class__.__name__

        for mimetype in mimetypes:
            assert cls == lexers.get_lexer_for_mimetype(mimetype).__class__.__name__

    # Looking up None must raise ClassNotFound.
    try:
        lexers.get_lexer_by_name(None)
    except ClassNotFound:
        return
    raise Exception

Source File: colors.py From pappy-proxy with MIT License

5 votes

def pretty_body(msg):
    """Return the printable body of ``msg``, highlighted when possible.

    The body is first converted with ``printable_data``.  If the message
    has a ``content-type`` header, the lexer for its mimetype (parameters
    after ``;`` removed) is used to highlight it for the terminal.  Any
    failure while highlighting leaves the unhighlighted text in place.
    """
    from .util import printable_data
    to_ret = printable_data(msg.body, colors=False)
    if 'content-type' in msg.headers:
        try:
            # Drop parameters such as "; charset=utf-8" and stray whitespace
            # so the lookup sees only the bare mimetype.
            mime = msg.headers.get('content-type').split(';')[0].strip()
            lexer = get_lexer_for_mimetype(mime)
            to_ret = highlight(to_ret, lexer, TerminalFormatter())
        except Exception:
            # Best effort: highlighting is optional.  Unlike a bare except,
            # this lets KeyboardInterrupt and SystemExit propagate.
            pass
    return to_ret

Source File: pygments_sh.py From Turing with MIT License

5 votes

def set_lexer_from_mime_type(self, mime, **options):
        """
        Look up the pygments lexer for a mime type and store it on this
        object.

        :param mime: mime type
        :param options: optional additional options, passed to the lookup.
        """
        found = get_lexer_for_mimetype(mime, **options)
        self._lexer = found
        _logger().debug('lexer for mimetype (%s): %r', mime, self._lexer)

Source File: plugin.py From indico-plugins with MIT License

5 votes

def generate_content(cls, attachment):
        """Render a highlighted HTML preview of the attachment's file.

        The lexer comes from ``cls.CUSTOM_LEXERS`` when the file's content
        type is listed there, otherwise from the mimetype lookup.  The file
        contents are highlighted to HTML and rendered through the preview
        template together with the matching style definitions.
        """
        content_type = attachment.file.content_type

        chosen_lexer = cls.CUSTOM_LEXERS.get(content_type)
        if chosen_lexer is None:
            chosen_lexer = get_lexer_for_mimetype(content_type)

        with attachment.file.open() as stream:
            formatter = HtmlFormatter(style='tango', linenos='inline', prestyles='mono')
            source_text = stream.read()
            highlighted_html = highlight(source_text, chosen_lexer, formatter)

        style_defs = formatter.get_style_defs('.highlight')

        return render_template(
            'previewer_code:pygments_preview.html',
            attachment=attachment,
            html_code=highlighted_html,
            css_code=style_defs,
        )

Source File: mime.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International

5 votes

def get_bodypart_tokens(self, text):
        """Return tokens for one body part, lexed according to its mimetype.

        The text is returned unlexed as a single ``Other`` token when:
          * it is empty or only whitespace,
          * no content type is set,
          * the content transfer encoding is set and is not one of
            ``8bit``, ``7bit`` or ``quoted-printable``,
          * ``max_nested_level`` has reached 0,
          * no lexer is registered for the content type.
        Otherwise the result of the selected lexer's
        ``get_tokens_unprocessed`` is returned.
        """
        if not text.strip() or not self.content_type:
            return [(0, Other, text)]

        encoding = self.content_transfer_encoding
        readable_encodings = {"8bit", "7bit", "quoted-printable"}
        unreadable = encoding and encoding not in readable_encodings
        if unreadable or self.max_nested_level == 0:
            return [(0, Other, text)]

        try:
            sub_lexer = get_lexer_for_mimetype(self.content_type)
        except ClassNotFound:
            return [(0, Other, text)]

        # A nested lexer of our own type gets one level less to recurse.
        if isinstance(sub_lexer, type(self)):
            sub_lexer.max_nested_level = self.max_nested_level - 1

        return sub_lexer.get_tokens_unprocessed(text)

Source File: httpdomain.py From nltk-server with MIT License

5 votes

def content_callback(self, match):
        """Tokenize the matched content with the lexer for its mimetype.

        Reads the optional ``content_type`` attribute of ``self``.  If it is
        set and a lexer is registered for it, that lexer tokenizes the
        content and token positions are shifted by the match start.
        Otherwise the whole content is yielded as a single ``Text`` token.
        """
        mimetype = getattr(self, 'content_type', None)
        body = match.group()
        base = match.start()
        if not mimetype:
            yield base, Text, body
            return

        from pygments.lexers import get_lexer_for_mimetype
        try:
            delegate = get_lexer_for_mimetype(mimetype)
        except ClassNotFound:
            # No lexer for this type: emit the content untouched.
            yield base, Text, body
            return

        # Re-base the delegate's offsets onto the enclosing text.
        for pos, ttype, text in delegate.get_tokens_unprocessed(body):
            yield base + pos, ttype, text

Source File: syntax_highlighting.py From cauldron with MIT License

4 votes

def fetch_lexer(
        source: str,
        language: str = None,
        filename: str = None,
        mime_type: str = None
) -> Lexer:
    """
    Finds a lexer for the source, trying each available hint in turn: the
    language name, the filename (exact lookup, then a guess using the
    filename together with the source), the mime type and, last, a guess
    from the source alone. A TextLexer is returned when nothing matches.

    :param source:
        The source text to be lexed
    :param language:
        Optional lexer name to look up first
    :param filename:
        Optional filename used for the filename-based lookups
    :param mime_type:
        Optional mime type to look up
    :return:
        The first lexer found, or a TextLexer as the final fallback
    """

    environ.abort_thread()

    if language:
        try:
            return get_lexer_by_name(language, stripall=True)
        except ClassNotFound:
            pass

    if filename:
        try:
            return get_lexer_for_filename(filename, stripall=True)
        except ClassNotFound:
            pass

        try:
            return guess_lexer_for_filename(filename, source, stripall=True)
        except ClassNotFound:
            pass

    if mime_type:
        try:
            return get_lexer_for_mimetype(mime_type, stripall=True)
        except ClassNotFound:
            pass

    try:
        guessed = guess_lexer(source, stripall=True)
    except ClassNotFound:
        guessed = TextLexer()
    return guessed

Python pygments.lexers.get_lexer_for_mimetype() Examples