Python pygments.lexers.get_lexer_for_mimetype() Examples

The following are 22 code examples of pygments.lexers.get_lexer_for_mimetype(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pygments.lexers , or try the search function .
Example #1
Source File: textfmts.py    From pygments with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #2
Source File: textfmts.py    From syntax-highlighting with GNU Affero General Public License v3.0 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #3
Source File: textfmts.py    From android_universal with MIT License 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #4
Source File: textfmts.py    From diaphora with GNU Affero General Public License v3.0 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #5
Source File: textfmts.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #6
Source File: textfmts.py    From Carnets with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #7
Source File: textfmts.py    From komodo-wakatime with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #8
Source File: pygments_sh.py    From Turing with MIT License 6 votes vote down vote up
def set_lexer_from_filename(self, filename):
        """
        Change the lexer based on the filename (actually only the extension is
        needed)

        :param filename: Filename or extension
        """
        self._lexer = None
        if filename.endswith("~"):
            filename = filename[0:len(filename) - 1]
        try:
            self._lexer = get_lexer_for_filename(filename)
        except (ClassNotFound, ImportError):
            print('class not found for url', filename)
            try:
                m = mimetypes.guess_type(filename)
                print(m)
                self._lexer = get_lexer_for_mimetype(m[0])
            except (ClassNotFound, IndexError, ImportError):
                self._lexer = get_lexer_for_mimetype('text/plain')
        if self._lexer is None:
            _logger().warning('failed to get lexer from filename: %s, using '
                              'plain text instead...', filename)
            self._lexer = TextLexer() 
Example #9
Source File: httpdomain.py    From couchdb-documentation with Apache License 2.0 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, "content_type", None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype

            try:
                lexer = get_lexer_for_mimetype(content_type)
            except ClassNotFound:
                pass
            else:
                for idx, token, value in lexer.get_tokens_unprocessed(content):
                    yield offset + idx, token, value
                return
        yield offset, Text, content 
Example #10
Source File: textfmts.py    From pySINDy with MIT License 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #11
Source File: textfmts.py    From pigaios with GNU General Public License v3.0 6 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content 
Example #12
Source File: hexteditor.py    From guppy-proxy with MIT License 5 votes vote down vote up
def fill_highlighted(self):
        with DisableUpdates(self.htmlxml_widg):
            self.highlighted_widg.setPlainText("")
            if not self.data:
                return
            ct = self.headers.get('Content-Type').lower()
            if ";" in ct:
                ct = ct.split(";")[0]
            try:
                lexer = get_lexer_for_mimetype(ct)
                highlighted = textedit_highlight(self.data, lexer)
            except:
                highlighted = printable_data(self.data)
            self.highlighted_widg.setHtml(highlighted) 
Example #13
Source File: reqview.py    From guppy-proxy with MIT License 5 votes vote down vote up
def get_tokens_unprocessed(self, text):
        try:
            split = re.split(r"(?:\r\n|\n)(?:\r\n|\n)", text, 1)
            if len(split) == 2:
                h = split[0]
                body = split[1]
            else:
                h = split[0]
                body = ''
        except Exception as e:
            for v in self.tl.get_tokens_unprocessed(text):
                yield v
            raise e

        for token in self.hl.get_tokens_unprocessed(h):
            yield token

        if len(body) > 0:
            if len(body) <= self.max_len or self.max_len < 0:
                second_parser = None
                if "Content-Type" in h:
                    try:
                        ct = re.search("Content-Type: (.*)", h)
                        if ct is not None:
                            hval = ct.groups()[0]
                            mime = hval.split(";")[0]
                            second_parser = get_lexer_for_mimetype(mime)
                    except ClassNotFound:
                        pass
                if second_parser is None:
                    yield (len(h), Token.Text, text[len(h):])
                else:
                    for index, tokentype, value in second_parser.get_tokens_unprocessed(text[len(h):]):
                        yield (index + len(h), tokentype, value)
            else:
                yield (len(h), Token.Text, text[len(h):]) 
Example #14
Source File: mime.py    From pygments with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def get_bodypart_tokens(self, text):
        # return if:
        #  * no content
        #  * no content type specific
        #  * content encoding is not readable
        #  * max recurrsion exceed
        if not text.strip() or not self.content_type:
            return [(0, Other, text)]

        cte = self.content_transfer_encoding
        if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
            return [(0, Other, text)]

        if self.max_nested_level == 0:
            return [(0, Other, text)]

        # get lexer
        try:
            lexer = get_lexer_for_mimetype(self.content_type)
        except ClassNotFound:
            return [(0, Other, text)]

        if isinstance(lexer, type(self)):
            lexer.max_nested_level = self.max_nested_level - 1

        return lexer.get_tokens_unprocessed(text) 
Example #15
Source File: md.py    From codimension with GNU General Public License v3.0 5 votes vote down vote up
def get_lexer(text, lang):
    """Tries to get lexer for the text whether the lang is provided or not"""
    if lang:
        try:
            return get_lexer_by_name(lang, stripall=False)
        except:
            pass
        return None

    # No language provided, try to guess
    mime = getMagicMimeFromBuffer(text.strip())
    if mime:
        try:
            return get_lexer_for_mimetype(mime, stripall=False)
        except:
            pass

        # The pygments data sometimes miss mime options provided by python magic
        # library
        if mime.startswith('text/'):
            try:
                return get_lexer_for_mimetype(mime.replace('text/',
                                                           'application/'),
                                              stripall=False)
            except:
                pass

    return None


# The start/end tags could be:
# @startuml / @enduml
# @startgantt / @endgantt
# @startsalt / @endsalt
# @startmindmap / @endmindmap
# @startwbs / @endwbs
# @startditaa / @endditaa
# @startjcckit / @endjcckit 
Example #16
Source File: test_basic_api.py    From pygments with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_get_lexers():
    # test that the lexers functions work
    for func, args in [(lexers.get_lexer_by_name, ("python",)),
                       (lexers.get_lexer_for_filename, ("test.py",)),
                       (lexers.get_lexer_for_mimetype, ("text/x-python",)),
                       (lexers.guess_lexer, ("#!/usr/bin/python3 -O\nprint",)),
                       (lexers.guess_lexer_for_filename, ("a.py", "<%= @foo %>"))
                       ]:
        x = func(opt='val', *args)
        assert isinstance(x, lexers.PythonLexer)
        assert x.options["opt"] == "val"

    for cls, (_, lname, aliases, _, mimetypes) in lexers.LEXERS.items():
        assert cls == lexers.find_lexer_class(lname).__name__

        for alias in aliases:
            assert cls == lexers.get_lexer_by_name(alias).__class__.__name__

        for mimetype in mimetypes:
            assert cls == lexers.get_lexer_for_mimetype(mimetype).__class__.__name__

    try:
        lexers.get_lexer_by_name(None)
    except ClassNotFound:
        pass
    else:
        raise Exception 
Example #17
Source File: colors.py    From pappy-proxy with MIT License 5 votes vote down vote up
def pretty_body(msg):
    from .util import printable_data
    to_ret = printable_data(msg.body, colors=False)
    if 'content-type' in msg.headers:
        try:
            lexer = get_lexer_for_mimetype(msg.headers.get('content-type').split(';')[0])
            to_ret = highlight(to_ret, lexer, TerminalFormatter())
        except:
            pass
    return to_ret 
Example #18
Source File: pygments_sh.py    From Turing with MIT License 5 votes vote down vote up
def set_lexer_from_mime_type(self, mime, **options):
        """
        Sets the pygments lexer from mime type.

        :param mime: mime type
        :param options: optional addtional options.
        """
        self._lexer = get_lexer_for_mimetype(mime, **options)
        _logger().debug('lexer for mimetype (%s): %r', mime, self._lexer) 
Example #19
Source File: plugin.py    From indico-plugins with MIT License 5 votes vote down vote up
def generate_content(cls, attachment):
        mime_type = attachment.file.content_type

        lexer = cls.CUSTOM_LEXERS.get(mime_type)
        if lexer is None:
            lexer = get_lexer_for_mimetype(mime_type)

        with attachment.file.open() as f:
            html_formatter = HtmlFormatter(style='tango', linenos='inline', prestyles='mono')
            html_code = highlight(f.read(), lexer, html_formatter)

        css_code = html_formatter.get_style_defs('.highlight')

        return render_template('previewer_code:pygments_preview.html', attachment=attachment,
                               html_code=html_code, css_code=css_code) 
Example #20
Source File: mime.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def get_bodypart_tokens(self, text):
        # return if:
        #  * no content
        #  * no content type specific
        #  * content encoding is not readable
        #  * max recurrsion exceed
        if not text.strip() or not self.content_type:
            return [(0, Other, text)]

        cte = self.content_transfer_encoding
        if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
            return [(0, Other, text)]

        if self.max_nested_level == 0:
            return [(0, Other, text)]

        # get lexer
        try:
            lexer = get_lexer_for_mimetype(self.content_type)
        except ClassNotFound:
            return [(0, Other, text)]

        if isinstance(lexer, type(self)):
            lexer.max_nested_level = self.max_nested_level - 1

        return lexer.get_tokens_unprocessed(text) 
Example #21
Source File: httpdomain.py    From nltk-server with MIT License 5 votes vote down vote up
def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            try:
                lexer = get_lexer_for_mimetype(content_type)
            except ClassNotFound:
                pass
            else:
                for idx, token, value in lexer.get_tokens_unprocessed(content):
                    yield offset + idx, token, value
                return
        yield offset, Text, content 
Example #22
Source File: syntax_highlighting.py    From cauldron with MIT License 4 votes vote down vote up
def fetch_lexer(
        source: str,
        language: str = None,
        filename: str = None,
        mime_type: str = None
) -> Lexer:
    """

    :param source:
    :param language:
    :param filename:
    :param mime_type:
    :return:
    """

    environ.abort_thread()

    try:
        if language:
            return get_lexer_by_name(language, stripall=True)
    except ClassNotFound:
        pass

    if filename:
        try:
            return get_lexer_for_filename(filename, stripall=True)
        except ClassNotFound:
            pass

        try:
            return guess_lexer_for_filename(filename, source, stripall=True)
        except ClassNotFound:
            pass

    try:
        if mime_type:
            return get_lexer_for_mimetype(mime_type, stripall=True)
    except ClassNotFound:
        pass

    try:
        return guess_lexer(source, stripall=True)
    except ClassNotFound:
        return TextLexer()