Python Examples of pypandoc.convert_text

Source File: report_generator.py From qb with MIT License

6 votes

def create(self, variables, md_output, pdf_output):
        """Render the report template to markdown and, if possible, to PDF.

        Parameters
        ----------
        variables
            Passed to the template's ``render`` call.
        md_output
            Path the rendered markdown is written to; ``None`` skips the
            markdown file.
        pdf_output
            Path handed to pandoc as the PDF output file.

        PDF generation is best-effort: any exception raised while importing
        or calling pypandoc is logged as a warning and swallowed.
        """
        env = Environment(loader=PackageLoader('qanta', 'reporting/templates'))
        template = env.get_template(self.template)
        markdown = template.render(variables)
        if md_output is not None:
            with open(md_output, 'w') as f:
                f.write(markdown)
        try:
            # Imported lazily so a missing pypandoc only disables the PDF.
            import pypandoc
            pypandoc.convert_text(
                markdown,
                'pdf',
                format='md',
                outputfile=pdf_output,
                extra_args=['-V', 'geometry:margin=.75in']
            )
        except Exception as e:
            # `warn` is a deprecated alias of `warning` on standard loggers.
            log.warning('Pandoc was not installed or there was an error calling it, omitting PDF report')
            log.warning(str(e))

Source File: mxdoc.py From SNIPER-mxnet with Apache License 2.0

6 votes

def _convert_md_table_to_rst(table):
    """Convert a markdown table to an ``eval_rst`` list-table block.

    ``table`` is a sequence of markdown table lines (header, separator,
    then body rows). Returns the rst block as a string, or ``''`` when the
    input is not a well-formed table: fewer than three lines, a row whose
    column count differs from the first row's, or a second line that is
    not a ``---`` separator row.
    """
    if len(table) < 3:
        return ''
    out = '```eval_rst\n.. list-table::\n   :header-rows: 1\n\n'
    ncol = None
    for i, line in enumerate(table):
        # Keep only the cells between the outermost pipes.
        cols = line.split('|')[1:-1]
        if i == 0:
            ncol = len(cols)
        elif len(cols) != ncol:
            return ''
        if i == 1:
            # Separator row: every non-empty cell must contain '---'.
            for c in cols:
                if c and '---' not in c:
                    return ''
        else:
            for j, c in enumerate(cols):
                # The first cell of a row opens a new list-table item.
                out += '   * - ' if j == 0 else '     - '
                out += pypandoc.convert_text(
                    c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n'
    out += '```\n'
    return out

Source File: mkdsupport.py From metaknowledge with GNU General Public License v2.0

6 votes

def pandoc_process(app, what, name, obj, options, lines):
    """Convert a docstring to reStructuredText in place using pandoc.

    The input format is read from ``app.config.mkdsupport_use_parser``.
    ``lines`` (a list of strings) is rewritten in place with the converted
    text; nothing happens when it is empty. Always returns ``None``.
    """
    if not lines:
        return None

    source_format = app.config.mkdsupport_use_parser

    # No encoding handling is done here: the joined text is handed to
    # pypandoc.convert_text unchanged and its result is used as returned.
    converted = pypandoc.convert_text(
        SEP.join(lines), 'rst', format=source_format)

    # The caller keeps its reference to `lines`, so replace the contents
    # of the existing list rather than binding a new one.
    lines[:] = converted.split(SEP)

Source File: docntbk.py From sporco with BSD 3-Clause "New" or "Revised" License

6 votes

def rst_to_notebook(infile, outfile, diridx=False):
    """Convert an rst file to a notebook file.

    Parameters
    ----------
    infile
        Path of the rst file to read.
    outfile
        Path of the notebook file to write.
    diridx
        When true, link targets that contain no ``/`` are rewritten to
        point at ``<target>/index.ipynb``.
    """

    # Read infile into a string
    with open(infile, 'r') as fin:
        rststr = fin.read()
    # Convert string from rst to markdown
    mdfmt = 'markdown_github+tex_math_dollars+fenced_code_attributes'
    mdstr = pypandoc.convert_text(rststr, mdfmt, format='rst',
                                  extra_args=['--atx-headers'])
    # In links, replace .py extensions with .ipynb. The dot is escaped so
    # that only a literal ".py" suffix is rewritten (an unescaped dot would
    # also match targets such as "(numpy)").
    mdstr = re.sub(r'\(([^\)]+)\.py\)', r'(\1.ipynb)', mdstr)
    # Links to subdirectories require explicit index file inclusion
    if diridx:
        mdstr = re.sub(r']\(([^\)/]+)\)', r'](\1/index.ipynb)', mdstr)
    # Enclose the markdown within triple quotes and convert from
    # python to notebook
    mdstr = '"""' + mdstr + '"""'
    nb = py2jn.py_string_to_notebook(mdstr)
    py2jn.tools.write_notebook(nb, outfile, nbver=4)

Source File: mxdoc.py From training_results_v0.6 with Apache License 2.0

6 votes

def _convert_md_table_to_rst(table):
    """Convert a markdown table to an ``eval_rst`` list-table block.

    ``table`` is a sequence of markdown table lines (header, separator,
    then body rows). Returns the rst block as a string, or ``''`` when the
    input is not a well-formed table: fewer than three lines, a row whose
    column count differs from the first row's, or a second line that is
    not a ``---`` separator row.
    """
    if len(table) < 3:
        return ''
    out = '```eval_rst\n.. list-table::\n   :header-rows: 1\n\n'
    ncol = None
    for i, line in enumerate(table):
        # Keep only the cells between the outermost pipes.
        cols = line.split('|')[1:-1]
        if i == 0:
            ncol = len(cols)
        elif len(cols) != ncol:
            return ''
        if i == 1:
            # Separator row: every non-empty cell must contain '---'.
            for c in cols:
                if c and '---' not in c:
                    return ''
        else:
            for j, c in enumerate(cols):
                # The first cell of a row opens a new list-table item.
                out += '   * - ' if j == 0 else '     - '
                out += pypandoc.convert_text(
                    c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n'
    out += '```\n'
    return out

Source File: notebook.py From sphinx-gallery with BSD 3-Clause "New" or "Revised" License

6 votes

def fill_notebook(work_notebook, script_blocks, gallery_conf):
    """Append one notebook cell per script block.

    Code blocks become code cells. Every other block is converted from
    rst to markdown, with ``rst2md`` when ``gallery_conf["pypandoc"]`` is
    ``False`` and with pypandoc otherwise, and added as a markdown cell.

    Parameters
    ----------
    work_notebook
        Notebook object the cells are added to.
    script_blocks : list
        Each list element should be a tuple of (label, content, lineno).
    gallery_conf : dict
        Its ``"pypandoc"`` entry is either ``False`` or a mapping of extra
        keyword arguments for ``pypandoc.convert_text``.
    """
    for label, content, _lineno in script_blocks:
        if label == 'code':
            add_code_cell(work_notebook, content)
            continue

        if gallery_conf["pypandoc"] is False:
            md_text = rst2md(content + '\n')
        else:
            import pypandoc
            # pandoc appends the trailing newline itself
            md_text = pypandoc.convert_text(
                content, to='md', format='rst', **gallery_conf["pypandoc"]
            )
        add_markdown_cell(work_notebook, md_text)

Source File: mxdoc.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0

6 votes

def _convert_md_table_to_rst(table):
    """Convert a markdown table to an ``eval_rst`` list-table block.

    ``table`` is a sequence of markdown table lines (header, separator,
    then body rows). Returns the rst block as a string, or ``''`` when the
    input is not a well-formed table: fewer than three lines, a row whose
    column count differs from the first row's, or a second line that is
    not a ``---`` separator row.
    """
    if len(table) < 3:
        return ''
    out = '```eval_rst\n.. list-table::\n   :header-rows: 1\n\n'
    ncol = None
    for i, line in enumerate(table):
        # Keep only the cells between the outermost pipes.
        cols = line.split('|')[1:-1]
        if i == 0:
            ncol = len(cols)
        elif len(cols) != ncol:
            return ''
        if i == 1:
            # Separator row: every non-empty cell must contain '---'.
            for c in cols:
                if c and '---' not in c:
                    return ''
        else:
            for j, c in enumerate(cols):
                # The first cell of a row opens a new list-table item.
                out += '   * - ' if j == 0 else '     - '
                out += pypandoc.convert_text(
                    c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n'
    out += '```\n'
    return out

Source File: redmine_to_github.py From pyweed with GNU Lesser General Public License v3.0

6 votes

def convert_issue_data(self, redmine_issue):
        """
        Build the payload for a new GitHub issue from a Redmine issue.

        The Redmine description is converted from textile to GitHub
        markdown and prefixed with a note recording the Redmine id, the
        creation date and, for closed issues, the closing date. Returns a
        dict with ``title``, ``body`` and ``assignees`` keys.
        """
        description_md = convert_text(
            redmine_issue['description'], 'markdown_github', 'textile'
        )

        # Timestamps look like "<date>T<time>"; only the date part is kept.
        issue_id = redmine_issue['id']
        created_date = redmine_issue['created_on'].split('T')[0]
        porting_note = '###### ported from Redmine #%s (created %s)' % (
            issue_id, created_date)

        if self.is_closed(redmine_issue):
            closed_date = redmine_issue['closed_on'].split('T')[0]
            porting_note = '%s (CLOSED %s)' % (porting_note, closed_date)

        return {
            "title": "%(subject)s (RM#%(id)s)" % redmine_issue,
            "body": "%s\n\n%s" % (porting_note, description_md),
            "assignees": ["adam-iris"],
        }

Source File: twlight_wikicode2html.py From TWLight with MIT License

5 votes

def twlight_wikicode2html(value):
    """Convert a mediawiki-formatted string to HTML with pandoc."""
    return pypandoc.convert_text(value, "html", format="mediawiki")

Source File: formatter.py From pytablereader with MIT License

5 votes

def __init__(self, source_data):
        """Convert mediawiki ``source_data`` to HTML and initialise the parent with it.

        Raises ``PypandocImportError`` when pypandoc cannot be imported.
        """
        try:
            import pypandoc
        except ImportError as e:
            # pypandoc is an optional dependency, so it may not be
            # installed on this system.
            raise PypandocImportError(e)

        html_source = pypandoc.convert_text(
            source_data, "html", format="mediawiki")
        super().__init__(html_source)

Source File: import_grundgesetz.py From oldp with MIT License

5 votes

def handle_law_from_xml(self, book, book_xml) -> LawBook:
        """Create the sections and laws of ``book`` from a DocBook XML string.

        Each ``sect1`` element adds a section to ``book``; each ``sect2``
        inside it is converted to HTML with pandoc and saved as a ``Law``.
        Laws are numbered consecutively across all sections and each one
        is linked to the law saved before it. Returns ``book``.
        """
        previous_law = None
        law_order = 1

        # Parse XML tree
        tree = etree.fromstring(book_xml)

        for sect in tree.xpath('sect1'):
            section_title = sect.xpath('title/text()')[0]
            logger.debug('Section: %s', section_title)

            book.add_section(from_order=law_order, title=section_title.strip())

            for law_raw in sect.xpath('sect2'):
                # Detach the title so it is not part of the converted body.
                law_title = law_raw.xpath('title')[0]
                law_title.getparent().remove(law_title)

                # Serialise only the remaining children, leaving out the
                # enclosing sect2 element itself.
                law_docbook = '\n'.join(tostring(x).decode('utf-8') for x in law_raw.iterchildren())
                law_text = pypandoc.convert_text(law_docbook, 'html', format='docbook')
                law_section = tostring(law_title, method="text").decode('utf-8').strip()

                law = Law(book=book,
                          title='',
                          section=law_section,
                          slug=slugify(law_section),
                          content=law_text,
                          previous=previous_law,
                          order=law_order
                          )
                law.save()
                law_order += 1
                previous_law = law

        return book

Source File: test_stitcher.py From stitch with MIT License

5 votes

def as_json(document):
    """Return pandoc's JSON output for the markdown ``document``, decoded."""
    raw_json = pypandoc.convert_text(document, 'json', format='markdown')
    return json.loads(raw_json)

Source File: stitch.py From stitch with MIT License

5 votes

def tokenize_block(source: str, pandoc_extra_args: list=None) -> list:
    """
    Return the ``blocks`` list of pandoc's JSON AST for markdown ``source``.

    ``pandoc_extra_args`` is passed to pandoc as extra arguments; ``None``
    means no extra arguments.
    """
    extra = [] if pandoc_extra_args is None else pandoc_extra_args
    ast = json.loads(
        pypandoc.convert_text(source, to='json', format='markdown', extra_args=extra)
    )
    return ast['blocks']

Source File: stitch.py From stitch with MIT License

5 votes

def tokenize(source: str) -> dict:
    """
    Return pandoc's JSON AST for the markdown ``source``, decoded.
    """
    raw_ast = pypandoc.convert_text(source, 'json', 'markdown')
    return json.loads(raw_ast)

Source File: stitch.py From stitch with MIT License

5 votes

def convert(source: str, to: str, extra_args=(),
            output_file: str=None) -> None:
    """
    Stitch a source document and convert the result with pandoc.

    Parameters
    ----------
    source : str
    to : str
        Output format, given to both ``Stitch`` and pandoc.
    extra_args : iterable
        Extra pandoc arguments. ``--standalone``, ``--self-contained`` and
        ``--use-prompt`` are also read as ``Stitch`` options.
    output_file : str

    Notes
    -----
    Either writes to ``output_file`` or prints to stdout.
    """
    if output_file is None:
        output_name = 'std_out'
    else:
        # File name without its directory and extension.
        output_name = os.path.splitext(os.path.basename(output_file))[0]

    standalone = '--standalone' in extra_args
    self_contained = '--self-contained' in extra_args
    use_prompt = '--use-prompt' in extra_args
    # '--use-prompt' is filtered out of the arguments given to pandoc.
    pandoc_args = [arg for arg in extra_args if arg != '--use-prompt']

    stitcher = Stitch(name=output_name, to=to, standalone=standalone,
                      self_contained=self_contained, use_prompt=use_prompt)
    stitched_json = json.dumps(stitcher.stitch(source))
    newdoc = pypandoc.convert_text(stitched_json, to, format='json',
                                   extra_args=pandoc_args,
                                   outputfile=output_file)

    if output_file is None:
        print(newdoc)

Source File: helpers.py From Apostrophe with GNU General Public License v3.0

5 votes

def pandoc_convert(text, to="html5", args=None, outputfile=None):
    """Convert ``text`` with pandoc, always passing ``--quiet``.

    The input format is read from the ``input-format`` setting and falls
    back to ``"markdown"`` when that setting is empty.

    Parameters
    ----------
    text
        Source text to convert.
    to
        Pandoc output format.
    args
        Optional extra pandoc arguments. The sequence is copied, never
        modified, so neither the caller's list nor a shared default
        accumulates ``--quiet`` across calls.
    outputfile
        Optional output path forwarded to ``pypandoc.convert_text``.
    """
    fr = Settings.new().get_value('input-format').get_string() or "markdown"
    extra_args = list(args or ()) + ["--quiet"]
    return pypandoc.convert_text(text, to, fr, extra_args=extra_args, outputfile=outputfile)

Source File: publish-gh-release-notes.py From pytest with MIT License

5 votes

def convert_rst_to_md(text):
    """Convert rst ``text`` to markdown with pandoc, keeping the source line wrapping."""
    pandoc_args = ["--wrap=preserve"]
    return pypandoc.convert_text(text, "md", format="rst", extra_args=pandoc_args)

Source File: twlight_wikicode2html.py From TWLight with MIT License

5 votes

def twlight_wikicode2html(value):
    """Convert a mediawiki-formatted string to HTML with pandoc."""
    return pypandoc.convert_text(value, "html", format="mediawiki")

Source File: RSSParser.py From feedDiasp with GNU General Public License v2.0

5 votes

def html2markdown(html: str) -> str:
    """
    Returns the given HTML as equivalent Markdown-structured text.

    pandoc (via pypandoc) is tried first. If that call raises ``OSError``,
    which is treated here as pandoc not being installed, a recommendation
    is printed and ``html2text`` is used instead.
    """
    try:
        return pypandoc.convert_text(html, 'md', format='html')
    except OSError:
        # Adjacent literals are concatenated as-is, so each fragment needs
        # its own trailing space.
        msg = "It's recommended to install the `pandoc` library for converting " \
              "HTML into Markdown-structured text. It tends to have better results " \
              "than `html2text`, which is now used as a fallback."
        print(msg)
        return html2text(html)

Source File: utils.py From insightconnect-plugins with MIT License

5 votes

def convert(content, from_format, to_format, use_file=False):
    """Convert ``content`` between pandoc formats and return the result as text.

    With ``use_file`` set, pandoc writes to a file obtained from
    ``make_file``; the file is read back and decoded as UTF-8, or as
    latin-1 if UTF-8 decoding fails. Otherwise pandoc's return value is
    returned directly.
    """
    filename = make_file(to_format) if use_file else None
    output = pypandoc.convert_text(
        content, to_format, format=from_format, outputfile=filename)

    if not use_file:
        return output

    raw = read_file(filename)
    try:
        return raw.decode('UTF-8')
    except UnicodeDecodeError:
        return raw.decode('latin-1')

Source File: publish_gh_release_notes.py From rasa-for-botfront with Apache License 2.0

5 votes

def convert_rst_to_md(text):
    """Convert rst ``text`` to markdown with pandoc, keeping the source line wrapping."""
    pandoc_args = ["--wrap=preserve"]
    return pypandoc.convert_text(text, "md", format="rst", extra_args=pandoc_args)

Source File: publish_gh_release_notes.py From rasa-sdk with Apache License 2.0

5 votes

def convert_rst_to_md(text):
    """Convert rst ``text`` to markdown with pandoc, keeping the source line wrapping."""
    pandoc_args = ["--wrap=preserve"]
    return pypandoc.convert_text(text, "md", format="rst", extra_args=pandoc_args)

Source File: converters.py From django-htk with MIT License

5 votes

def html2markdown(html):
    """Return `html` converted to ``markdown_strict`` text by pandoc."""
    return pypandoc.convert_text(html, 'markdown_strict', format='html')

Source File: utils.py From podoc with BSD 3-Clause "New" or "Revised" License

5 votes

def get_pandoc_api_version():
    """Return the ``pandoc-api-version`` field of pandoc's JSON output.

    An empty markdown document is converted to obtain that output.
    """
    import pypandoc
    empty_doc = json.loads(pypandoc.convert_text('', 'json', format='markdown'))
    return empty_doc['pandoc-api-version']

Source File: _markdown.py From podoc with BSD 3-Clause "New" or "Revised" License

5 votes

def read(self, contents, context=None):
        """Convert markdown ``contents`` (a ``str``) to pandoc JSON and load it as an AST."""
        assert isinstance(contents, str)
        pandoc_json = pypandoc.convert_text(
            contents, 'json', format=PANDOC_MARKDOWN_FORMAT)
        return ASTPlugin().loads(pandoc_json)

Source File: descriptor_set_tasks.py From artman with Apache License 2.0

5 votes

def md2rst(comment):
    """Convert a protobuf markdown comment to reStructuredText.

    Steps:
    - proto links are replaced by literals (e.g. [Foo][bar.baz.Foo] -> `Foo`)
    - relative URLs are resolved against https://cloud.google.com
    - pandoc converts the markdown to reStructuredText
    """
    comment = _replace_relative_link(_replace_proto_link(comment))

    # pypandoc.convert_text is slow, so it is only called when the comment
    # contains at least one markdown special character.
    needs_pandoc = any(ch in comment for ch in '`[]*_')
    if not needs_pandoc:
        return comment

    comment = pypandoc.convert_text(comment, 'rst', format='commonmark')
    # The result goes back into a descriptor set, where every comment line
    # is expected to start with a space separating it from the leading
    # '//'. Later parsing in gapic-generator removes that leading space,
    # which would damage the indentation of lines that really do start
    # with a space, so the extra space is inserted here. Comments that
    # skipped pandoc already carry the leading space and are left alone.
    return _insert_spaces(comment)

Source File: wiki.py From redmine-gitlab-migrator with GNU General Public License v3.0

4 votes

def convert(self, text):
        """Convert Redmine textile markup to markdown.

        The text is pre-processed (code blocks, collapse macros, paragraph
        markers), converted with pandoc, and then patched up for syntax
        pandoc does not handle: wiki links, nested lists, blockquotes,
        macros, attachments and code highlighting.

        Returns the converted text, or ``False`` if a ``RuntimeError`` is
        raised during the pandoc conversion or the post-processing.

        Note on ``re.sub``: its fourth positional parameter is ``count``,
        not ``flags``. Passing ``re.MULTILINE | re.DOTALL`` there applied
        no flags and capped each call at 24 replacements, so it is no
        longer passed. Any flags the patterns need belong where the
        ``self.regex*`` patterns are defined.
        """
        text = '\n\n'.join([re.sub(self.regexCodeBlock, r'<pre>\1</pre>', block) for block in text.split('\n\n')])

        # Matches are collected once, before any replacement changes `text`.
        for collapse in re.findall(self.regexCollapse, text):
            text = text.replace(collapse[0], "<details>")
            text = text.replace(collapse[2], "<summary>{}</summary> \n\n{}".format(collapse[1], collapse[2]))
            text = text.replace(collapse[3], "</details>")
        text = re.sub(self.regexParagraph, "", text)

        # convert from textile to markdown
        try:
            text = pypandoc.convert_text(text, 'markdown_strict', format='textile')

            # pandoc does not convert everything, notably the [[link|text]] syntax
            # is not handled. So let's fix that.

            # [[ wikipage | link_text ]] -> [link_text](wikipage)
            text = re.sub(self.regexWikiLinkWithText, self.wiki_link, text)

            # [[ link_url ]] -> [link_url](link_url)
            text = re.sub(self.regexWikiLinkWithoutText, self.wiki_link, text)

            # nested lists, fix at least the common issues
            text = text.replace("    \\#\\*", "    -")
            text = text.replace("    \\*\\#", "    1.")

            # Redmine is using '>' for blockquote, which is not textile
            text = text.replace("&gt; ", ">")

            # wiki note macros
            text = re.sub(self.regexTipMacro, r'---\n**TIP**: \1\n---\n', text)
            text = re.sub(self.regexNoteMacro, r'---\n**NOTE**: \1\n---\n', text)
            text = re.sub(self.regexWarningMacro, r'---\n**WARNING**: \1\n---\n', text)
            text = re.sub(self.regexImportantMacro, r'---\n**IMPORTANT**: \1\n---\n', text)

            # all other macros
            text = re.sub(self.regexAnyMacro, r'\1', text)

            # attachments in notes
            text = re.sub(self.regexAttachment, r"\n\n*(Merged from Redmine, please check first note for attachment named **\1**)*", text)

            # code highlight
            for highlight in re.findall(self.regexCodeHighlight, text):
                text = text.replace(highlight[0], "\n```{}".format(highlight[2].lower()))
                text = text.replace(highlight[3], "\n```")
        except RuntimeError:
            return False
        return text

Source File: convert_jupyter_to_py.py From gempy with GNU Lesser General Public License v3.0

4 votes

def convert_ipynb_to_gallery(nb, new_file):
    """Convert a Jupyter notebook into a ``# %%``-delimited Python script.

    Parameters
    ----------
    nb
        Path of the notebook (JSON) file to read.
    new_file
        Path of the Python script to write.

    The first cell provides the module docstring. If it is a markdown
    cell, the first element of its ``source`` is converted to rst and used
    as the docstring. Otherwise the notebook's file name without extension
    is the docstring and the cell's source follows as code. Each later
    markdown cell is converted to rst and emitted as ``# `` comment lines
    after a ``# %%`` marker, and each later code cell is emitted after a
    ``# %% `` marker. Finally, lines starting with ``%`` are commented out.
    """
    with open(nb, encoding="utf8", errors='ignore') as nb_file:
        nb_dict = json.load(nb_file)
    cells = nb_dict['cells']

    python_file = ""
    for i, cell in enumerate(cells):
        if i == 0:
            if cell['cell_type'] != 'markdown':
                # Use the notebook's own name as the title. The original
                # referenced an undefined name here and raised NameError.
                rst_source = os.path.splitext(os.path.basename(nb))[0]
                python_file = '"""\n' + rst_source + '\n"""'
                source = ''.join(cell['source'])
                python_file = python_file + '\n' * 2 + source

            else:
                # Only the first element of the cell source is used.
                md_source = cell['source'][0]
                rst_source = pdoc.convert_text(md_source, 'rst', 'md')
                python_file = '"""\n' + rst_source + '\n"""'
        else:
            if cell['cell_type'] == 'markdown':
                md_source = ''.join(cell['source'])
                rst_source = pdoc.convert_text(md_source, 'rst', 'md')
                commented_source = '\n'.join(['# ' + x for x in
                                              rst_source.split('\n')])

                python_file = python_file + '\n\n\n' + '# %%' + '\n' + \
                              commented_source

            elif cell['cell_type'] == 'code':
                source = ''.join(cell['source'])
                python_file = python_file + '\n' * 2 + '# %% \n' + source

    # Comment out lines that start with '%', which are not valid Python.
    python_file = python_file.replace("\n%", "\n# %")
    with open(new_file, 'w', newline='', errors='ignore') as out_file:
        out_file.write(python_file)

#%%

Source File: gendoc.py From koalas with Apache License 2.0

4 votes

def gen_release_notes(path):
    """
    Generate reStructuredText files for "Release Notes". It generates 'index.rst' file and
    each rst file for each version's release note under 'whatsnew' directory.
    The contents are from Github release notes.

    The 'whatsnew' directory under ``path`` is deleted and recreated on
    every call, so files from a previous run are discarded.
    """
    whatsnew_dir = "%s/whatsnew" % path
    shutil.rmtree(whatsnew_dir, ignore_errors=True)
    os.mkdir(whatsnew_dir)

    with open("%s/index.rst" % whatsnew_dir, "a") as index_file:
        title = "Release Notes"
        rule = "=" * len(title)

        index_file.write("%s\n%s\n%s\n\n" % (rule, title, rule))
        # The directive and its option go on separate lines; previously
        # no line break was written between them.
        index_file.write(".. toctree::\n")
        index_file.write("   :maxdepth: 1\n\n")

        for name, tag_name, body in list_releases_to_document(ks.__version__):
            release_doc = pypandoc.convert_text(body, "rst", format="md")

            # Make PR reference link pretty.
            # Replace ", #..." to ", `...<https://github.com/databricks/koalas/pull/...>`_"
            release_doc = re.sub(
                r', #(\d+)',
                r', `#\1 <https://github.com/databricks/koalas/pull/\1>`_', release_doc)
            # Replace "(#..." to "(`...<https://github.com/databricks/koalas/pull/...>`_"
            release_doc = re.sub(
                r'\(#(\d+)',
                r'(`#\1 <https://github.com/databricks/koalas/pull/\1>`_', release_doc)

            # One toctree entry per release, each followed by a blank line.
            index_file.write("   " + tag_name + "\n\n")

            with open("%s/%s.rst" % (whatsnew_dir, tag_name), "a") as release_file:
                name_rule = "=" * len(name)
                release_file.write("%s\n%s\n%s\n\n" % (name_rule, name, name_rule))
                release_file.write(release_doc)
                release_file.write("\n\n")

Source File: utils.py From rdmo with Apache License 2.0

4 votes

def _pandoc_is_version_1():
    """Return True when the installed pandoc has major version 1."""
    return pypandoc.get_pandoc_version().split('.')[0] == '1'


def render_to_format(request, format, title, template_src, context):
    """Render a template and return it as an HTTP response in ``format``.

    Parameters
    ----------
    request
        Unused by this function.
    format
        Export format; must be a key of ``settings.EXPORT_FORMATS``.
    title
        Used as the file name (without extension) in Content-Disposition.
    template_src
        Name of the template to render.
    context
        Context passed to the template.

    Returns the rendered HTML directly for ``'html'``. Any other supported
    format is produced by converting that HTML with pandoc via a temporary
    file. Unsupported formats get an ``HttpResponseBadRequest``.
    """
    if format in dict(settings.EXPORT_FORMATS):

        # render the template to a html string
        template = get_template(template_src)
        html = template.render(context)

        # remove empty lines
        html = os.linesep.join([line for line in html.splitlines() if line.strip()])

        if format == 'html':

            # create the response object
            response = HttpResponse(html)

        else:
            if format == 'pdf':
                # check pandoc version (the pdf arg changed to version 2)
                if _pandoc_is_version_1():
                    args = ['-V', 'geometry:margin=1in', '--latex-engine=xelatex']
                else:
                    args = ['-V', 'geometry:margin=1in', '--pdf-engine=xelatex']

                content_disposition = 'filename="%s.%s"' % (title, format)
            else:
                args = []
                content_disposition = 'attachment; filename="%s.%s"' % (title, format)

            # use reference document for certain file formats
            refdoc = set_export_reference_document(format)
            if refdoc is not None and (format == 'docx' or format == 'odt'):
                # Same major-version test as above; a bare startswith("1")
                # would also match a version such as "10.x".
                if _pandoc_is_version_1():
                    refdoc_param = '--reference-' + format + '=' + refdoc
                else:
                    refdoc_param = '--reference-doc=' + refdoc
                args.append(refdoc_param)

            # create a temporary file; pandoc writes to it by name, so the
            # descriptor returned by mkstemp is closed immediately
            (tmp_fd, tmp_filename) = mkstemp('.' + format)
            os.close(tmp_fd)

            try:
                log.info("Export %s document using args %s", format, args)
                # convert the file using pandoc
                pypandoc.convert_text(html, format, format='html', outputfile=tmp_filename, extra_args=args)

                # read the temporary file
                with open(tmp_filename, 'rb') as file_handler:
                    file_content = file_handler.read()
            finally:
                # delete the temporary file, even if the conversion failed
                os.remove(tmp_filename)

            # create the response object
            response = HttpResponse(file_content, content_type='application/%s' % format)
            response['Content-Disposition'] = content_disposition.encode('utf-8')

        return response
    else:
        return HttpResponseBadRequest(_('This format is not supported.'))

Python pypandoc.convert_text() Examples