Python pypandoc.convert_text() Examples
The following are 30 code examples of pypandoc.convert_text().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pypandoc, or try the search function.
Example #1
Source File: report_generator.py From qb with MIT License | 6 votes |
def create(self, variables, md_output, pdf_output):
    """Render the report template and write it out as Markdown and PDF.

    The template named by ``self.template`` is rendered with ``variables``.
    The Markdown is written to ``md_output`` unless that is ``None``. A PDF
    conversion to ``pdf_output`` is then attempted through pypandoc; any
    exception raised while importing or calling it is only logged.
    """
    jinja_env = Environment(loader=PackageLoader('qanta', 'reporting/templates'))
    markdown = jinja_env.get_template(self.template).render(variables)

    if md_output is not None:
        with open(md_output, 'w') as md_file:
            md_file.write(markdown)

    try:
        # Imported here so that a missing pypandoc is handled like any other
        # conversion failure below.
        import pypandoc
        pandoc_args = ['-V', 'geometry:margin=.75in']
        pypandoc.convert_text(markdown, 'pdf', format='md',
                              outputfile=pdf_output, extra_args=pandoc_args)
    except Exception as err:
        log.warn('Pandoc was not installed or there was an error calling it, omitting PDF report')
        log.warn(str(err))
Example #2
Source File: mxdoc.py From SNIPER-mxnet with Apache License 2.0 | 6 votes |
def _convert_md_table_to_rst(table): """Convert a markdown table to rst format""" if len(table) < 3: return '' out = '```eval_rst\n.. list-table::\n :header-rows: 1\n\n' for i,l in enumerate(table): cols = l.split('|')[1:-1] if i == 0: ncol = len(cols) else: if len(cols) != ncol: return '' if i == 1: for c in cols: if len(c) is not 0 and '---' not in c: return '' else: for j,c in enumerate(cols): out += ' * - ' if j == 0 else ' - ' out += pypandoc.convert_text( c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n' out += '```\n' return out
Example #3
Source File: mkdsupport.py From metaknowledge with GNU General Public License v2.0 | 6 votes |
def pandoc_process(app, what, name, obj, options, lines):
    """Convert docstrings in Markdown into reStructuredText using pandoc.

    ``lines`` is rewritten in place with the converted text. The input
    format is read from ``app.config.mkdsupport_use_parser``. Nothing is
    done when ``lines`` is empty.
    """
    if not lines:
        return None

    source_format = app.config.mkdsupport_use_parser
    # sphinx.ext.autodoc works with unicode, and pypandoc.convert_text both
    # accepts and returns unicode, so there is no need to deal with encodings.
    converted = pypandoc.convert_text(SEP.join(lines), 'rst', format=source_format)

    # Sphinx reads the result back from the very list object it passed in,
    # so replace its contents rather than rebinding the name.
    lines[:] = converted.split(SEP)
Example #4
Source File: docntbk.py From sporco with BSD 3-Clause "New" or "Revised" License | 6 votes |
def rst_to_notebook(infile, outfile, diridx=False):
    """Convert an rst file to a notebook file.

    Parameters
    ----------
    infile : str
        Path of the rst file to read.
    outfile : str
        Path of the notebook file to write.
    diridx : bool
        When true, link targets that contain no ``/`` are rewritten to
        ``<target>/index.ipynb``.
    """
    # Read infile into a string
    with open(infile, 'r') as fin:
        rststr = fin.read()

    # Convert string from rst to markdown
    mdfmt = 'markdown_github+tex_math_dollars+fenced_code_attributes'
    mdstr = pypandoc.convert_text(rststr, mdfmt, format='rst',
                                  extra_args=['--atx-headers'])

    # In links, replace .py extensions with .ipynb. The dot is escaped so
    # that only a literal ".py" suffix matches; unescaped, it matched any
    # character and mangled parenthesised text such as "(see numpy)".
    mdstr = re.sub(r'\(([^\)]+)\.py\)', r'(\1.ipynb)', mdstr)

    # Links to subdirectories require explicit index file inclusion
    if diridx:
        mdstr = re.sub(r']\(([^\)/]+)\)', r'](\1/index.ipynb)', mdstr)

    # Enclose the markdown within triple quotes and convert from
    # python to notebook
    mdstr = '"""' + mdstr + '"""'
    nb = py2jn.py_string_to_notebook(mdstr)
    py2jn.tools.write_notebook(nb, outfile, nbver=4)
Example #5
Source File: mxdoc.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def _convert_md_table_to_rst(table): """Convert a markdown table to rst format""" if len(table) < 3: return '' out = '```eval_rst\n.. list-table::\n :header-rows: 1\n\n' for i,l in enumerate(table): cols = l.split('|')[1:-1] if i == 0: ncol = len(cols) else: if len(cols) != ncol: return '' if i == 1: for c in cols: if len(c) is not 0 and '---' not in c: return '' else: for j,c in enumerate(cols): out += ' * - ' if j == 0 else ' - ' out += pypandoc.convert_text( c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n' out += '```\n' return out
Example #6
Source File: notebook.py From sphinx-gallery with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fill_notebook(work_notebook, script_blocks, gallery_conf):
    """Write the script blocks into the Jupyter notebook as cells.

    Code blocks become code cells. Every other block becomes a markdown
    cell, converted from rst with pypandoc unless
    ``gallery_conf["pypandoc"]`` is ``False``, in which case ``rst2md``
    is used.

    Parameters
    ----------
    script_blocks : list
        Each list element should be a tuple of (label, content, lineno).
    """
    for blabel, bcontent, lineno in script_blocks:
        if blabel == 'code':
            add_code_cell(work_notebook, bcontent)
            continue

        pandoc_kwargs = gallery_conf["pypandoc"]
        if pandoc_kwargs is False:
            markdown = rst2md(bcontent + '\n')
        else:
            import pypandoc
            # pandoc automatically adds \n to the end
            markdown = pypandoc.convert_text(
                bcontent, to='md', format='rst', **pandoc_kwargs)
        add_markdown_cell(work_notebook, markdown)
Example #7
Source File: mxdoc.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _convert_md_table_to_rst(table): """Convert a markdown table to rst format""" if len(table) < 3: return '' out = '```eval_rst\n.. list-table::\n :header-rows: 1\n\n' for i,l in enumerate(table): cols = l.split('|')[1:-1] if i == 0: ncol = len(cols) else: if len(cols) != ncol: return '' if i == 1: for c in cols: if len(c) is not 0 and '---' not in c: return '' else: for j,c in enumerate(cols): out += ' * - ' if j == 0 else ' - ' out += pypandoc.convert_text( c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n' out += '```\n' return out
Example #8
Source File: redmine_to_github.py From pyweed with GNU Lesser General Public License v3.0 | 6 votes |
def convert_issue_data(self, redmine_issue):
    """
    Build the payload for a new GitHub issue from a Redmine issue dict.

    The description is converted from textile to ``markdown_github`` and
    prefixed with a note recording the Redmine id and creation date, plus
    the closing date when ``self.is_closed`` reports the issue as closed.
    """
    body_markdown = convert_text(
        redmine_issue['description'], 'markdown_github', 'textile')

    issue_id = redmine_issue['id']
    # Timestamps are split at 'T' and only the leading part is kept.
    created_day = redmine_issue['created_on'].split('T')[0]
    note = '###### ported from Redmine #%s (created %s)' % (issue_id, created_day)

    if self.is_closed(redmine_issue):
        closed_day = redmine_issue['closed_on'].split('T')[0]
        note = '%s (CLOSED %s)' % (note, closed_day)

    return {
        "title": "%(subject)s (RM#%(id)s)" % redmine_issue,
        "body": "%s\n\n%s" % (note, body_markdown),
        "assignees": ["adam-iris"],
    }
Example #9
Source File: twlight_wikicode2html.py From TWLight with MIT License | 5 votes |
def twlight_wikicode2html(value):
    """Convert the MediaWiki markup in ``value`` to HTML with pandoc."""
    return pypandoc.convert_text(value, "html", format="mediawiki")
Example #10
Source File: formatter.py From pytablereader with MIT License | 5 votes |
def __init__(self, source_data):
    """Convert MediaWiki ``source_data`` to HTML and pass it to the parent.

    Raises ``PypandocImportError`` when pypandoc cannot be imported.
    """
    try:
        import pypandoc
    except ImportError as import_error:
        # pypandoc is an optional dependency, so it may not be installed
        # on this system.
        raise PypandocImportError(import_error)

    html = pypandoc.convert_text(source_data, "html", format="mediawiki")
    super().__init__(html)
Example #11
Source File: import_grundgesetz.py From oldp with MIT License | 5 votes |
def handle_law_from_xml(self, book, book_xml) -> LawBook:
    """Create the sections and laws of ``book`` from a DocBook XML string.

    Every ``sect1`` element becomes a section of the book. Every ``sect2``
    inside it becomes a saved ``Law`` whose content is the element's
    children (title removed) converted from DocBook to HTML. Laws are
    numbered consecutively across sections and linked to their predecessor.

    Returns ``book``.
    """
    last_law = None
    next_order = 1

    # Parse XML tree
    root = etree.fromstring(book_xml)

    for section in root.xpath('sect1'):
        section_title = section.xpath('title/text()')[0]
        logger.debug('Section: %s' % section_title)

        book.add_section(from_order=next_order, title=section_title.strip())

        for law_node in section.xpath('sect2'):
            # Detach the title so it is not rendered as part of the body.
            title_node = law_node.xpath('title')[0]
            title_node.getparent().remove(title_node)

            children_xml = [tostring(child).decode('utf-8')
                            for child in law_node.iterchildren()]
            law_html = pypandoc.convert_text(
                '\n'.join(children_xml), 'html', format='docbook')
            section_label = tostring(title_node, method="text").decode('utf-8').strip()

            law = Law(
                book=book,
                title='',
                section=section_label,
                slug=slugify(section_label),
                content=law_html,
                previous=last_law,
                order=next_order,
            )
            law.save()

            next_order += 1
            last_law = law

    return book
Example #12
Source File: test_stitcher.py From stitch with MIT License | 5 votes |
def as_json(document):
    "Parse the markdown document into pandoc's JSON representation"
    pandoc_json = pypandoc.convert_text(document, 'json', format='markdown')
    return json.loads(pandoc_json)
Example #13
Source File: stitch.py From stitch with MIT License | 5 votes |
def tokenize_block(source: str, pandoc_extra_args: list=None) -> list:
    """
    Convert a Jupyter output to Pandoc's JSON AST.

    ``pandoc_extra_args`` is forwarded to pandoc (an empty list when
    omitted). Only the ``blocks`` entry of the parsed document is returned.
    """
    extra_args = [] if pandoc_extra_args is None else pandoc_extra_args
    pandoc_json = pypandoc.convert_text(
        source, to='json', format='markdown', extra_args=extra_args)
    document = json.loads(pandoc_json)
    return document['blocks']
Example #14
Source File: stitch.py From stitch with MIT License | 5 votes |
def tokenize(source: str) -> dict:
    """
    Parse a markdown document into pandoc's JSON AST.
    """
    pandoc_json = pypandoc.convert_text(source, 'json', 'markdown')
    return json.loads(pandoc_json)
Example #15
Source File: stitch.py From stitch with MIT License | 5 votes |
def convert(source: str, to: str, extra_args=(),
            output_file: str=None) -> None:
    """
    Stitch a source document and convert the result with pandoc.

    Parameters
    ----------
    source : str
    to : str
    extra_args : iterable
    output_file : str

    Notes
    -----
    Either writes to ``output_file`` or prints to stdout.
    ``--use-prompt`` is consumed here and not forwarded to pandoc.
    """
    if output_file is None:
        output_name = 'std_out'
    else:
        output_name = os.path.splitext(os.path.basename(output_file))[0]

    stitcher = Stitch(
        name=output_name,
        to=to,
        standalone='--standalone' in extra_args,
        self_contained='--self-contained' in extra_args,
        use_prompt='--use-prompt' in extra_args,
    )
    pandoc_args = [arg for arg in extra_args if arg != '--use-prompt']

    stitched_json = json.dumps(stitcher.stitch(source))
    newdoc = pypandoc.convert_text(stitched_json, to, format='json',
                                   extra_args=pandoc_args,
                                   outputfile=output_file)
    if output_file is None:
        print(newdoc)
Example #16
Source File: helpers.py From Apostrophe with GNU General Public License v3.0 | 5 votes |
def pandoc_convert(text, to="html5", args=None, outputfile=None):
    """Convert ``text`` with pandoc, using the configured input format.

    The input format is read from the ``input-format`` setting and falls
    back to ``"markdown"`` when that setting is empty. ``--quiet`` is
    always appended to the pandoc arguments.

    ``args`` is an optional iterable of extra pandoc arguments; it is never
    modified. The return value is whatever ``pypandoc.convert_text``
    returns.
    """
    fr = Settings.new().get_value('input-format').get_string() or "markdown"
    # Build a fresh list. Extending `args` in place mutated the caller's
    # list and, with the former `args=[]` default, accumulated one more
    # "--quiet" on every call.
    extra_args = list(args or ()) + ["--quiet"]
    return pypandoc.convert_text(text, to, fr, extra_args=extra_args,
                                 outputfile=outputfile)
Example #17
Source File: publish-gh-release-notes.py From pytest with MIT License | 5 votes |
def convert_rst_to_md(text):
    """Convert reStructuredText to Markdown, passing ``--wrap=preserve`` to pandoc."""
    pandoc_args = ["--wrap=preserve"]
    return pypandoc.convert_text(text, "md", format="rst", extra_args=pandoc_args)
Example #18
Source File: twlight_wikicode2html.py From TWLight with MIT License | 5 votes |
def twlight_wikicode2html(value):
    """Convert the MediaWiki markup in ``value`` to HTML with pandoc."""
    return pypandoc.convert_text(value, "html", format="mediawiki")
Example #19
Source File: RSSParser.py From feedDiasp with GNU General Public License v2.0 | 5 votes |
def html2markdown(html: str) -> str:
    """
    Returns the given HTML as equivalent Markdown-structured text.

    pypandoc is tried first. When it raises ``OSError`` a hint is printed
    and the result of ``html2text`` is returned instead.
    """
    try:
        return pypandoc.convert_text(html, 'md', format='html')
    except OSError:
        # A space now follows "results": the adjacent literals previously
        # joined into "resultsthan".
        msg = ("It's recommended to install the `pandoc` library for converting "
               "HTML into Markdown-structured text. It tends to have better results "
               "than `html2text`, which is now used as a fallback.")
        print(msg)
        return html2text(html)
Example #20
Source File: utils.py From insightconnect-plugins with MIT License | 5 votes |
def convert(content, from_format, to_format, use_file=False):
    """Convert ``content`` from ``from_format`` to ``to_format`` with pandoc.

    With ``use_file`` the output goes to a file obtained from ``make_file``.
    That file is read back with ``read_file`` and decoded as UTF-8, falling
    back to latin-1. Otherwise pandoc's return value is returned directly.
    """
    filename = make_file(to_format) if use_file else None
    output = pypandoc.convert_text(
        content, to_format, format=from_format, outputfile=filename)

    if not use_file:
        return output

    raw = read_file(filename)
    try:
        return raw.decode('UTF-8')
    except UnicodeDecodeError:
        return raw.decode('latin-1')
Example #21
Source File: publish_gh_release_notes.py From rasa-for-botfront with Apache License 2.0 | 5 votes |
def convert_rst_to_md(text):
    """Convert reStructuredText to Markdown, passing ``--wrap=preserve`` to pandoc."""
    pandoc_args = ["--wrap=preserve"]
    return pypandoc.convert_text(text, "md", format="rst", extra_args=pandoc_args)
Example #22
Source File: publish_gh_release_notes.py From rasa-sdk with Apache License 2.0 | 5 votes |
def convert_rst_to_md(text):
    """Convert reStructuredText to Markdown, passing ``--wrap=preserve`` to pandoc."""
    pandoc_args = ["--wrap=preserve"]
    return pypandoc.convert_text(text, "md", format="rst", extra_args=pandoc_args)
Example #23
Source File: converters.py From django-htk with MIT License | 5 votes |
def html2markdown(html):
    """Return `html` converted to ``markdown_strict`` text by pandoc
    """
    return pypandoc.convert_text(html, 'markdown_strict', format='html')
Example #24
Source File: utils.py From podoc with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_pandoc_api_version():
    """Return the ``pandoc-api-version`` entry of pandoc's JSON output.

    An empty markdown document is converted to JSON to obtain it.
    """
    import pypandoc
    empty_document = pypandoc.convert_text('', 'json', format='markdown')
    return json.loads(empty_document)['pandoc-api-version']
Example #25
Source File: _markdown.py From podoc with BSD 3-Clause "New" or "Revised" License | 5 votes |
def read(self, contents, context=None):
    """Parse a Markdown string into an AST.

    ``contents`` must be a ``str``. It is converted to pandoc JSON using
    ``PANDOC_MARKDOWN_FORMAT`` and loaded through ``ASTPlugin``.
    ``context`` is not used here.
    """
    assert isinstance(contents, str)
    pandoc_json = pypandoc.convert_text(
        contents, 'json', format=PANDOC_MARKDOWN_FORMAT)
    return ASTPlugin().loads(pandoc_json)
Example #26
Source File: descriptor_set_tasks.py From artman with Apache License 2.0 | 5 votes |
def md2rst(comment):
    """Convert a comment from protobuf markdown to restructuredtext.

    Steps:

    - Replace proto links with literals (e.g. [Foo][bar.baz.Foo] -> `Foo`)
    - Resolve relative URLs to https://cloud.google.com
    - Run pandoc to convert from markdown to restructuredtext
    """
    comment = _replace_relative_link(_replace_proto_link(comment))

    # pypandoc.convert_text is slow, so it is skipped when the comment
    # contains none of the markdown special characters.
    if any(ch in comment for ch in '`[]*_'):
        comment = pypandoc.convert_text(comment, 'rst', format='commonmark')
        # The converted comment goes back into a descriptor set, where each
        # line is expected to start with a space separating it from the
        # leading '//'. Parsing code in gapic-generator strips that space
        # later, which would break the indentation of lines that really do
        # begin with one, so the extra space is inserted now. Comments that
        # skipped pypandoc already have it and are left unchanged.
        comment = _insert_spaces(comment)
    return comment
Example #27
Source File: wiki.py From redmine-gitlab-migrator with GNU General Public License v3.0 | 4 votes |
def convert(self, text):
    """Convert Redmine textile wiki text to Markdown.

    The text is pre-processed (code blocks wrapped in ``<pre>``, collapse
    matches rewritten to ``<details>``/``<summary>``, paragraph markers
    removed), converted by pandoc from textile to ``markdown_strict``, and
    then post-processed to repair constructs pandoc leaves untouched.

    Returns the converted text, or ``False`` when a ``RuntimeError`` is
    raised during the pandoc conversion or the post-processing.
    """
    text = '\n\n'.join(
        re.sub(self.regexCodeBlock, r'<pre>\1</pre>', block)
        for block in text.split('\n\n'))

    for collapse in re.findall(self.regexCollapse, text):
        text = text.replace(collapse[0], "<details>")
        text = text.replace(
            collapse[2],
            "<summary>{}</summary> \n\n{}".format(collapse[1], collapse[2]))
        text = text.replace(collapse[3], "</details>")

    text = re.sub(self.regexParagraph, "", text)

    # convert from textile to markdown
    try:
        text = pypandoc.convert_text(text, 'markdown_strict', format='textile')

        # NOTE: the fourth positional parameter of re.sub() is `count`, not
        # `flags`. The calls below used to pass re.MULTILINE | re.DOTALL
        # there, which applied no flags and capped each substitution at 24
        # replacements. The stray argument is dropped; any flags a pattern
        # needs belong where the self.regex* patterns are defined.

        # pandoc does not convert everything, notably the [[link|text]] syntax
        # is not handled. So let's fix that.

        # [[ wikipage | link_text ]] -> [link_text](wikipage)
        text = re.sub(self.regexWikiLinkWithText, self.wiki_link, text)

        # [[ link_url ]] -> [link_url](link_url)
        text = re.sub(self.regexWikiLinkWithoutText, self.wiki_link, text)

        # nested lists, fix at least the common issues
        text = text.replace(" \\#\\*", " -")
        text = text.replace(" \\*\\#", " 1.")

        # Redmine is using '>' for blockquote, which is not textile
        text = text.replace("> ", ">")

        # wiki note macros
        text = re.sub(self.regexTipMacro, r'---\n**TIP**: \1\n---\n', text)
        text = re.sub(self.regexNoteMacro, r'---\n**NOTE**: \1\n---\n', text)
        text = re.sub(self.regexWarningMacro, r'---\n**WARNING**: \1\n---\n', text)
        text = re.sub(self.regexImportantMacro, r'---\n**IMPORTANT**: \1\n---\n', text)

        # all other macros
        text = re.sub(self.regexAnyMacro, r'\1', text)

        # attachments in notes
        text = re.sub(
            self.regexAttachment,
            r"\n\n*(Merged from Redmine, please check first note for attachment named **\1**)*",
            text)

        # code highlight
        for highlight in re.findall(self.regexCodeHighlight, text):
            text = text.replace(highlight[0], "\n```{}".format(highlight[2].lower()))
            text = text.replace(highlight[3], "\n```")
    except RuntimeError:
        return False

    return text
Example #28
Source File: convert_jupyter_to_py.py From gempy with GNU Lesser General Public License v3.0 | 4 votes |
def convert_ipynb_to_gallery(nb, new_file): python_file = "" nb_dict = json.load(open(nb, encoding="utf8", errors='ignore')) cells = nb_dict['cells'] for i, cell in enumerate(cells): if i == 0: if cell['cell_type'] != 'markdown': rst_source = os.path.basename(file_name[:-5]) rst_source = bytes(rst_source, 'utf-8').decode('utf-8', 'ignore') python_file = '"""\n' + rst_source + '\n"""' source = ''.join(cell['source']) python_file = python_file + '\n' * 2 + source else: b = cell['source'] print(b) a = bytes(cell['source'][0], 'utf-8').decode('utf-8', 'ignore') print(a) md_source = ''.join(a) rst_source = pdoc.convert_text(md_source, 'rst', 'md') print(rst_source) rst_source = bytes(rst_source, 'utf-8').decode('utf-8', 'ignore') python_file = '"""\n' + rst_source + '\n"""' else: if cell['cell_type'] == 'markdown': md_source = ''.join(cell['source']) rst_source = pdoc.convert_text(md_source, 'rst', 'md') rst_source = rst_source.encode().decode('utf-8', 'ignore') commented_source = '\n'.join(['# ' + x for x in rst_source.split('\n')]) #python_file = python_file + '\n\n\n' + '#' * 70 + '\n' + \ # commented_source python_file = python_file + '\n\n\n' + '# %%' + '\n' + \ commented_source elif cell['cell_type'] == 'code': source = ''.join(cell['source']) python_file = python_file + '\n' * 2 + '# %% \n' + source python_file = python_file.replace("\n%", "\n# %") open(new_file, 'w', newline='', errors='ignore').write(python_file) #%%
Example #29
Source File: gendoc.py From koalas with Apache License 2.0 | 4 votes |
def gen_release_notes(path):
    """
    Generate reStructuredText files for "Release Notes".

    The 'whatsnew' directory under ``path`` is recreated from scratch. It
    receives an 'index.rst' with a toctree plus one rst file per release
    returned by ``list_releases_to_document``. Each release body is
    converted from Markdown to rst, and its pull-request references are
    turned into links.
    """
    whatsnew_dir = "%s/whatsnew" % path
    shutil.rmtree(whatsnew_dir, ignore_errors=True)
    os.mkdir(whatsnew_dir)

    # Shared replacement target for both pull-request reference patterns.
    pr_link = r'`#\1 <https://github.com/databricks/koalas/pull/\1>`_'

    with open("%s/index.rst" % whatsnew_dir, "a") as index_file:
        title = "Release Notes"
        title_rule = "=" * len(title)
        index_file.writelines([
            title_rule, "\n",
            title, "\n",
            title_rule, "\n",
            "\n",
            ".. toctree::", " :maxdepth: 1", "\n",
            "\n",
        ])

        for name, tag_name, body in list_releases_to_document(ks.__version__):
            release_doc = pypandoc.convert_text(body, "rst", format="md")

            # Make PR reference link pretty.
            # ", #123" -> ", `#123 <pull-request URL>`_"
            release_doc = re.sub(r', #(\d+)', r', ' + pr_link, release_doc)
            # "(#123" -> "(`#123 <pull-request URL>`_"
            release_doc = re.sub(r'\(#(\d+)', r'(' + pr_link, release_doc)

            index_file.writelines([" " + tag_name, "\n", "\n"])

            with open("%s/%s.rst" % (whatsnew_dir, tag_name), "a") as release_file:
                name_rule = "=" * len(name)
                release_file.writelines([
                    name_rule, "\n",
                    name, "\n",
                    name_rule, "\n",
                    "\n",
                    release_doc, "\n",
                    "\n",
                ])
Example #30
Source File: utils.py From rdmo with Apache License 2.0 | 4 votes |
def render_to_format(request, format, title, template_src, context):
    """Render a template and return it as an HTTP response in ``format``.

    The template is rendered to HTML and stripped of empty lines. For
    ``'html'`` that HTML is returned directly. For any other supported
    format it is converted by pandoc through a temporary file, and the
    file's bytes are returned with a ``Content-Disposition`` header
    (inline for PDF, attachment otherwise).

    Returns ``HttpResponseBadRequest`` when ``format`` is not one of
    ``settings.EXPORT_FORMATS``. Exceptions raised by pandoc propagate to
    the caller; the temporary file is removed in that case as well.
    """
    if format not in dict(settings.EXPORT_FORMATS):
        return HttpResponseBadRequest(_('This format is not supported.'))

    # render the template to a html string
    html = get_template(template_src).render(context)

    # remove empty lines
    html = os.linesep.join([line for line in html.splitlines() if line.strip()])

    if format == 'html':
        return HttpResponse(html)

    if format == 'pdf':
        # check pandoc version (the pdf arg changed to version 2)
        if pypandoc.get_pandoc_version().split('.')[0] == '1':
            args = ['-V', 'geometry:margin=1in', '--latex-engine=xelatex']
        else:
            args = ['-V', 'geometry:margin=1in', '--pdf-engine=xelatex']
        content_disposition = 'filename="%s.%s"' % (title, format)
    else:
        args = []
        content_disposition = 'attachment; filename="%s.%s"' % (title, format)

    # use reference document for certain file formats
    refdoc = set_export_reference_document(format)
    if refdoc is not None and format in ('docx', 'odt'):
        # Same major-version test as above. The former startswith("1")
        # check would also have matched a major version such as "10".
        if pypandoc.get_pandoc_version().split('.')[0] == '1':
            args.append('--reference-' + format + '=' + refdoc)
        else:
            args.append('--reference-doc=' + refdoc)

    # create a temporary file
    tmp_fd, tmp_filename = mkstemp('.' + format)
    try:
        # The descriptor is wrapped first so that it is closed, and the file
        # removed, even if the conversion raises.
        with os.fdopen(tmp_fd, 'rb') as file_handler:
            log.info("Export " + format + " document using args " + str(args))
            # convert the file using pandoc
            pypandoc.convert_text(html, format, format='html',
                                  outputfile=tmp_filename, extra_args=args)
            # read the temporary file
            file_content = file_handler.read()
    finally:
        # delete the temporary file
        os.remove(tmp_filename)

    # create the response object
    response = HttpResponse(file_content, content_type='application/%s' % format)
    response['Content-Disposition'] = content_disposition.encode('utf-8')
    return response