Python html2text.html2text() Examples
The following are 30
code examples of html2text.html2text().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
html2text, or try the search function.
Example #1
Source File: email_service.py From python-for-entrepreneurs-course-demos with MIT License | 6 votes |
def send_email(to_address, subject, html_body):
    """Build an HTML email with a plain-text alternative and send it.

    The plain-text body is derived from ``html_body`` with html2text.
    When ``EmailService.__is_debug_mode`` is truthy the message is built
    but not handed to the SMTP server. Any exception is caught and
    printed instead of being propagated to the caller.
    """
    try:
        server = EmailService.create_smtp_server()
        msg = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8',
        )
        msg.Subject = subject
        msg.Html = html_body
        msg.Body = html2text.html2text(html_body)

        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
        else:
            print("Sending message (live!)")
            server.send(msg)
    except Exception as x:
        # Best-effort delivery: report the failure, never raise.
        print("Error sending mail: {}".format(x))
Example #2
Source File: converters.py From allura with Apache License 2.0 | 6 votes |
def mediawiki2markdown(source):
    """Convert MediaWiki markup to Markdown.

    The markup is rendered to HTML with ``wiki2html``, post-processed by
    ``_convert_toc`` and then converted to Markdown with html2text.

    Raises:
        ImportError: if ``html2text`` or ``mediawiki`` cannot be imported.
    """
    # Imported lazily so the module loads without these optional libraries.
    try:
        import html2text
        from mediawiki import wiki2html
    except ImportError:
        raise ImportError("""This operation requires GPL libraries: "mediawiki" (https://pypi.org/project/mediawiki2html/) "html2text" (https://pypi.org/project/html2text/)""")

    # Module-level html2text setting; 0 presumably disables wrapping -- confirm.
    html2text.BODY_WIDTH = 0

    html = _convert_toc(wiki2html(source, True))
    markdown = html2text.html2text(html)
    # NOTE(review): as written, each replace() maps a character to itself,
    # so this line changes nothing. The arguments look like HTML entities
    # that were decoded somewhere along the way -- confirm against the
    # upstream file before relying on any escaping here.
    markdown = markdown.replace('<', '<').replace('>', '>')
    return markdown
Example #3
Source File: run_whoosh.py From TorCMS with MIT License | 6 votes |
def do_for_wiki(rand=True, doc_type=''):
    """Add wiki records to the ``TOR_IDX`` search index.

    With ``rand`` truthy, 10 records from ``MWiki.query_random`` are
    indexed; otherwise 2 records from ``MWiki.query_recent``. Each record
    is written and committed with its own index writer.
    """
    records = (
        MWiki.query_random(num=10, kind='1')
        if rand
        else MWiki.query_recent(num=2, kind='1')
    )

    for record in records:
        # Indexed content is "<title>,<plain text of the unescaped HTML>".
        content = record.title + ',' + html2text.html2text(
            tornado.escape.xhtml_unescape(record.cnt_html))

        index_writer = TOR_IDX.writer()
        index_writer.update_document(
            title=record.title,
            catid='sid1',
            type=doc_type,
            link='/wiki/{0}'.format(record.title),
            content=content,
        )
        index_writer.commit()
Example #4
Source File: run_whoosh.py From TorCMS with MIT License | 6 votes |
def do_for_post(rand=True, doc_type=''):
    """Add post records to the ``TOR_IDX`` search index.

    With ``rand`` truthy, 10 records from ``MPost.query_random`` are
    indexed; otherwise 2 records from ``MPost.query_recent``. Each record
    is written and committed with its own index writer.
    """
    records = (
        MPost.query_random(num=10, kind='1')
        if rand
        else MPost.query_recent(num=2, kind='1')
    )

    for record in records:
        # Indexed content is "<title>,<plain text of the unescaped HTML>".
        content = record.title + ',' + html2text.html2text(
            tornado.escape.xhtml_unescape(record.cnt_html))

        index_writer = TOR_IDX.writer()
        index_writer.update_document(
            title=record.title,
            catid='sid1',
            type=doc_type,
            link='/post/{0}'.format(record.uid),
            content=content,
        )
        index_writer.commit()
Example #5
Source File: models.py From colossus with MIT License | 6 votes |
def send(self, to: str, context: dict = None):
    """
    Render this template and email it to a single subscriber.

    The rendered template is attached as the HTML alternative; the
    plain-text body is derived from it with html2text (``bodywidth=2000``).

    :param to: Subscriber email address
    :param context: Extra context to be used during email rendering
    :raises FormTemplateIsNotEmail: if ``self.is_email`` is falsy
    """
    if not self.is_email:
        raise FormTemplateIsNotEmail

    html_message = self.render_template(context)
    text_message = html2text.html2text(html_message, bodywidth=2000)

    message = EmailMultiAlternatives(
        subject=self.subject,
        body=text_message,
        from_email=self.get_from_email(),
        to=[to],
    )
    message.attach_alternative(html_message, 'text/html')
    message.send()
Example #6
Source File: filter_handler.py From TorCMS with MIT License | 6 votes |
def echo_html_list_str(self, catid, infos):
    '''
    Render the HTML for the info list of category ``catid``.

    The template name is derived from ``catid``. The same ``kwd`` dict is
    passed to the template as both ``kwd`` and ``widget_info``.
    '''
    kwd = {
        'imgname': 'fixed/zhanwei.png',
        'zhiding': '',
        'tuiguang': '',
    }
    template_name = 'autogen/infolist/infolist_{0}.html'.format(catid)

    self.render(
        template_name,
        userinfo=self.userinfo,
        kwd=kwd,
        html2text=html2text,  # exposed to the template as a helper
        post_infos=infos,
        widget_info=kwd,
    )
Example #7
Source File: email_service.py From cookiecutter-course with GNU General Public License v2.0 | 6 votes |
def send_email(to_address, subject, html_body):
    """Build an HTML email with a plain-text alternative and send it.

    The plain-text body is derived from ``html_body`` with html2text.
    When ``EmailService.__is_debug_mode`` is truthy the message is built
    but not handed to the SMTP server. Any exception is caught and
    printed instead of being propagated to the caller.
    """
    try:
        server = EmailService.create_smtp_server()
        msg = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8',
        )
        msg.Subject = subject
        msg.Html = html_body
        msg.Body = html2text.html2text(html_body)

        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
        else:
            print("Sending message (live!)")
            server.send(msg)
    except Exception as x:
        # Best-effort delivery: report the failure, never raise.
        print("Error sending mail: {}".format(x))
Example #8
Source File: email_service.py From python-for-entrepreneurs-course-demos with MIT License | 6 votes |
def send_email(to_address, subject, html_body):
    """Build an HTML email with a plain-text alternative and send it.

    The plain-text body is derived from ``html_body`` with html2text.
    When ``EmailService.__is_debug_mode`` is truthy the message is built
    but not handed to the SMTP server. Any exception is caught and
    printed instead of being propagated to the caller.
    """
    try:
        server = EmailService.create_smtp_server()
        msg = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8',
        )
        msg.Subject = subject
        msg.Html = html_body
        msg.Body = html2text.html2text(html_body)

        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
        else:
            print("Sending message (live!)")
            server.send(msg)
    except Exception as x:
        # Best-effort delivery: report the failure, never raise.
        print("Error sending mail: {}".format(x))
Example #9
Source File: email_service.py From python-for-entrepreneurs-course-demos with MIT License | 6 votes |
def send_email(to_address, subject, html_body):
    """Build an HTML email with a plain-text alternative and send it.

    The plain-text body is derived from ``html_body`` with html2text.
    When ``EmailService.__is_debug_mode`` is truthy the message is built
    but not handed to the SMTP server. Any exception is caught and
    printed instead of being propagated to the caller.
    """
    try:
        server = EmailService.create_smtp_server()
        msg = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8',
        )
        msg.Subject = subject
        msg.Html = html_body
        msg.Body = html2text.html2text(html_body)

        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
        else:
            print("Sending message (live!)")
            server.send(msg)
    except Exception as x:
        # Best-effort delivery: report the failure, never raise.
        print("Error sending mail: {}".format(x))
Example #10
Source File: email_service.py From python-for-entrepreneurs-course-demos with MIT License | 6 votes |
def send_email(to_address, subject, html_body):
    """Build an HTML email with a plain-text alternative and send it.

    The plain-text body is derived from ``html_body`` with html2text.
    When ``EmailService.__is_debug_mode`` is truthy the message is built
    but not handed to the SMTP server. Any exception is caught and
    printed instead of being propagated to the caller.
    """
    try:
        server = EmailService.create_smtp_server()
        msg = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8',
        )
        msg.Subject = subject
        msg.Html = html_body
        msg.Body = html2text.html2text(html_body)

        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
        else:
            print("Sending message (live!)")
            server.send(msg)
    except Exception as x:
        # Best-effort delivery: report the failure, never raise.
        print("Error sending mail: {}".format(x))
Example #11
Source File: email_service.py From python-for-entrepreneurs-course-demos with MIT License | 6 votes |
def send_email(to_address, subject, html_body):
    """Build an HTML email with a plain-text alternative and send it.

    The plain-text body is derived from ``html_body`` with html2text.
    When ``EmailService.__is_debug_mode`` is truthy the message is built
    but not handed to the SMTP server. Any exception is caught and
    printed instead of being propagated to the caller.
    """
    try:
        server = EmailService.create_smtp_server()
        msg = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8',
        )
        msg.Subject = subject
        msg.Html = html_body
        msg.Body = html2text.html2text(html_body)

        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
        else:
            print("Sending message (live!)")
            server.send(msg)
    except Exception as x:
        # Best-effort delivery: report the failure, never raise.
        print("Error sending mail: {}".format(x))
Example #12
Source File: email_service.py From cookiecutter-pyramid-talk-python-starter with MIT License | 6 votes |
def send_email(to_address, subject, html_body):
    """Build an HTML email with a plain-text alternative and send it.

    The plain-text body is derived from ``html_body`` with html2text.
    When ``EmailService.__is_debug_mode`` is truthy the message is built
    but not handed to the SMTP server. Any exception is caught and
    printed instead of being propagated to the caller.
    """
    try:
        server = EmailService.create_smtp_server()
        msg = mailer.Message(
            From=EmailService.__from_address,
            To=to_address,
            charset='utf-8',
        )
        msg.Subject = subject
        msg.Html = html_body
        msg.Body = html2text.html2text(html_body)

        if EmailService.__is_debug_mode:
            print("Skipping send, email is in dev mode.")
        else:
            print("Sending message (live!)")
            server.send(msg)
    except Exception as x:
        # Best-effort delivery: report the failure, never raise.
        print("Error sending mail: {}".format(x))
Example #13
Source File: export.py From patzilla with GNU Affero General Public License v3.0 | 5 votes |
def get_fulltext(payload, what):
    """Extract one full-text section from an XML payload as plain text.

    Args:
        payload: XML document as bytes (it is wrapped in ``BytesIO``).
        what: Local name of the ``ftxt:`` element to extract; it is
            interpolated into the XPath expression.

    Returns:
        The html2text rendering of the first matching element, or
        ``None`` when no element matches.
    """
    xpath_content = '/ops:world-patent-data/ftxt:fulltext-documents/ftxt:fulltext-document/ftxt:{what}'.format(what=what)
    namespaces = {'ops': 'http://ops.epo.org', 'ftxt': 'http://www.epo.org/fulltext'}

    tree = ET.parse(BytesIO(payload))
    elements = tree.xpath(xpath_content, namespaces=namespaces)
    if not elements:
        return None

    # Serialize to text rather than bytes: html2text parses strings, and a
    # bytes value breaks on Python 3.
    # NOTE(review): assumes ET is lxml.etree (the .xpath() call suggests so).
    return html2text.html2text(ET.tostring(elements[0], encoding='unicode'))
Example #14
Source File: integration_test.py From microblog.pub with GNU Affero General Public License v3.0 | 5 votes |
def resp2plaintext(resp):
    """Return the body of a requests response converted to plain text.

    Intended for making basic assertions on the response content.
    """
    body = resp.text
    return html2text(body)
Example #15
Source File: federation_test.py From microblog.pub with GNU Affero General Public License v3.0 | 5 votes |
def resp2plaintext(resp):
    """Return the body of a requests response converted to plain text.

    Intended for making basic assertions on the response content.
    """
    body = resp.text
    return html2text(body)
Example #16
Source File: mail.py From django-userena-ce with BSD 3-Clause "New" or "Revised" License | 5 votes |
def send_mail(
    subject,
    message_plain,
    message_html,
    email_from,
    email_to,
    custom_headers=None,
    attachments=(),
):
    """
    Build the email as a multipart message containing a multipart
    alternative for text (plain, HTML) plus all the attached files.

    When ``message_plain`` is empty it is derived from ``message_html``
    with html2text. ``custom_headers`` and ``attachments`` are only added
    to the message when they are non-empty.

    Raises:
        ValueError: if both ``message_plain`` and ``message_html`` are empty.
    """
    if not message_plain and not message_html:
        raise ValueError(_("Either message_plain or message_html should be not None"))

    if not message_plain:
        message_plain = html2text(message_html)

    message = {
        "subject": subject,
        "body": message_plain,
        "from_email": email_from,
        "to": email_to,
    }
    if attachments:
        message["attachments"] = attachments
    # The default is None rather than a shared mutable {}; only truthiness
    # is checked, so omitting the argument behaves as before.
    if custom_headers:
        message["headers"] = custom_headers

    msg = EmailMultiAlternatives(**message)
    if message_html:
        msg.attach_alternative(message_html, "text/html")
    msg.send()
Example #17
Source File: comparison.py From trafilatura with GNU General Public License v3.0 | 5 votes |
def run_html2text(htmlstring):
    '''Convert an HTML string to text using the html2text module.'''
    return html2text.html2text(htmlstring)
Example #18
Source File: feed.py From microblog.pub with GNU Affero General Public License v3.0 | 5 votes |
def json_feed(path: str) -> Dict[str, Any]:
    """Build a JSON Feed (https://jsonfeed.org/) document.

    The feed holds up to 10 public, non-deleted "Create" activities from
    the outbox, sorted by ``_id`` descending. ``path`` is appended to
    ``ID`` to form the feed URL.
    """
    outbox_query = {
        "box": Box.OUTBOX.value,
        "type": "Create",
        "meta.deleted": False,
        "meta.public": True,
    }
    activities = DB.activities.find(outbox_query, limit=10).sort("_id", -1)

    items = [
        {
            "id": doc["activity"]["id"],
            "url": doc["activity"]["object"].get("url"),
            "content_html": doc["activity"]["object"]["content"],
            "content_text": html2text(doc["activity"]["object"]["content"]),
            "date_published": doc["activity"]["object"].get("published"),
        }
        for doc in activities
    ]

    feed_url = ID + path
    return {
        "version": "https://jsonfeed.org/version/1",
        "user_comment": (
            "This is a microblog feed. You can add this to your feed reader using the following URL: "
            + feed_url
        ),
        "title": USERNAME,
        "home_page_url": ID,
        "feed_url": feed_url,
        "author": {
            "name": USERNAME,
            "url": ID,
            "avatar": ME.get("icon", {}).get("url"),
        },
        "items": items,
    }
Example #19
Source File: define.py From python-zulip-api with Apache License 2.0 | 5 votes |
def get_bot_define_response(self, original_content: str) -> str:
    """Build the bot's reply for a request to define a single word.

    Args:
        original_content: The text the user sent; expected to be one word
            made of ASCII letters only.

    Returns:
        One of the handler's error messages for a phrase, a word that
        contains non-letters, or an empty request; otherwise the word in
        bold followed by its definitions, or by the request-error message
        when the lookup fails or returns nothing.
    """
    split_content = original_content.split(' ')
    # More than one word (a phrase) is not supported.
    if len(split_content) > 1:
        return self.PHRASE_ERROR_MESSAGE

    to_define = split_content[0].strip()
    to_define_lower = to_define.lower()

    # Reject anything that is not a plain ASCII letter.
    non_letters = set(to_define_lower) - set(string.ascii_lowercase)
    if non_letters:
        return self.SYMBOLS_PRESENT_ERROR_MESSAGE

    # No word was entered.
    if not to_define_lower:
        return self.EMPTY_WORD_REQUEST_ERROR_MESSAGE

    response = '**{}**:\n'.format(to_define)
    try:
        # Use OwlBot API to fetch definition. The timeout keeps an
        # unresponsive API from blocking the bot indefinitely.
        api_result = requests.get(
            self.DEFINITION_API_URL.format(to_define_lower), timeout=10)
        # Convert API result from string to JSON format.
        definitions = api_result.json()

        if not definitions:
            # Could not fetch definitions for the given word.
            response += self.REQUEST_ERROR_MESSAGE
        else:
            # Show definitions line by line. Built incrementally so that a
            # failure part-way through keeps the entries already added.
            for d in definitions:
                example = d['example'] if d['example'] else '*No example available.*'
                response += '\n' + '* (**{}**) {}\n {}'.format(d['type'], d['definition'], html2text.html2text(example))
    except Exception:
        response += self.REQUEST_ERROR_MESSAGE
        logging.exception("Failed to fetch a definition for %r", to_define_lower)

    return response
Example #20
Source File: trac_export.py From allura with Apache License 2.0 | 5 votes |
def parse_ticket(self, id):
    """Fetch one ticket and return it as a dict with its comments.

    Ticket fields come from the CSV export (first data row) and are
    passed through ``self.remap_fields``. The description and comments
    are scraped from the ticket's HTML page and converted with html2text.
    """
    # Use CSV export to get ticket fields
    csv_url = self.full_url(self.TICKET_URL % id, 'csv')
    csv_file = self.csvopen(csv_url)
    ticket_fields = next(csv.DictReader(csv_file))
    ticket_fields['class'] = 'ARTIFACT'
    ticket = self.remap_fields(ticket_fields)

    # Use HTML export to get ticket description and comments
    import html2text
    html2text.BODY_WIDTH = 0
    html_url = self.full_url(self.TICKET_URL % id)
    self.log_url(html_url)
    soup = BeautifulSoup(urlopen(html_url))
    self.clean_missing_wiki_links(soup)

    desc = soup.find('div', 'description').find('div', 'searchable')
    if desc:
        ticket['description'] = html2text.html2text(
            desc.renderContents('utf8').decode('utf8'))
    else:
        ticket['description'] = ''

    comments = []
    relative_base_url = six.moves.urllib.parse.urlparse(
        self.full_url(self.TICKET_URL % '')).path
    for comment in soup.findAll('form', action='#comment'):
        submitter = re.sub(
            r'.* by ', '', comment.find('h3', 'change').text).strip()
        date = self.trac2z_date(
            comment.find('a', 'timeline')['title'].replace(' in Timeline', ''))
        changes = six.text_type(comment.find('ul', 'changes') or '')

        body_tag = comment.find('div', 'comment')
        body = body_tag.renderContents('utf8').decode('utf8') if body_tag else ''
        # crude way to rewrite ticket links
        body = body.replace('href="{}'.format(relative_base_url), 'href="')

        comments.append({
            'submitter': submitter,
            'date': date,
            'comment': html2text.html2text(changes + body),
            'class': 'COMMENT',
        })

    ticket['comments'] = comments
    return ticket
Example #21
Source File: leetcode-crawler.py From leetcode-crawler with MIT License | 5 votes |
def generate_question_markdown(self, question, path, has_get_code):
    """Write a README.md for one question into its own directory.

    The directory is ``<path>/<frontedId zero-padded to 3>-<slug>`` and is
    created when missing. The README contains the title, the question
    content converted with html2text, tags and difficulty. When
    ``self.is_login`` and ``has_get_code`` are both truthy, the first row
    for this slug in ``last_ac_submission_record`` (ordered by timestamp)
    is appended as a fenced code block.

    Args:
        question: Mapping with the keys ``frontedId``, ``slug``,
            ``title``, ``content``, ``tags`` and ``difficulty``.
        path: Parent directory for the question directory.
        has_get_code: Whether to look up and embed a stored submission.
    """
    text_path = os.path.join(path, "{:0>3d}-{}".format(question['frontedId'], question['slug']))
    if not os.path.isdir(text_path):
        os.mkdir(text_path)

    with open(os.path.join(text_path, "README.md"), 'w', encoding='utf-8') as f:
        f.write("# [{}][title]\n".format(question['title']))
        f.write("\n## Description\n\n")
        text = question['content']

        # Strip the bold markers html2text puts on the example labels.
        content = (
            html2text.html2text(text)
            .replace("**Input:**", "Input:")
            .replace("**Output:**", "Output:")
            .replace('**Explanation:**', 'Explanation:')
            .replace('\n ', ' ')
        )
        f.write(content)
        f.write("\n**Tags:** {}\n".format(question['tags']))
        f.write("\n**Difficulty:** {}\n".format(question['difficulty']))
        f.write("\n## 思路\n")

        if self.is_login and has_get_code:
            sql = "SELECT code, language FROM last_ac_submission_record WHERE question_slug = ? ORDER BY timestamp"
            cursor = self.conn.cursor()
            try:
                cursor.execute(sql, (question['slug'],))
                submission = cursor.fetchone()
            finally:
                # Release the cursor even when the query fails.
                cursor.close()

            if submission is not None:
                f.write("\n``` %s\n" %(submission[1]))
                # The stored code holds escape sequences; decode them back.
                f.write(submission[0].encode('utf-8').decode('unicode_escape'))
                f.write("\n```\n")

        f.write("\n[title]: https://leetcode.com/problems/{}\n".format(question['slug']))
Example #22
Source File: RSSReader.py From RedditBots with MIT License | 5 votes |
def get_new_articles(source):
    """Fetch an RSS feed and return its items as tuples.

    Args:
        source: URL of the RSS document.

    Returns:
        A list of ``(title, link, description, guid)`` tuples. ``title``
        and ``guid`` fall back to the link when missing; ``description``
        is converted with html2text, or is ``None`` when the item has
        none. An empty list is returned when the URL cannot be opened
        (the ``URLError`` reason is printed).
    """
    articles = []
    try:
        # The context manager closes the HTTP response once it is read.
        with urllib.request.urlopen(source) as response:
            orig_rss = response.read().decode("utf-8")

        rss = ET.fromstring(orig_rss)
        channel = rss.find("channel")
        for item in channel.findall("item"):
            link = item.find("link").text

            title = item.find("title")
            if title is not None:
                title = title.text
            # Covers both a missing <title> element and an empty one.
            if title is None:
                print("found no title, will use link")
                title = link

            description = item.find("description")
            if description is not None:
                description = html2text.html2text(description.text)

            guid = item.find("guid")
            if guid is not None:
                guid = guid.text
            if guid is None:
                guid = link

            articles.append((title, link, description, guid))
    except URLError as e:
        print("Error:", e.reason)
    return articles
Example #23
Source File: speak.py From pythonista-scripts with MIT License | 5 votes |
def main():
    """Speak the clipboard or share-sheet content aloud.

    When a URL is available, either from the share sheet or as the first
    URL found in the text, the page is downloaded, converted to text with
    html2text and spoken. Otherwise the text itself is spoken.

    Returns:
        ``True`` when the URL could not be fetched, otherwise ``None``.
    """
    speech.stop()
    if not appex.is_running_extension():
        console.hud_alert('Reading clipboard')
        text = clipboard.get()
        url = None
    else:
        text = appex.get_text()
        url = appex.get_url()

    if url is None:
        try:
            url = [mgroups[0] for mgroups in GRUBER_URLINTEXT_PAT.findall(text)][0]
        except (IndexError, TypeError):
            # No URL in the text (IndexError) or no text at all
            # (TypeError): fall back to speaking the text itself.
            pass

    if url is not None:
        console.hud_alert('Reading: ' + url)
        try:
            r = requests.get(
                url=url,
                headers={"User-agent": "Mozilla/5.0{0:06}".format(random.randrange(999999))})
        except requests.ConnectionError:
            console.alert('Unable to connect to url.')
            return True
        # r.text is already decoded text; calling .decode() on it fails.
        text = html2text.html2text(r.text)
    else:
        console.hud_alert('Reading text: ' + str(text))

    if text:
        speech.say(text)
        console.alert('Done?', hide_cancel_button=True, button1='OK')
        speech.stop()
    else:
        console.hud_alert('No text found.')
Example #24
Source File: url2md.py From pythonista-scripts with MIT License | 5 votes |
def main():
    """Convert a web page to Markdown and copy it to the clipboard.

    The URL comes from the share sheet when running as an extension, or
    from the first URL found in the clipboard text, which the user can
    edit in a prompt. After copying, the user may open Evernote through
    its URL scheme.

    Returns:
        ``True`` when the URL prompt is dismissed, otherwise ``None``.
        Errors from the HTTP request propagate to the caller.
    """
    if appex.is_running_extension():
        url = appex.get_url()
        if url is None:
            text = appex.get_text()
            url = [mgroups[0] for mgroups in GRUBER_URLINTEXT_PAT.findall(text)][0]
    else:
        text = clipboard.get().strip()
        matches = GRUBER_URLINTEXT_PAT.findall(text)
        # Without a URL in the clipboard, fall through to the "http://"
        # prefill below instead of failing with IndexError.
        url = matches[0][0] if matches else ''
        if "http" not in url:
            url = "http://"
        try:
            url = console.input_alert("URL", "", url)
        except (KeyboardInterrupt, Exception):
            # Presumably the prompt raises KeyboardInterrupt when
            # cancelled -- confirm against the Pythonista console docs.
            return True

    console.hud_alert('URL: %s' % url)
    # Request errors are left to propagate. The previous handler tried to
    # `raise` a message string, which is itself an error.
    r = requests.get(
        url=url,
        headers={"User-agent": "Mozilla/5.0{0:06}".format(random.randrange(999999))}
    )

    # r.text is already decoded text; calling .decode() on it fails.
    rendered_content = html2text.html2text(r.text)
    clipboard.set(rendered_content)

    launch_e = console.alert('Markdown copied to clipboard. Launch Evernote?', button1='Yes', button2='No', hide_cancel_button=True)
    if launch_e == 1:
        _eurl = "evernote://x-callback-url/new-note?type=clipboard&title=DRAFT&text="
        app = UIApplication.sharedApplication()
        eurl = nsurl(_eurl)
        app.openURL_(eurl)
    appex.finish()
Example #25
Source File: ical.py From sync-engine with GNU Affero General Public License v3.0 | 5 votes |
def generate_invite_message(ical_txt, event, account, invite_type='request'):
    """Build the MIME message for a calendar invite, update or cancellation.

    The message is ``multipart/mixed`` wrapping a ``multipart/alternative``
    with plain-text, HTML and calendar parts. The plain text is derived
    from the event description with html2text. ``invite_type`` selects
    the calendar method and the subject prefix.
    """
    assert invite_type in ['request', 'update', 'cancel']

    calendar_subtypes = {
        'request': 'calendar; method=REQUEST',
        'update': 'calendar; method=REQUEST',
        'cancel': 'calendar; method=CANCEL',
    }
    subject_templates = {
        'request': u'Invitation: {}',
        'update': u'Updated Invitation: {}',
        'cancel': u'Cancelled: {}',
    }

    html_body = event.description or ''
    text_body = html2text(html_body)

    msg = mime.create.multipart('mixed')
    body = mime.create.multipart('alternative')

    calendar_subtype = calendar_subtypes.get(invite_type)
    if calendar_subtype is not None:
        body.append(
            mime.create.text('plain', text_body),
            mime.create.text('html', html_body),
            mime.create.text(calendar_subtype, ical_txt, charset='utf8'))
        msg.append(body)

    # From should match our mailsend provider (mailgun) so it doesn't confuse
    # spam filters
    msg.headers['From'] = "automated@notifications.nylas.com"
    msg.headers['Reply-To'] = account.email_address

    subject_template = subject_templates.get(invite_type)
    if subject_template is not None:
        msg.headers['Subject'] = subject_template.format(event.title)

    return msg
Example #26
Source File: doc_spell_checker.py From SNIPER-mxnet with Apache License 2.0 | 5 votes |
def check_grammar(self, file_name):
    """Check the grammar of the specified file.

    The file is read, converted with html2text, stripped of control
    characters and passed to the grammar checker. The checker's result
    is stored on the instance; nothing is returned.

    Parameters
    -----------
    file_name: name of the file to be checked
    """
    # Close the file deterministically instead of leaking the handle.
    with open(file_name) as source_file:
        raw_content = source_file.read()

    file_content = html2text.html2text(raw_content)
    # Remove control characters; tab, newline and carriage return are kept.
    file_content = re.sub(u"[\x00-\x08\x0b-\x0c\x0e-\x1f]+", u"", file_content)
    self.__grammar_check_res = self.__grammar_checker.check(file_content)
Example #27
Source File: app.py From activitypub with Mozilla Public License 2.0 | 5 votes |
def html2plaintext(self, body, *args, **kwargs):
    """Convert an HTML body to plain text with html2text.

    Extra positional and keyword arguments are accepted and ignored.
    """
    plaintext = html2text(body)
    return plaintext
Example #28
Source File: send_email.py From loaner with Apache License 2.0 | 5 votes |
def send_shelf_audit_email(shelf):
    """Sends a shelf audit email.

    The 'shelf_audit_request' template is rendered with the shelf's
    details. The result is sent as HTML together with a plain-text body
    produced by html2text, to the 'shelf_audit_email_to' config value.

    Args:
      shelf: shelf_model.Shelf object for location details.

    Raises:
      SendEmailError: if the data pertaining to the audit is incomplete.
        NOTE(review): not raised directly here; presumably it comes from
        a callee -- confirm.
    """
    hours_since_audit = int(
        (datetime.datetime.utcnow() - shelf.last_audit_time).total_seconds()
        / 3600)
    title, body = constants.TEMPLATE_LOADER.render(
        'shelf_audit_request',
        {
            'friendly_name': shelf.friendly_name,
            'hours_since_audit': hours_since_audit,
            'location': shelf.location,
            'origin': constants.ORIGIN,
        })

    recipient = config_model.Config.get('shelf_audit_email_to')
    plain_body = html2text.html2text(body)

    # We want each different subject to generate a unique hash.
    logging.info(
        'Sending email to %s\nSubject: %s.', shelf.responsible_for_audit, title)
    _send_email(to=recipient, subject=title, body=plain_body, html=body)
Example #29
Source File: doc_spell_checker.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def check_grammar(self, file_name):
    """Check the grammar of the specified file.

    The file is read, converted with html2text, stripped of control
    characters and passed to the grammar checker. The checker's result
    is stored on the instance; nothing is returned.

    Parameters
    -----------
    file_name: name of the file to be checked
    """
    # Close the file deterministically instead of leaking the handle.
    with open(file_name) as source_file:
        raw_content = source_file.read()

    file_content = html2text.html2text(raw_content)
    # Remove control characters; tab, newline and carriage return are kept.
    file_content = re.sub(u"[\x00-\x08\x0b-\x0c\x0e-\x1f]+", u"", file_content)
    self.__grammar_check_res = self.__grammar_checker.check(file_content)
Example #30
Source File: book.py From JARVIS-on-Messenger with MIT License | 5 votes |
def process(input, entities):
    """Look up a book on Goodreads and build the reply payload.

    Args:
        input: The original user input; echoed back under ``'input'``.
        entities: Parsed entities; the title is read from
            ``entities['book'][0]['value']``.

    Returns:
        A dict with ``'success'`` set to ``True`` plus ``'input'`` and
        ``'output'`` on success, or ``'success'`` set to ``False`` plus
        ``'error_msg'`` when anything in the lookup fails.
    """
    output = {}
    try:
        book_title = entities['book'][0]['value']
        with requests_cache.enabled('book_cache', backend='sqlite', expire_after=86400):
            # Let requests build the query string so that titles with
            # '&', '#' or other reserved characters are encoded properly.
            response = requests.get(
                'https://www.goodreads.com/book/title.xml',
                params={'key': GOODREADS_ACCESS_TOKEN, 'title': book_title})
            data = ElementTree.fromstring(response.content)

        book_node = data.find('book')
        author = book_node.find('authors').find('author').find('name').text
        title = book_node.find('title').text
        description = html2text(book_node.find('description').text)
        average_rating = book_node.find('average_rating').text
        link = book_node.find('link').text
        goodreads_attribution = '- Powered by Goodreads'

        template = TextTemplate()
        template.set_text('Title: ' + title + '\nAuthor: ' + author + '\nDescription: ' + description)
        template.set_post_text('\nAverage Rating: ' + average_rating + ' / 5' + '\n' + goodreads_attribution)
        text = template.get_text()

        template = ButtonTemplate(text)
        template.add_web_url('Goodreads Link', link)

        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Any failure (missing entity, network, unexpected XML) yields the
        # same help message. KeyboardInterrupt and SystemExit are no
        # longer swallowed.
        error_message = 'I couldn\'t find any book matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n - book timeline'
        error_message += '\n - harry potter book plot'
        error_message += '\n - little women book rating'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output