Java Code Examples for org.jsoup.nodes.TextNode#getWholeText()
The following examples show how to use org.jsoup.nodes.TextNode#getWholeText().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Cleaner.java From astor with GNU General Public License v2.0 | 6 votes |
public void head(Node source, int depth) { if (source instanceof Element) { Element sourceEl = (Element) source; if (whitelist.isSafeTag(sourceEl.tagName())) { // safe, clone and copy safe attrs ElementMeta meta = createSafeElement(sourceEl); Element destChild = meta.el; destination.appendChild(destChild); numDiscarded += meta.numAttribsDiscarded; destination = destChild; } else if (source != root) { // not a safe tag, so don't add. don't count root against discarded. numDiscarded++; } } else if (source instanceof TextNode) { TextNode sourceText = (TextNode) source; TextNode destText = new TextNode(sourceText.getWholeText()); destination.appendChild(destText); } else if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) { DataNode sourceData = (DataNode) source; DataNode destData = new DataNode(sourceData.getWholeData()); destination.appendChild(destData); } else { // else, we don't care about comments, xml proc instructions, etc numDiscarded++; } }
Example 2
Source File: Cleaner.java From astor with GNU General Public License v2.0 | 6 votes |
public void head(Node source, int depth) { if (source instanceof Element) { Element sourceEl = (Element) source; if (whitelist.isSafeTag(sourceEl.tagName())) { // safe, clone and copy safe attrs ElementMeta meta = createSafeElement(sourceEl); Element destChild = meta.el; destination.appendChild(destChild); numDiscarded += meta.numAttribsDiscarded; destination = destChild; } else if (source != root) { // not a safe tag, so don't add. don't count root against discarded. numDiscarded++; } } else if (source instanceof TextNode) { TextNode sourceText = (TextNode) source; TextNode destText = new TextNode(sourceText.getWholeText()); destination.appendChild(destText); } else if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) { DataNode sourceData = (DataNode) source; DataNode destData = new DataNode(sourceData.getWholeData()); destination.appendChild(destData); } else { // else, we don't care about comments, xml proc instructions, etc numDiscarded++; } }
Example 3
Source File: HtmlHelper.java From FairEmail with GNU General Public License v3.0 | 5 votes |
/**
 * Limits the amount of text in a document. Text nodes are counted in the order
 * {@code d.select("*")} yields their parent elements; the text node that crosses the
 * limit is cut and suffixed with {@code " ..."}, later text nodes of that same element
 * are emptied, and elements visited after the limit was reached are removed.
 *
 * @param d        the document to truncate in place
 * @param reformat selects {@code MAX_FORMAT_TEXT_SIZE} when true, {@code MAX_FULL_TEXT_SIZE} otherwise
 * @return {@code true} if the counted text length reached the limit
 */
static boolean truncate(Document d, boolean reformat) {
    final int limit = (reformat ? MAX_FORMAT_TEXT_SIZE : MAX_FULL_TEXT_SIZE);
    int total = 0;
    int imageCount = 0;

    for (Element element : d.select("*")) {
        if ("img".equals(element.tagName()))
            imageCount++;

        // Set once a text node of this element has been cut at the limit.
        boolean cutHere = false;

        for (Node node : element.childNodes()) {
            if (!(node instanceof TextNode))
                continue;

            TextNode textNode = (TextNode) node;
            String content = textNode.getWholeText();

            if (total >= limit) {
                // Already over the limit: blank the siblings that follow the cut node.
                if (cutHere)
                    textNode.text("");
            } else if (total + content.length() >= limit) {
                // This node crosses the limit: keep only the part that still fits.
                content = content.substring(0, limit - total) + " ...";
                textNode.text(content);
                cutHere = true;
            }

            total += content.length();
        }

        // Elements reached after the limit (and not the one holding the cut) are dropped.
        if (total >= limit && !cutHere)
            element.remove();
    }

    Log.i("Message size=" + total + " images=" + imageCount);

    return (total >= limit);
}
Example 4
Source File: DocumentToJCasConverter.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Extracts the text of a node.
 *
 * @param node the node to inspect
 * @return the result of {@code getWholeText()} when the node is a {@code TextNode};
 *     {@code null} for any other node
 */
private String mapToText(final Node node) {
    return (node instanceof TextNode) ? ((TextNode) node).getWholeText() : null;
}
Example 5
Source File: JsoupHtmlTextExtractor.java From james-project with Apache License 2.0 | 5 votes |
/**
 * Produces the plain-text fragment for a single HTML node.
 *
 * <p>Text nodes yield their whole text. Elements yield a fixed separator or marker
 * chosen by tag name (checked in the order below); every other node yields an empty string.
 *
 * @param htmlNode wrapper holding the underlying node and its list nesting level
 * @return the text contributed by this node, never {@code null} for the branches shown here
 */
private String convertNodeToText(HTMLNode htmlNode) {
    Node node = htmlNode.underlyingNode;

    if (node instanceof TextNode) {
        return ((TextNode) node).getWholeText();
    }
    if (!(node instanceof Element)) {
        return "";
    }

    Element element = (Element) node;
    String tag = element.tagName();

    if (tag.equals(BR_TAG)) {
        return "\n";
    }
    if (isList(element)) {
        return convertListElement(htmlNode.listNestedLevel);
    }
    if (tag.equals(OL_TAG)) {
        return "\n\n";
    }
    if (tag.equals(LI_TAG)) {
        // One space per nesting level, followed by the bullet marker.
        String indent = StringUtils.repeat(" ", htmlNode.listNestedLevel);
        return "\n" + indent + "- ";
    }
    if (tag.equals(P_TAG)) {
        return "\n\n";
    }
    if (tag.equals(IMG_TAG)) {
        return generateImageAlternativeText(element);
    }
    return "";
}
Example 6
Source File: TextExtractor.java From storm-crawler with Apache License 2.0 | 5 votes |
/**
 * Appends the text of a text node to the accumulator.
 *
 * <p>The text is appended unchanged when the node's parent preserves whitespace or the
 * node is a {@code CDataNode}; otherwise it is appended through
 * {@code StringUtil.appendNormalisedWhitespace}.
 *
 * @param accum    the buffer receiving the text
 * @param textNode the node whose whole text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    final String wholeText = textNode.getWholeText();

    final boolean keepVerbatim =
            preserveWhitespace(textNode.parent()) || textNode instanceof CDataNode;
    if (keepVerbatim) {
        accum.append(wholeText);
        return;
    }

    StringUtil.appendNormalisedWhitespace(accum, wholeText, lastCharIsWhitespace(accum));
}
Example 7
Source File: HiveJobFetchSpout.java From eagle with Apache License 2.0 | 4 votes |
/**
 * Fetches the job-history configuration page of each given job, reads the key/value rows
 * of the {@code table[id=conf]} table and, when the Hive query string key is present,
 * emits the collected map through the collector.
 *
 * @param appInfo application whose user is attached to every emitted tuple
 * @param mrJobs  jobs whose configuration pages are fetched
 * @return {@code true} if every job was processed without an exception; {@code false} as
 *     soon as fetching or parsing one job fails (remaining jobs are not processed)
 */
private boolean fetchFinishedConfig(AppInfo appInfo, List<MRJob> mrJobs) {
    for (MRJob mrJob : mrJobs) {
        String urlString = crawlConfig.endPointConfig.HSBasePath + "jobhistory/conf/" + mrJob.getId()
                + "?" + Constants.ANONYMOUS_PARAMETER;
        // Scoped per iteration so a failure while opening this URL never re-closes the
        // stream that belonged to a previous job.
        InputStream is = null;
        try {
            LOG.info("fetch job conf from {}", urlString);
            is = InputStreamUtils.getInputStream(urlString, null, Constants.CompressionType.NONE);
            final org.jsoup.nodes.Document doc = Jsoup.parse(is, "UTF-8", urlString);
            doc.outputSettings().prettyPrint(false);
            org.jsoup.select.Elements rows = doc.select("table[id=conf]").select("tbody").select("tr");

            Map<String, String> hiveQueryLog = new HashMap<>();
            for (org.jsoup.nodes.Element row : rows) {
                org.jsoup.select.Elements tds = row.children();
                String key = tds.get(0).text();
                String value = "";
                org.jsoup.nodes.Element valueElement = tds.get(1);
                if (Constants.HIVE_QUERY_STRING.equals(key)) {
                    // For the query string the whole text of the direct text children is used
                    // (stripped); when there are several text children the last one wins.
                    for (org.jsoup.nodes.Node child : valueElement.childNodes()) {
                        if (child instanceof TextNode) {
                            TextNode valueTextNode = (TextNode) child;
                            value = StringUtils.strip(valueTextNode.getWholeText());
                        }
                    }
                } else {
                    value = valueElement.text();
                }
                hiveQueryLog.put(key, value);
            }

            if (hiveQueryLog.containsKey(Constants.HIVE_QUERY_STRING)) {
                collector.emit(new ValuesArray(appInfo.getUser(), mrJob.getId(),
                        Constants.ResourceType.JOB_CONFIGURATION, hiveQueryLog), mrJob.getId());
            }
        } catch (Exception e) {
            // The exception is passed as the trailing argument with no placeholder of its own so
            // the logger records the stack trace; this replaces the former printStackTrace().
            // NOTE(review): assumes LOG is an SLF4J-style logger - confirm.
            LOG.warn("fetch job conf from {} failed", urlString, e);
            return false;
        } finally {
            Utils.closeInputStream(is);
        }
    }
    return true;
}