Java Code Examples for org.htmlcleaner.TagNode#getText()

The following examples show how to use org.htmlcleaner.TagNode#getText(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PreTagHandler.java    From mvvm-template with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Renders a code element into {@code builder}, either as a block or as inline code,
 * depending on the {@code isPre} field.
 *
 * <p>Inline mode: the node text is passed through {@code replace(...)}, wrapped in one
 * space on each side, and styled with a background colour, a monospace typeface and,
 * when {@code theme == PrefGetter.LIGHT}, a red foreground. The spans run from
 * {@code start + 1} to one character before the end of the builder.
 *
 * <p>Block mode: the text collected by {@code getPlainText(...)} is surrounded by
 * newlines, appended, and covered by a {@code CodeBackgroundRoundedSpan} from
 * {@code start} to the builder's length at that point; further line breaks follow.
 *
 * @param node    the tag being rendered
 * @param builder output that receives the text and spans
 * @param start   offset in {@code builder} where the spans begin
 * @param end     not read by this method
 */
@Override public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
    if (!isPre) {
        final StringBuffer inlineText = node.getText();
        builder.append(" ");
        builder.append(replace(inlineText.toString()));
        builder.append(" ");
        // NOTE(review): presumably `start` equals the builder length on entry, so these
        // bounds leave out the two padding spaces — confirm against the caller.
        final int spanFrom = start + 1;
        final int spanTo = builder.length() - 1;
        builder.setSpan(new BackgroundColorSpan(color), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        if (theme == PrefGetter.LIGHT) {
            builder.setSpan(new ForegroundColorSpan(Color.RED), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        }
        builder.setSpan(new TypefaceSpan("monospace"), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        return;
    }

    // Leading and trailing newlines act as fake vertical padding and keep the block on its own lines.
    final StringBuffer blockText = new StringBuffer("\n");
    getPlainText(blockText, node);
    blockText.append("\n");

    builder.append(replace(blockText.toString()));
    builder.append("\n");
    // The rounded background covers everything appended so far, including the newline just added.
    builder.setSpan(new CodeBackgroundRoundedSpan(color), start, builder.length(), SPAN_EXCLUSIVE_EXCLUSIVE);
    builder.append("\n");
    for (int i = 0; i < 2; i++) {
        this.appendNewLine(builder);
    }
}
 
Example 2
Source File: LinkHandler.java    From mvvm-template with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Attaches a {@code LinkSpan} to the range {@code [start, end)} of the builder.
 *
 * <p>The link target is the node's {@code href} attribute when present. Otherwise, if the
 * node text is non-null, the target is {@code "https://github.com/"} followed by that text.
 * When neither is available no span is set.
 *
 * @param node                   the tag being rendered
 * @param spannableStringBuilder output that receives the span
 * @param start                  span start offset
 * @param end                    span end offset
 */
@Override public void handleTagNode(TagNode node, SpannableStringBuilder spannableStringBuilder, int start, int end) {
    final String href = node.getAttributeByName("href");
    if (href != null) {
        // NOTE(review): 33 looks like the numeric value of Spanned.SPAN_EXCLUSIVE_EXCLUSIVE — confirm.
        spannableStringBuilder.setSpan(new LinkSpan(href, linkColor), start, end, 33);
        return;
    }
    if (node.getText() == null) {
        return;
    }
    // No href: fall back to treating the node text as a path under github.com.
    final String fallbackUrl = "https://github.com/" + node.getText().toString();
    spannableStringBuilder.setSpan(new LinkSpan(fallbackUrl, linkColor), start, end, 33);
}
 
Example 3
Source File: TagNodeEmptyContentCondition.java    From web-data-extractor with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether {@code tagNode} counts as an empty element.
 *
 * <p>A node qualifies only if tag info is known for its name, it has no id attribute set,
 * its display is not {@code none}, it is not an empty tag, and it is either not listed in
 * {@code unsafeBlockElements} or {@code override} is true. Its text must then be empty
 * according to {@code isEmptyString}, and every child must itself be empty: child tags are
 * checked recursively with {@code override = true}, content nodes must be blank, and any
 * other kind of child disqualifies the node.
 *
 * @param tagNode  node to examine
 * @param override when true, membership in {@code unsafeBlockElements} does not disqualify the node
 * @return true if the node and all of its children are empty by the rules above
 */
private boolean satisfy(TagNode tagNode, boolean override) {
    final String tagName = tagNode.getName();
    final TagInfo info = tagInfoProvider.getTagInfo(tagName);

    // Only _block_ elements can match; reject everything else up front.
    if (info == null
            || hasIdAttributeSet(tagNode)
            || none == info.getDisplay()
            || info.isEmptyTag()
            || (!override && unsafeBlockElements.contains(tagName))) {
        return false;
    }

    final CharSequence text = tagNode.getText();
    if (!isEmptyString(text)) {
        return false;
    }

    // Even with no text, every child must be empty or prunable.
    if (tagNode.isEmpty()) {
        return true;
    }

    // TODO: this mirrors the check in tagNode.isEmpty(), which argues for a visitor pattern.
    for (Object child : tagNode.getAllChildren()) {
        final boolean prunable;
        if (child instanceof TagNode) {
            // Nested tags are checked with override = true so that empty td/th cells can be pruned.
            prunable = satisfy((TagNode) child, true);
        } else if (child instanceof ContentNode) {
            prunable = ((ContentNode) child).isBlank();
        } else {
            prunable = false;
        }
        if (!prunable) {
            return false;
        }
    }
    return true;
}
 
Example 4
Source File: UtilsStaticAnalyzer.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a compact label from the text of a node.
 *
 * <p>The node text is trimmed; if nothing remains, the empty string is returned. Text longer
 * than 35 characters is cut to its first 25 characters. A fixed list of punctuation,
 * whitespace and symbol characters is then removed, {@code @} becomes {@code _AT_}, and
 * character 160 (the non-breaking space) becomes {@code _}. If anything is left, the result
 * is the node name, an underscore, and the cleaned text.
 *
 * @param t node whose text is used
 * @return {@code name_cleanedText}, or {@code ""} when no usable text remains
 * @throws UnsupportedEncodingException declared by the signature; no statement visible in
 *         this method throws it directly
 */
public static String getTextFromNode(TagNode t) throws UnsupportedEncodingException {

		String label = t.getText().toString().trim();
		if (label.length() == 0) {
			return "";
		}

		// Long texts are shortened before cleaning (threshold 35, kept length 25).
		if (label.length() > 35) {
			label = label.substring(0, 25);
		}

		// Ordered {target, replacement} pairs, applied one after another.
		final String[][] substitutions = {
			{".", ""},
			{",", ""},
			{":", ""},
			{";", ""},
			{"/", ""},
			{"(", ""},
			{")", ""},
			{"-", ""},
			{"!", ""},
			{"?", ""},
			{"[", ""},
			{"]", ""},
			{"{", ""},
			{"}", ""},
			{"'", ""},
			{" ", ""},
			{"\n", ""},
			{"\t", ""},
			{"\b", ""},
			{"™", ""},
			{"✓", ""},
			{"✘", ""},
			{"↓", ""},
			{"@", "_AT_"},
			{"\"", ""},
			{String.valueOf((char) 160), "_"},
		};
		for (String[] rule : substitutions) {
			label = label.replace(rule[0], rule[1]);
		}

		return label.isEmpty() ? "" : t.getName() + "_" + label;
	}