Java Code Examples for org.htmlcleaner.TagNode#getName()

The following examples show how to use org.htmlcleaner.TagNode#getName(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: ListsHandler.java From mvvm-template with GNU General Public License v3.0

5 votes

/**
 * Appends the list-item prefix for {@code node} to {@code builder} before the
 * node's children are processed.
 *
 * <p>The child tags are scanned for the first {@code input} element; when one is
 * found the item is treated as a to-do entry whose checked state is taken from
 * the presence of a {@code checked} attribute. The prefix then depends on the
 * parent's name: the item index followed by {@code ". "} under {@code ol}; under
 * {@code ul} either a checkbox marker (to-do entry) or a bullet. Any other
 * parent gets no prefix.
 *
 * @param node    the list-item node being rendered
 * @param builder the output the prefix is appended to
 */
@Override
public void beforeChildren(TagNode node, SpannableStringBuilder builder) {
    TodoItems task = null;
    TagNode[] children = node.getChildTags();
    if (children != null) {
        for (TagNode child : children) {
            Logger.e(child.getName(), child.getText());
            if ("input".equals(child.getName())) {
                task = new TodoItems();
                task.isChecked = child.getAttributeByName("checked") != null;
                break; // only the first input decides the to-do state
            }
        }
    }

    if ("ol".equals(getParentName(node))) {
        builder.append(String.valueOf(getMyIndex(node))).append(". ");
        return;
    }
    if (!"ul".equals(getParentName(node))) {
        return;
    }
    if (task == null) {
        builder.append("\u2022 ");
        return;
    }

    if (checked != null && unchecked != null) {
        // Both checkbox markers are available: append the matching one plus a space.
        builder.append(SpannableBuilder.builder()
                .append(task.isChecked ? checked : unchecked))
                .append(" ");
    } else {
        // Fall back to plain text checkbox glyphs.
        builder.append(task.isChecked ? "☑" : "☐");
    }
}

Example 2

Source File: TagNodeEmptyContentCondition.java From web-data-extractor with Apache License 2.0

5 votes

/**
 * Decides whether {@code tagNode} has no meaningful content.
 *
 * <p>A node can only match when tag info is known for its name, it has no id
 * attribute set, its display is not {@code none}, it is not an empty tag, and
 * either {@code override} is true or its name is not listed in
 * {@code unsafeBlockElements}. It must additionally have empty text, and every
 * child must itself satisfy this condition (tag children, checked with
 * {@code override = true}) or be blank (content children). Any other kind of
 * child makes the node fail.
 *
 * @param tagNode  the node to test
 * @param override when true, the {@code unsafeBlockElements} exclusion is skipped
 * @return true if the node matches the condition described above
 */
private boolean satisfy(TagNode tagNode, boolean override) {
    String name = tagNode.getName();
    TagInfo tagInfo = tagInfoProvider.getTagInfo(name);

    // Only _block_ elements can match.
    if (tagInfo == null
            || hasIdAttributeSet(tagNode)
            || none == tagInfo.getDisplay()
            || tagInfo.isEmptyTag()
            || (!override && unsafeBlockElements.contains(name))) {
        return false;
    }

    if (!isEmptyString(tagNode.getText())) {
        return false;
    }
    if (tagNode.isEmpty()) {
        return true;
    }

    // Even though there is no text, every child must be empty or prunable.
    // TODO : similar check as in tagNode.isEmpty() argues for a visitor pattern
    // but allow empty td, ths to be pruned.
    for (Object child : tagNode.getAllChildren()) {
        boolean prunable;
        if (child instanceof TagNode) {
            prunable = satisfy((TagNode) child, true);
        } else if (child instanceof ContentNode) {
            prunable = ((ContentNode) child).isBlank();
        } else {
            prunable = false; // unknown child kinds are never pruned
        }
        if (!prunable) {
            return false;
        }
    }
    return true;
}

Example 3

Source File: ResponseRenderPrintWriter.java From zrlog with Apache License 2.0

5 votes

/**
 * Parses {@code inputBody} with HtmlCleaner, runs the per-tag hooks on every
 * element, and serializes the tree back to a string.
 *
 * <p>After serialization the doctype (when present) is prepended, every key of
 * the map filled in by {@code parseCustomHtmlTag} is replaced by its value, and
 * an HTML comment with the milliseconds elapsed since {@code startTime} is
 * appended.
 *
 * @param inputBody the HTML text to process
 * @return the processed HTML
 * @throws IOException declared for the serialization step
 */
private String getCompressAndParseHtml(String inputBody) throws IOException {
    // Drop the trailing end flag when present
    // (original note, translated: do not display the "none" tag).
    String html = inputBody;
    if (html.endsWith(endFlag)) {
        html = html.substring(0, html.length() - endFlag.length());
    }

    HtmlCleaner cleaner = new HtmlCleaner();
    cleaner.getProperties().setCharset(charset);
    cleaner.getProperties().setUseCdataForScriptAndStyle(false);
    TagNode root = cleaner.clean(html);

    // Placeholder -> replacement pairs collected while visiting the elements.
    Map<String, String> replacements = new HashMap<>();
    for (TagNode element : root.getAllElements(true)) {
        if (element == null) {
            continue;
        }
        String elementName = element.getName();
        addStaticResourceFlag(element, elementName);
        parseCustomHtmlTag(cleaner, replacements, element, elementName);
    }

    StringWriter out = new StringWriter();
    root.serialize(new SimpleHtmlSerializer(cleaner.getProperties()), out);
    html = out.toString();

    if (root.getDocType() != null) {
        html = root.getDocType() + html;
    }
    for (Map.Entry<String, String> replacement : replacements.entrySet()) {
        html = html.replace(replacement.getKey(), replacement.getValue());
    }

    long elapsedMillis = System.currentTimeMillis() - startTime;
    return html + "<!--" + elapsedMillis + "ms-->";
}

Example 4

Source File: UtilsStaticAnalyzer.java From apogen with Apache License 2.0

4 votes

/**
 * Builds a compact label from the text of {@code t}.
 *
 * <p>The node text is trimmed; if it is longer than 35 characters only its first
 * 25 characters are kept. Punctuation, whitespace and a few symbols are then
 * removed, {@code "@"} becomes {@code "_AT_"}, double quotes are removed and
 * character 160 becomes {@code "_"}. Only the node's own text is consulted here.
 *
 * @param t the node whose text is used
 * @return the node name, an underscore and the cleaned text; or the empty
 *         string when no text remains
 * @throws UnsupportedEncodingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public static String getTextFromNode(TagNode t) throws UnsupportedEncodingException {
    String label = t.getText().toString().trim();
    if (label.isEmpty()) {
        return "";
    }

    // Texts longer than 35 characters are cut down to their first 25.
    if (label.length() > 35) {
        label = label.substring(0, 25);
    }

    // Substrings that are dropped outright, in the order they are applied.
    final String[] removed = {
        ".", ",", ":", ";", "/", "(", ")", "-", "!", "?",
        "[", "]", "{", "}", "'", " ", "\n", "\t", "\b",
        "™", "✓", "✘", "↓"
    };
    for (String token : removed) {
        label = label.replace(token, "");
    }

    label = label.replace("@", "_AT_");
    label = label.replace("\"", "");
    label = label.replace(String.valueOf((char) 160), "_");

    return label.isEmpty() ? "" : t.getName() + "_" + label;
}