Java Code Examples for org.htmlcleaner.TagNode#getAllChildren()

The following examples show how to use org.htmlcleaner.TagNode#getAllChildren(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PreHandler.java    From SDHtmlTextView with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively appends the textual content found at or below {@code node}
 * to {@code buffer}.
 *
 * <p>A {@link ContentNode} contributes its content after it has been passed
 * through {@code TextUtil.replaceHtmlEntities}; a {@link TagNode} contributes
 * whatever its children contribute, visited in the order returned by
 * {@code getAllChildren()}. Any other kind of node is ignored.
 *
 * @param buffer destination the collected text is appended to
 * @param node   the node to read; expected to be a ContentNode or a TagNode
 */
private void getPlainText(StringBuffer buffer, Object node) {
	if (node instanceof ContentNode) {
		// Leaf text: run the entity replacement, then append.
		String rawContent = ((ContentNode) node).getContent().toString();
		buffer.append(TextUtil.replaceHtmlEntities(rawContent, true));
		return;
	}

	if (!(node instanceof TagNode)) {
		// Neither text nor an element: nothing to collect.
		return;
	}

	// Element: gather the text of every child.
	for (Object child : ((TagNode) node).getAllChildren()) {
		getPlainText(buffer, child);
	}
}
 
Example 2
Source File: HtmlSpanner.java    From SDHtmlTextView with Apache License 2.0 5 votes vote down vote up
/**
 * Processes {@code node} and, unless its handler renders the content itself,
 * all of its children, writing into {@code builder}.
 *
 * <p>The handler registered under the node's name is used; if none is
 * registered, a fresh {@link StyledTextHandler} bound to this spanner is
 * used instead. The handler is notified before the children are visited
 * ({@code beforeChildren}) and again afterwards ({@code handleTagNode}),
 * the latter receiving the builder length measured before and after.
 *
 * @param builder              the builder text and spans are written to
 * @param node                 the tag node to process
 * @param stack                span stack handed through to handlers
 * @param cancellationCallback passed to {@code checkForCancellation} before
 *                             any work is done and forwarded to child processing
 */
private void applySpan(SpannableStringBuilder builder, TagNode node, SpanStack stack,
                       CancellationCallback cancellationCallback) {

    checkForCancellation(cancellationCallback);

    // Look up the handler for this tag, falling back to a default one.
    TagNodeHandler tagHandler = this.handlers.get(node.getName());
    if (tagHandler == null) {
        tagHandler = new StyledTextHandler();
        tagHandler.setSpanner(this);
    }

    // Captured before beforeChildren() runs, so anything it appends is
    // included in the range later passed to handleTagNode().
    final int startOffset = builder.length();
    tagHandler.beforeChildren(node, builder, stack);

    if (!tagHandler.rendersContent()) {
        for (Object child : node.getAllChildren()) {
            if (child instanceof ContentNode) {
                handleContent(builder, child, stack, cancellationCallback);
                continue;
            }
            if (child instanceof TagNode) {
                applySpan(builder, (TagNode) child, stack, cancellationCallback);
            }
        }
    }

    final int endOffset = builder.length();
    tagHandler.handleTagNode(node, builder, startOffset, endOffset, stack);
}
 
Example 3
Source File: TagNodeEmptyContentCondition.java    From web-data-extractor with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether {@code tagNode} satisfies this condition, i.e. whether it
 * is an eligible element with no text whose children are all themselves
 * empty or satisfy the condition.
 *
 * <p>A node is eligible only if tag info is known for its name, it has no
 * id attribute set (per {@code hasIdAttributeSet}), its display is not
 * {@code none}, it is not an empty tag, and either {@code override} is true
 * or its name is not listed in {@code unsafeBlockElements}.
 *
 * @param tagNode  the node to test
 * @param override when true, the {@code unsafeBlockElements} exclusion is
 *                 skipped; recursive calls on children always pass true
 * @return true if the node is eligible and contains nothing but blank or
 *         satisfying descendants; false otherwise
 */
private boolean satisfy(TagNode tagNode, boolean override) {
    final String tagName = tagNode.getName();
    final TagInfo info = tagInfoProvider.getTagInfo(tagName);

    //Only _block_ elements can match.
    if (info == null || hasIdAttributeSet(tagNode)) {
        return false;
    }
    if (none == info.getDisplay() || info.isEmptyTag()) {
        return false;
    }
    if (!override && unsafeBlockElements.contains(tagName)) {
        return false;
    }

    CharSequence text = tagNode.getText();
    if (!isEmptyString(text)) {
        return false;
    }

    // even though there may be no text need to make sure all children are empty or can be pruned
    if (tagNode.isEmpty()) {
        return true;
    }

    // TODO : similar check as in tagNode.isEmpty() argues for a visitor pattern
    // but allow empty td, ths to be pruned.
    for (Object child : tagNode.getAllChildren()) {
        final boolean childIsEmpty;
        if (child instanceof TagNode) {
            childIsEmpty = satisfy((TagNode) child, true);
        } else if (child instanceof ContentNode) {
            childIsEmpty = ((ContentNode) child).isBlank();
        } else {
            // Any other kind of child blocks the match.
            childIsEmpty = false;
        }
        if (!childIsEmpty) {
            return false;
        }
    }
    return true;
}
 
Example 4
Source File: TagNodeInsignificantBrCondition.java    From web-data-extractor with Apache License 2.0 5 votes vote down vote up
/**
 * Tests whether {@code tagNode} is a br node for which {@code checkSublist}
 * succeeds on either the siblings from the start of the parent's child list
 * up to the node's index, or on the range from the node's index to the end
 * of that list.
 *
 * <p>NOTE(review): the parent returned by {@code getParent()} is dereferenced
 * without a null check — confirm a br node reaching this method always has
 * a parent.
 *
 * @param tagNode the node to test
 * @return false if the node is not a br node (per {@code isBrNode});
 *         otherwise whether either {@code checkSublist} call returns true
 */
public boolean satisfy(TagNode tagNode) {
    if (isBrNode(tagNode)) {
        List siblings = tagNode.getParent().getAllChildren();
        int position = siblings.indexOf(tagNode);

        // Range before the br first; the second range is only checked if that fails.
        if (checkSublist(0, position, siblings)) {
            return true;
        }
        return checkSublist(position, siblings.size(), siblings);
    }
    return false;
}