org.htmlcleaner.TagNode Java Examples
The following examples show how to use
org.htmlcleaner.TagNode.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Http.java From BotLibre with Eclipse Public License 1.0 | 6 votes |
/** * Convert the HTML input stream into DOM parsable XHTML. */ public StringReader convertToXHTML(InputStream input) throws IOException { StringWriter output = new StringWriter(); /*int next = input.read(); while (next != -1) { output.write(next); next = input.read(); } String result = output.toString(); System.out.println(result);*/ TagNode node = getHtmlCleaner().clean(input, "UTF-8"); //TagNode node = getHtmlCleaner().clean(result); node.serialize(new SimpleXmlSerializer(getHtmlCleaner().getProperties()), output); output.flush(); String xhtml = output.toString(); return new StringReader(xhtml); }
Example #2
Source File: PreHandler.java From SDHtmlTextView with Apache License 2.0 | 6 votes |
/**
 * Recursively appends the text found under {@code node} to {@code buffer}.
 *
 * <p>A {@code ContentNode} contributes its content after HTML entities have been
 * replaced via {@code TextUtil.replaceHtmlEntities(..., true)}. A {@code TagNode}
 * contributes the text of all of its children, in order. Any other object is ignored.
 *
 * @param buffer the buffer that receives the text
 * @param node   the node (or arbitrary child object) to read text from
 */
private void getPlainText(StringBuffer buffer, Object node) {
    if (node instanceof ContentNode) {
        String raw = ((ContentNode) node).getContent().toString();
        buffer.append(TextUtil.replaceHtmlEntities(raw, true));
        return;
    }

    if (!(node instanceof TagNode)) {
        return;
    }

    for (Object child : ((TagNode) node).getAllChildren()) {
        getPlainText(buffer, child);
    }
}
Example #3
Source File: ListsHandler.java From mvvm-template with GNU General Public License v3.0 | 6 votes |
/**
 * Computes the 1-based position of {@code node} among the {@code li} children of its parent.
 *
 * <p>The counter starts at 1 and is incremented for every preceding sibling that is a
 * {@code TagNode} named {@code "li"}.
 *
 * @param node the node whose position is wanted
 * @return the position, or -1 if the node has no parent or is not found among the
 *         parent's children
 */
private int getMyIndex(TagNode node) {
    TagNode parent = node.getParent();
    if (parent == null) {
        return -1;
    }

    int position = 1;
    for (Object sibling : parent.getChildren()) {
        // Identity comparison: we are looking for this exact node instance.
        if (sibling == node) {
            return position;
        }
        boolean isListItem = sibling instanceof TagNode
                && "li".equals(((TagNode) sibling).getName());
        if (isListItem) {
            position++;
        }
    }
    return -1;
}
Example #4
Source File: PreTagHandler.java From mvvm-template with GNU General Public License v3.0 | 6 votes |
@Override public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) { if (isPre) { StringBuffer buffer = new StringBuffer(); buffer.append("\n");//fake padding top + make sure, pre is always by itself getPlainText(buffer, node); buffer.append("\n");//fake padding bottom + make sure, pre is always by itself builder.append(replace(buffer.toString())); builder.append("\n"); builder.setSpan(new CodeBackgroundRoundedSpan(color), start, builder.length(), SPAN_EXCLUSIVE_EXCLUSIVE); builder.append("\n"); this.appendNewLine(builder); this.appendNewLine(builder); } else { StringBuffer text = node.getText(); builder.append(" "); builder.append(replace(text.toString())); builder.append(" "); final int stringStart = start + 1; final int stringEnd = builder.length() - 1; builder.setSpan(new BackgroundColorSpan(color), stringStart, stringEnd, SPAN_EXCLUSIVE_EXCLUSIVE); if (theme == PrefGetter.LIGHT) { builder.setSpan(new ForegroundColorSpan(Color.RED), stringStart, stringEnd, SPAN_EXCLUSIVE_EXCLUSIVE); } builder.setSpan(new TypefaceSpan("monospace"), stringStart, stringEnd, SPAN_EXCLUSIVE_EXCLUSIVE); } }
Example #5
Source File: ListItemHandler.java From SDHtmlTextView with Apache License 2.0 | 6 votes |
/**
 * Finds the 1-based position of {@code node} among the {@code li} children of its parent.
 *
 * <p>Every preceding child that is a {@code TagNode} named {@code "li"} advances the counter.
 *
 * @param node the node to locate
 * @return the position, or -1 when the node has no parent or is not one of the
 *         parent's children
 */
private int getMyIndex(TagNode node) {
    final TagNode parent = node.getParent();
    int index = -1;

    if (parent != null) {
        int counter = 1;
        for (Object child : parent.getAllChildren()) {
            // Identity comparison: only this exact instance counts as a match.
            if (child == node) {
                index = counter;
                break;
            }
            if (child instanceof TagNode && "li".equals(((TagNode) child).getName())) {
                counter++;
            }
        }
    }

    return index;
}
Example #6
Source File: StyleNodeHandler.java From SDHtmlTextView with Apache License 2.0 | 6 votes |
/**
 * Parses the CSS text of a style element, if styling is allowed.
 *
 * <p>Nothing happens unless the spanner allows styling and the node has exactly one
 * child that is a {@code ContentNode}; in that case the child's content is passed to
 * {@code parseCSSFromText}.
 *
 * @param node      the tag being handled
 * @param builder   the output text (not modified here)
 * @param start     start offset of the tag's content (not used here)
 * @param end       end offset of the tag's content (not used here)
 * @param spanStack the span stack handed on to the CSS parser
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end,
        SpanStack spanStack) {
    if (!getSpanner().isAllowStyling()) {
        return;
    }
    if (node.getAllChildren().size() != 1) {
        return;
    }

    Object onlyChild = node.getAllChildren().get(0);
    if (onlyChild instanceof ContentNode) {
        parseCSSFromText(((ContentNode) onlyChild).getContent(), spanStack);
    }
}
Example #7
Source File: HtmlUtil.java From ispider with Apache License 2.0 | 6 votes |
/**
 * Collects URLs from an attribute of every node matched by an XPath expression.
 *
 * <p>Each attribute value is prefixed with {@code "https:"} before being added to the
 * result (presumably the values are protocol-relative URLs such as
 * {@code //host/path} — confirm against callers). Matched nodes that do not carry the
 * attribute are skipped, so the result never contains the bogus entry
 * {@code "https:null"}.
 *
 * @param tagNode the node the XPath expression is evaluated against
 * @param attr    the name of the attribute holding the URL
 * @param xpath   the XPath expression selecting the nodes of interest
 * @return the prefixed URLs (possibly empty), or {@code null} if the XPath
 *         expression could not be evaluated
 */
public static List<String> getListUrlByXpath(TagNode tagNode, String attr, String xpath) {
    List<String> urls = new ArrayList<>();
    try {
        Object[] matches = tagNode.evaluateXPath(xpath);
        if (matches != null) {
            for (Object match : matches) {
                String url = ((TagNode) match).getAttributeByName(attr);
                // A node without the attribute must not produce "https:null".
                if (url != null) {
                    urls.add("https:" + url);
                }
            }
        }
        return urls;
    } catch (XPatherException e) {
        // Returning null on failure is kept for backward compatibility with callers
        // that use it to tell "evaluation failed" apart from "nothing matched".
        e.printStackTrace();
        return null;
    }
}
Example #8
Source File: AlignmentAttributeHandler.java From SDHtmlTextView with Apache License 2.0 | 6 votes |
/**
 * Applies the node's {@code align} attribute to the style before delegating to the
 * superclass handler.
 *
 * <p>The values {@code right}, {@code center} and {@code left} are recognised
 * case-insensitively; any other value (or a missing attribute) leaves the style untouched.
 *
 * @param node      the tag being handled
 * @param builder   the output text
 * @param start     start offset of the tag's content
 * @param end       end offset of the tag's content
 * @param style     the incoming style
 * @param spanStack the span stack passed on to the superclass
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end,
        Style style, SpanStack spanStack) {
    final String align = node.getAttributeByName("align");

    final Style.TextAlignment alignment;
    if ("right".equalsIgnoreCase(align)) {
        alignment = Style.TextAlignment.RIGHT;
    } else if ("center".equalsIgnoreCase(align)) {
        alignment = Style.TextAlignment.CENTER;
    } else if ("left".equalsIgnoreCase(align)) {
        alignment = Style.TextAlignment.LEFT;
    } else {
        alignment = null;
    }

    Style effectiveStyle = (alignment == null) ? style : style.setTextAlignment(alignment);
    super.handleTagNode(node, builder, start, end, effectiveStyle, spanStack);
}
Example #9
Source File: HrHandler.java From mvvm-template with GNU General Public License v3.0 | 5 votes |
/**
 * Renders a horizontal rule into the output.
 *
 * <p>Appends a newline, then a one-character placeholder ({@code "$"}) covered by an
 * {@code HrSpan} and a {@code CenterSpan}, followed by another newline.
 *
 * @param tagNode                the tag being handled (not used here)
 * @param spannableStringBuilder the output text
 * @param i                      start offset of the tag's content (not used here)
 * @param i1                     end offset of the tag's content (not used here)
 */
@Override
public void handleTagNode(TagNode tagNode, SpannableStringBuilder spannableStringBuilder, int i, int i1) {
    SpannableStringBuilder rule = new SpannableStringBuilder("$");
    rule.setSpan(new HrSpan(color, width), 0, rule.length(), Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
    rule.setSpan(new CenterSpan(), 0, rule.length(), Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
    // Appended after the spans are set, so the trailing newline is outside both spans.
    rule.append("\n");

    spannableStringBuilder.append("\n");
    spannableStringBuilder.append(rule);
}
Example #10
Source File: TagNodeEmptyContentCondition.java From web-data-extractor with Apache License 2.0 | 5 votes |
private boolean satisfy(TagNode tagNode, boolean override) { String name = tagNode.getName(); TagInfo tagInfo = tagInfoProvider.getTagInfo(name); //Only _block_ elements can match. if (tagInfo != null && !hasIdAttributeSet(tagNode) && none != tagInfo.getDisplay() && !tagInfo.isEmptyTag() && (override || !unsafeBlockElements.contains(name))) { CharSequence contentString = tagNode.getText(); if (isEmptyString(contentString)) { // even though there may be no text need to make sure all children are empty or can be pruned if (tagNode.isEmpty()) { return true; } else { for (Object child : tagNode.getAllChildren()) { // TODO : similar check as in tagNode.isEmpty() argues for a visitor pattern // but allow empty td, ths to be pruned. if (child instanceof TagNode) { if (!satisfy((TagNode) child, true)) { return false; } } else if (child instanceof ContentNode) { if (!((ContentNode) child).isBlank()) { return false; } } else { return false; } } return true; } } } return false; }
Example #11
Source File: HtmlUtil.java From ispider with Apache License 2.0 | 5 votes |
/**
 * Returns the value of an attribute on the first node matched by an XPath expression.
 *
 * @param tagNode the node the XPath expression is evaluated against
 * @param attr    the name of the attribute to read
 * @param xpath   the XPath expression selecting the node
 * @return the attribute value of the first match, or {@code null} if nothing matched
 *         or the expression could not be evaluated
 */
public static String getAttrByXpath(TagNode tagNode, String attr, String xpath) {
    Object[] matches;
    try {
        matches = tagNode.evaluateXPath(xpath);
    } catch (XPatherException e) {
        e.printStackTrace();
        return null;
    }

    if (matches == null || matches.length == 0) {
        return null;
    }
    return ((TagNode) matches[0]).getAttributeByName(attr);
}
Example #12
Source File: TagNodeInsignificantBrCondition.java From web-data-extractor with Apache License 2.0 | 5 votes |
/**
 * Tests whether a {@code br} node is insignificant within its parent.
 *
 * <p>A node that is not a br node never satisfies the condition. Otherwise the result
 * is true when {@code checkSublist} accepts either the siblings before the node or
 * the siblings from the node to the end of the parent's child list.
 *
 * @param tagNode the node to test
 * @return true if the node is a br node and one of the two sibling ranges passes
 *         {@code checkSublist}
 */
public boolean satisfy(TagNode tagNode) {
    if (!isBrNode(tagNode)) {
        return false;
    }

    List siblings = tagNode.getParent().getAllChildren();
    int brIndex = siblings.indexOf(tagNode);

    if (checkSublist(0, brIndex, siblings)) {
        return true;
    }
    return checkSublist(brIndex, siblings.size(), siblings);
}
Example #13
Source File: TagNodeAttNameValueRegexCondition.java From web-data-extractor with Apache License 2.0 | 5 votes |
/**
 * Tests whether any attribute of the tag matches the configured name and value patterns.
 *
 * <p>A {@code null} pattern matches everything. Patterns are applied with
 * {@code find()}, so a partial match is enough.
 *
 * @param tagNode the tag to inspect; may be {@code null}
 * @return true if at least one attribute matches both patterns; false otherwise,
 *         including when {@code tagNode} is {@code null}
 */
public boolean satisfy(TagNode tagNode) {
    if (tagNode == null) {
        return false;
    }

    for (Map.Entry<String, String> attribute : tagNode.getAttributes().entrySet()) {
        boolean nameMatches = attNameRegex == null
                || attNameRegex.matcher(attribute.getKey()).find();
        if (!nameMatches) {
            continue;
        }
        boolean valueMatches = attValueRegex == null
                || attValueRegex.matcher(attribute.getValue()).find();
        if (valueMatches) {
            return true;
        }
    }
    return false;
}
Example #14
Source File: ListItemHandler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Returns the tag name of the node's parent.
 *
 * @param node the node whose parent is inspected
 * @return the parent's name, or {@code null} if the node has no parent
 */
private String getParentName(TagNode node) {
    TagNode parent = node.getParent();
    return (parent == null) ? null : parent.getName();
}
Example #15
Source File: HtmlUtil.java From ispider with Apache License 2.0 | 5 votes |
/**
 * Returns the trimmed text of the first node matched by an XPath expression.
 *
 * @param tagNode the node the XPath expression is evaluated against
 * @param xpath   the XPath expression selecting the node
 * @return the trimmed text of the first match, or {@code null} if nothing matched
 *         or the expression could not be evaluated
 */
public static String getTextByXpath(TagNode tagNode, String xpath) {
    try {
        Object[] matches = tagNode.evaluateXPath(xpath);
        if (matches == null || matches.length == 0) {
            return null;
        }
        TagNode first = (TagNode) matches[0];
        return first.getText().toString().trim();
    } catch (XPatherException e) {
        e.printStackTrace();
        return null;
    }
}
Example #16
Source File: LinkHandler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Pushes a {@code URLSpan} for the node's {@code href} attribute onto the span stack,
 * covering the range {@code start}..{@code end}.
 *
 * @param node      the link tag being handled
 * @param builder   the output text (not modified here)
 * @param start     start offset of the link text
 * @param end       end offset of the link text
 * @param spanStack the stack that receives the span
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end,
        SpanStack spanStack) {
    URLSpan link = new URLSpan(node.getAttributeByName("href"));
    spanStack.pushSpan(link, start, end);
}
Example #17
Source File: LinkHandler.java From mvvm-template with GNU General Public License v3.0 | 5 votes |
/**
 * Applies a {@code LinkSpan} to the range {@code start}..{@code end}.
 *
 * <p>The link target is the node's {@code href} attribute. When that is missing, the
 * node's text is appended to {@code "https://github.com/"} instead. If neither is
 * available, no span is set.
 *
 * @param node                   the link tag being handled
 * @param spannableStringBuilder the output text that receives the span
 * @param start                  start offset of the link text
 * @param end                    end offset of the link text
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder spannableStringBuilder, int start, int end) {
    String target = node.getAttributeByName("href");
    if (target == null) {
        if (node.getText() == null) {
            return;
        }
        target = "https://github.com/" + node.getText().toString();
    }

    // 33 is the numeric value of Spanned.SPAN_EXCLUSIVE_EXCLUSIVE (0x21).
    spannableStringBuilder.setSpan(new LinkSpan(target, linkColor), start, end, 33);
}
Example #18
Source File: WrappingStyleHandler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Forwards the call unchanged to the wrapped handler, if one is set.
 *
 * @param node      the tag being handled
 * @param builder   the output text
 * @param start     start offset of the tag's content
 * @param end       end offset of the tag's content
 * @param useStyle  the style to apply
 * @param spanStack the span stack
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end,
        Style useStyle, SpanStack spanStack) {
    if (wrappedHandler == null) {
        return;
    }
    wrappedHandler.handleTagNode(node, builder, start, end, useStyle, spanStack);
}
Example #19
Source File: CSSCompiler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
@Override public boolean matches(TagNode tagNode) { if ( tagNode == null ) { return false; } //If a tag name is given it should match if (tagName != null && tagName.length() > 0 && ! tagName.equals(tagNode.getName() ) ) { return false; } String classAttribute = tagNode.getAttributeByName("class"); return classAttribute != null && classAttribute.equals(className); }
Example #20
Source File: TagNodeInsignificantBrCondition.java From web-data-extractor with Apache License 2.0 | 5 votes |
/**
 * Checks that every element of {@code list} in the range {@code [start, end)} is a
 * {@code TagNode} that is either a br node or pruned.
 *
 * @param start index of the first element to check (inclusive)
 * @param end   index after the last element to check (exclusive)
 * @param list  the list to inspect
 * @return true if all elements in the range qualify (also when the range is empty)
 */
private boolean checkSublist(int start, int end, List list) {
    for (Object element : list.subList(start, end)) {
        boolean insignificant = element instanceof TagNode
                && (isBrNode((TagNode) element) || ((TagNode) element).isPruned());
        if (!insignificant) {
            return false;
        }
    }
    return true;
}
Example #21
Source File: CSSCompiler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Tests whether the tag's {@code id} attribute equals this matcher's {@code id}.
 *
 * @param tagNode the tag to test; may be {@code null}
 * @return true if the tag has an id attribute equal to {@code id}; false otherwise,
 *         including for a {@code null} tag
 */
@Override
public boolean matches(TagNode tagNode) {
    String idAttribute = (tagNode == null) ? null : tagNode.getAttributeByName("id");
    return idAttribute != null && idAttribute.equals(id);
}
Example #22
Source File: CompiledRule.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Tests whether the tag matches at least one of this rule's matcher chains.
 *
 * @param tagNode the tag to test
 * @return true as soon as {@code matchesChain} accepts one of the chains in
 *         {@code matchers}; false if none does
 */
public boolean matches(TagNode tagNode) {
    boolean matched = false;
    for (List<CSSCompiler.TagNodeMatcher> chain : matchers) {
        matched = matchesChain(chain, tagNode);
        if (matched) {
            break;
        }
    }
    return matched;
}
Example #23
Source File: Http.java From BotLibre with Eclipse Public License 1.0 | 5 votes |
/**
 * Converts an HTML string into XHTML that a DOM parser can read.
 *
 * <p>The HTML is cleaned by the cleaner returned from {@code getHtmlCleaner()} and
 * the resulting tree is serialized with a {@code SimpleXmlSerializer}.
 *
 * @param html the raw HTML to clean
 * @return the serialized XHTML
 * @throws IOException if serializing the tree fails
 */
public String convertToXHTML(String html) throws IOException {
    TagNode root = getHtmlCleaner().clean(html);
    SimpleXmlSerializer serializer = new SimpleXmlSerializer(getHtmlCleaner().getProperties());

    StringWriter buffer = new StringWriter();
    root.serialize(serializer, buffer);
    buffer.flush();

    return buffer.toString();
}
Example #24
Source File: HtmlSpanner.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Creates spanned text from a TagNode.
 *
 * <p>The node tree is first rendered via {@code applySpan} into a fresh builder while
 * spans are collected on a fresh {@code SpanStack}; the collected spans are then
 * applied to the builder.
 *
 * @param node                 the root of the tree to render
 * @param cancellationCallback callback passed on to {@code applySpan}
 * @return the rendered text with spans applied
 */
public Spannable fromTagNode(TagNode node, CancellationCallback cancellationCallback) {
    final SpanStack pendingSpans = new SpanStack();
    final SpannableStringBuilder text = new SpannableStringBuilder();

    applySpan(text, node, pendingSpans, cancellationCallback);
    pendingSpans.applySpans(this, text);

    return text;
}
Example #25
Source File: HtmlSpanner.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Renders a node and, unless its handler renders the content itself, all of its
 * children into {@code builder}.
 *
 * <p>The handler registered for the node's name is used; when none is registered, a
 * new {@code StyledTextHandler} bound to this spanner takes its place. The handler's
 * {@code beforeChildren} runs before the children are processed and its
 * {@code handleTagNode} runs afterwards with the builder offsets measured before and
 * after.
 *
 * @param builder              the output text
 * @param node                 the node to render
 * @param stack                the span stack handed to the handlers
 * @param cancellationCallback callback checked at the start of every call
 */
private void applySpan(SpannableStringBuilder builder, TagNode node, SpanStack stack,
        CancellationCallback cancellationCallback) {
    checkForCancellation(cancellationCallback);

    TagNodeHandler handler = this.handlers.get(node.getName());
    if (handler == null) {
        // No handler registered for this tag name: fall back to plain styled text.
        handler = new StyledTextHandler();
        handler.setSpanner(this);
    }

    final int startOffset = builder.length();
    handler.beforeChildren(node, builder, stack);

    if (!handler.rendersContent()) {
        for (Object child : node.getAllChildren()) {
            if (child instanceof ContentNode) {
                handleContent(builder, child, stack, cancellationCallback);
                continue;
            }
            if (child instanceof TagNode) {
                applySpan(builder, (TagNode) child, stack, cancellationCallback);
            }
        }
    }

    final int endOffset = builder.length();
    handler.handleTagNode(node, builder, startOffset, endOffset, stack);
}
Example #26
Source File: TagNodeAttValueCondition.java From web-data-extractor with Apache License 2.0 | 5 votes |
/**
 * Tests whether the tag's attribute {@code attName} has the value {@code attValue}.
 *
 * <p>The comparison is case-sensitive or case-insensitive depending on
 * {@code isCaseSensitive}.
 *
 * @param tagNode the tag to test; may be {@code null}
 * @return true if the attribute value matches; false otherwise, including when the
 *         tag, {@code attName} or {@code attValue} is {@code null}
 */
public boolean satisfy(TagNode tagNode) {
    if (tagNode == null || attName == null || attValue == null) {
        return false;
    }

    if (isCaseSensitive) {
        return attValue.equals(tagNode.getAttributeByName(attName));
    }
    return attValue.equalsIgnoreCase(tagNode.getAttributeByName(attName));
}
Example #27
Source File: HorizontalLineHandler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Renders a horizontal line.
 *
 * <p>The end offset is extended by one, a {@code HorizontalLineSpan} is pushed for
 * the extended range, a newline is appended to the builder, and the superclass
 * handler is invoked with the extended range.
 *
 * @param node      the tag being handled
 * @param builder   the output text
 * @param start     start offset of the tag's content
 * @param end       end offset of the tag's content (before the one-character extension)
 * @param useStyle  the style to apply
 * @param spanStack the stack that receives the span
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end,
        Style useStyle, SpanStack spanStack) {
    final int lineEnd = end + 1;

    Log.d("HorizontalLineHandler", "Draw hr from " + start + " to " + lineEnd);
    spanStack.pushSpan(new HorizontalLineSpan(useStyle, start, lineEnd), start, lineEnd);
    appendNewLine(builder);

    super.handleTagNode(node, builder, start, lineEnd, useStyle, spanStack);
}
Example #28
Source File: StyleAttributeHandler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Applies the node's inline {@code style} attribute before delegating to the
 * superclass handler.
 *
 * <p>When the spanner allows styling and the attribute is present, the style passed
 * on is the result of {@code parseStyleFromAttribute}; otherwise {@code useStyle} is
 * passed on unchanged.
 *
 * @param node      the tag being handled
 * @param builder   the output text
 * @param start     start offset of the tag's content
 * @param end       end offset of the tag's content
 * @param useStyle  the incoming style
 * @param spanStack the span stack passed on to the superclass
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end,
        Style useStyle, SpanStack spanStack) {
    final String styleAttr = node.getAttributeByName("style");

    Style effectiveStyle = useStyle;
    if (getSpanner().isAllowStyling() && styleAttr != null) {
        effectiveStyle = parseStyleFromAttribute(useStyle, styleAttr);
    }

    super.handleTagNode(node, builder, start, end, effectiveStyle, spanStack);
}
Example #29
Source File: BorderAttributeHandler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Pushes a {@code BorderSpan} when the node has a {@code border} attribute, then
 * delegates to the superclass handler.
 *
 * @param node      the tag being handled
 * @param builder   the output text
 * @param start     start offset of the tag's content
 * @param end       end offset of the tag's content
 * @param useStyle  the style to apply
 * @param spanStack the stack that receives the span
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end,
        Style useStyle, SpanStack spanStack) {
    boolean hasBorder = node.getAttributeByName("border") != null;
    if (hasBorder) {
        Log.d("BorderAttributeHandler", "Adding BorderSpan from " + start + " to " + end);
        BorderSpan border =
                new BorderSpan(useStyle, start, end, getSpanner().isUseColoursFromStyle());
        spanStack.pushSpan(border, start, end);
    }

    super.handleTagNode(node, builder, start, end, useStyle, spanStack);
}
Example #30
Source File: NewLineHandler.java From SDHtmlTextView with Apache License 2.0 | 5 votes |
/**
 * Runs the superclass handler and then appends {@code numberOfNewLines} newlines to
 * the builder.
 *
 * @param node      the tag being handled
 * @param builder   the output text
 * @param start     start offset of the tag's content
 * @param end       end offset of the tag's content
 * @param spanStack the span stack passed on to the superclass
 */
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end,
        SpanStack spanStack) {
    super.handleTagNode(node, builder, start, end, spanStack);

    int appended = 0;
    while (appended < numberOfNewLines) {
        appendNewLine(builder);
        appended++;
    }
}