org.jsoup.nodes.Element#equals

Source File: CommonUtil.java From CrawlerForReader with Apache License 2.0

6 votes

/**
 * Returns the 1-based position of an element among the children of its parent
 * that share its tag name.
 *
 * <p>Siblings are visited in document order; every same-tag sibling seen before
 * the first one that {@code equals} the given element advances the position by one.
 * If no child compares equal to {@code e}, the result is one more than the number
 * of same-tag children.
 *
 * @param e the element to locate; assumed to have a parent (TODO confirm with callers)
 * @return the 1-based index of {@code e} among its same-tag siblings
 */
public static int getElIndexInSameTags(Element e) {
    int position = 1;
    for (Element sibling : e.parent().children()) {
        if (!e.tagName().equals(sibling.tagName())) {
            // Differently named siblings do not take part in the numbering.
            continue;
        }
        if (e.equals(sibling)) {
            return position;
        }
        position++;
    }
    return position;
}

Source File: OutputFormatter.java From Xndroid with GNU General Public License v3.0

5 votes

/**
 * Appends the text of the elements selected by {@code tagName} under {@code node}
 * to {@code sb}, each followed by a blank line.
 *
 * <p>A selected element is skipped when {@code unlikely(...)} is true for it or for
 * any of its ancestors below {@code node}, or when its text is empty, shorter than
 * {@code getMinParagraph(...)} for the number of paragraphs accepted so far, or
 * longer than twice {@code SHelper.countLetters(text)}.
 *
 * @param node    the root element to select from
 * @param sb      the builder receiving the accepted text
 * @param tagName the selector passed to {@code node.select(...)}
 * @return how many of the appended elements have the tag name {@code "p"}
 */
private int append(Element node, StringBuilder sb, String tagName) {
    int paragraphCount = 0; // Number of P elements in the article
    int acceptedSoFar = 0;
    // Open question from the original author: is select() more costly than getElementsByTag()?
    for (Element candidate : node.select(tagName)) {
        // Walk from the candidate up towards 'node'; one unlikely ancestor rejects it.
        boolean rejected = false;
        for (Element cursor = candidate;
                cursor != null && !cursor.equals(node);
                cursor = cursor.parent()) {
            if (unlikely(cursor)) {
                rejected = true;
                break;
            }
        }
        if (rejected) {
            continue;
        }

        String text = node2Text(candidate);
        boolean tooShort = text.isEmpty() || text.length() < getMinParagraph(acceptedSoFar);
        if (tooShort || text.length() > SHelper.countLetters(text) * 2) {
            continue;
        }

        if (candidate.tagName().equals("p")) {
            paragraphCount++;
        }

        sb.append(text).append("\n\n");
        acceptedSoFar++;
    }

    return paragraphCount;
}

Source File: ProcessRemarkServiceImpl.java From Asqatasun with GNU Affero General Public License v3.0

5 votes

/**
 * Finds the position of an element among all elements of {@code jsoupDocument}
 * that carry the same tag name.
 *
 * <p>NOTE(review): the original comment describes this as searching the source
 * line of the node; the code itself only yields an index into the same-tag
 * element list - confirm how callers map that index to a line.
 *
 * @param element the element to look up
 * @return the zero-based index of the first same-tag element that {@code equals}
 *         the given one, or {@code -1} when there is none
 */
private int getElementIndex(Element element) {
    int position = 0;
    for (Element candidate : jsoupDocument.getElementsByTag(element.tagName())) {
        if (candidate.equals(element)) {
            return position;
        }
        position++;
    }
    return -1;
}

Source File: ImageElementSelector.java From Asqatasun with GNU Affero General Public License v3.0

5 votes

/**
 * Tells whether {@code image} is reachable from {@code imageParent} through a chain
 * of single children that all have the same text as the image.
 *
 * <p>Starting at {@code imageParent}, the walk descends while the current element
 * has exactly one child. It succeeds only if it ends on a childless element that
 * {@code equals} the image; any text mismatch or branching (more than one child)
 * on the way fails the check.
 *
 * @param imageParent the element to start from; may be {@code null}
 * @param image       the image element being tested
 * @return whether the current image is an image link
 */
private boolean isImageLink(Element imageParent, Element image) {
    Element current = imageParent;
    while (current != null && StringUtils.equals(current.text(), image.text())) {
        int childCount = current.children().size();
        if (childCount == 1) {
            // Exactly one child: keep descending.
            current = current.child(0);
            continue;
        }
        // Leaf reached (or the tree branches): only a leaf equal to the image qualifies.
        return childCount == 0 && current.equals(image);
    }
    return false;
}

Source File: OutputFormatter.java From JumpGo with Mozilla Public License 2.0

5 votes

/**
 * Appends the text of the elements selected by {@code tagName} under {@code node}
 * to {@code sb}, each followed by a blank line.
 *
 * <p>A selected element is skipped when {@code unlikely(...)} is true for it or for
 * any of its ancestors below {@code node}, or when its text is empty, shorter than
 * {@code getMinParagraph(...)} for the number of paragraphs accepted so far, or
 * longer than twice {@code SHelper.countLetters(text)}.
 *
 * @param node    the root element to select from
 * @param sb      the builder receiving the accepted text
 * @param tagName the selector passed to {@code node.select(...)}
 * @return how many of the appended elements have the tag name {@code "p"}
 */
private int append(Element node, StringBuilder sb, String tagName) {
    int paragraphCount = 0; // Number of P elements in the article
    int acceptedSoFar = 0;
    // Open question from the original author: is select() more costly than getElementsByTag()?
    for (Element candidate : node.select(tagName)) {
        // Walk from the candidate up towards 'node'; one unlikely ancestor rejects it.
        boolean rejected = false;
        for (Element cursor = candidate;
                cursor != null && !cursor.equals(node);
                cursor = cursor.parent()) {
            if (unlikely(cursor)) {
                rejected = true;
                break;
            }
        }
        if (rejected) {
            continue;
        }

        String text = node2Text(candidate);
        boolean tooShort = text.isEmpty() || text.length() < getMinParagraph(acceptedSoFar);
        if (tooShort || text.length() > SHelper.countLetters(text) * 2) {
            continue;
        }

        if (candidate.tagName().equals("p")) {
            paragraphCount++;
        }

        sb.append(text).append("\n\n");
        acceptedSoFar++;
    }

    return paragraphCount;
}

Source File: Selector.java From astor with GNU General Public License v2.0

5 votes

/**
 * Builds a new {@code Elements} holding every member of {@code elements} that does
 * not {@code equals} any member of {@code outs}, in the iteration order of
 * {@code elements}.
 *
 * @param elements the candidates to filter
 * @param outs     the elements to exclude
 * @return a new collection with the excluded elements left out
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    Elements kept = new Elements();
    NEXT_CANDIDATE:
    for (Element candidate : elements) {
        for (Element excluded : outs) {
            if (candidate.equals(excluded)) {
                continue NEXT_CANDIDATE;
            }
        }
        kept.add(candidate);
    }
    return kept;
}

Source File: Selector.java From astor with GNU General Public License v2.0

5 votes

/**
 * Builds a new {@code Elements} holding every member of {@code elements} that does
 * not {@code equals} any member of {@code outs}, in the iteration order of
 * {@code elements}.
 *
 * @param elements the candidates to filter
 * @param outs     the elements to exclude
 * @return a new collection with the excluded elements left out
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    Elements kept = new Elements();
    NEXT_CANDIDATE:
    for (Element candidate : elements) {
        for (Element excluded : outs) {
            if (candidate.equals(excluded)) {
                continue NEXT_CANDIDATE;
            }
        }
        kept.add(candidate);
    }
    return kept;
}

Source File: Selector.java From astor with GNU General Public License v2.0

5 votes

/**
 * Builds a new {@code Elements} holding every member of {@code elements} that does
 * not {@code equals} any member of {@code outs}, in the iteration order of
 * {@code elements}.
 *
 * @param elements the candidates to filter
 * @param outs     the elements to exclude
 * @return a new collection with the excluded elements left out
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    Elements kept = new Elements();
    NEXT_CANDIDATE:
    for (Element candidate : elements) {
        for (Element excluded : outs) {
            if (candidate.equals(excluded)) {
                continue NEXT_CANDIDATE;
            }
        }
        kept.add(candidate);
    }
    return kept;
}

Source File: CommonUtil.java From JsoupXpath with Apache License 2.0

5 votes

/**
 * Returns the 1-based position of an element among the children of its parent
 * that share its tag name and are contained in {@code scope.context()}.
 *
 * <p>Siblings with a different tag name, or that are not part of the scope's
 * context, are ignored. If no counted sibling compares equal to {@code e}, the
 * result is one more than the number of counted siblings.
 *
 * @param e     the element to locate; assumed to have a parent (TODO confirm with callers)
 * @param scope the scope whose context restricts which siblings are counted
 * @return the 1-based index of {@code e} among its same-tag siblings within the scope
 */
public static int getElIndexInSameTags(Element e,Scope scope){
    int position = 1;
    for (Element sibling : e.parent().children()) {
        boolean sameTag = e.tagName().equals(sibling.tagName());
        // The scope is only consulted for same-tag siblings.
        if (!sameTag || !scope.context().contains(sibling)) {
            continue;
        }
        if (e.equals(sibling)) {
            return position;
        }
        position++;
    }
    return position;
}

Source File: HTMLEasyPDFConverterReader.java From TableDisentangler with GNU General Public License v3.0

5 votes

/**
 * Reads the caption of a table element.
 *
 * <p>Three sources are consulted, later ones overriding earlier ones:
 * <ol>
 *   <li>the first {@code caption} child of the table,</li>
 *   <li>the text of the sibling immediately preceding the table,</li>
 *   <li>the first {@code title} child of the table.</li>
 * </ol>
 * An empty string is returned when none of them is present.
 *
 * @param tablexmlNode the table element; assumed to have a parent (TODO confirm with callers)
 * @return the caption text
 */
public String readTableCaption(Element tablexmlNode)
{
	//TODO: Obtain label as the first p tag before table tag
	String result = "";

	List<Element> captionNodes = getChildrenByTagName(tablexmlNode, "caption");
	if (!captionNodes.isEmpty()) {
		result = Utilities.getString(captionNodes.get(0));
	}

	// Children of the first element reported by the parent's getAllElements().
	Elements siblings = tablexmlNode.parent().getAllElements().first().children();
	for (int i = 1; i < siblings.size(); i++) {
		Element candidate = siblings.get(i);
		if (candidate.tagName().equals("table") && candidate.equals(tablexmlNode)) {
			// Only the first match counts: take the text of the element just before it.
			result = siblings.get(i - 1).text();
			break;
		}
	}

	List<Element> titleNodes = getChildrenByTagName(tablexmlNode, "title");
	if (!titleNodes.isEmpty()) {
		result = Utilities.getString(titleNodes.get(0));
	}
	return result;
}

Source File: Selector.java From jsoup-learning with MIT License

5 votes

/**
 * Builds a new {@code Elements} holding every member of {@code elements} that does
 * not {@code equals} any member of {@code outs}, in the iteration order of
 * {@code elements}.
 *
 * @param elements the candidates to filter
 * @param outs     the elements to exclude
 * @return a new collection with the excluded elements left out
 */
static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
    Elements kept = new Elements();
    NEXT_CANDIDATE:
    for (Element candidate : elements) {
        for (Element excluded : outs) {
            if (candidate.equals(excluded)) {
                continue NEXT_CANDIDATE;
            }
        }
        kept.add(candidate);
    }
    return kept;
}

Source File: HTMLEasyPDFConverterReader.java From TableDisentangler with GNU General Public License v3.0

4 votes

/**
 * Reads the footer text of a table element.
 *
 * <p>The footer is assembled from the first {@code caption} child of the table,
 * followed by every {@code p} child and then the text of each non-table sibling
 * that comes after the table up to the next table, each terminated by a newline.
 * If the table has a {@code title} child, its text replaces everything collected
 * so far.
 *
 * @param tablesxmlNode the table element; assumed to have a parent (TODO confirm with callers)
 * @return the footer text, or an empty string when nothing was found
 */
public String ReadTableFooter(Element tablesxmlNode)
{
	//TODO: Obtain label as the first p tag before table tag
	String footer = "";

	List<Element> captionNodes = getChildrenByTagName(tablesxmlNode, "caption");
	if (!captionNodes.isEmpty()) {
		footer = Utilities.getString(captionNodes.get(0));
	}

	for (Element paragraph : getChildrenByTagName(tablesxmlNode, "p")) {
		footer += Utilities.getString(paragraph) + '\n';
	}

	// Children of the first element reported by the parent's getAllElements().
	Elements siblings = tablesxmlNode.parent().getAllElements().first().children();
	boolean collecting = false;
	for (Element sibling : siblings) {
		if (sibling.tagName().equals("table")) {
			// The target table opens the footer region; any other table closes it.
			collecting = sibling.equals(tablesxmlNode);
		} else if (collecting) {
			footer += sibling.text() + '\n';
		}
	}

	List<Element> titleNodes = getChildrenByTagName(tablesxmlNode, "title");
	if (!titleNodes.isEmpty()) {
		footer = Utilities.getString(titleNodes.get(0));
	}
	return footer;
}

Java Code Examples for org.jsoup.nodes.Element#equals()