Java Code Examples for org.jsoup.nodes.Element#hasAttr()
The following examples show how to use
org.jsoup.nodes.Element#hasAttr().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Aw22Rule06031.java From Asqatasun with GNU Affero General Public License v3.0 | 6 votes |
/** * * @param sspHandler * @param el * @param linkText * @return */ private TestSolution testTitleAttributeLink( SSPHandler sspHandler, Element el, String linkText) { // if the current has no title or has an empty title or has a title // content identical to the link text, returns not applicable. if (!el.hasAttr(TITLE_ATTR)) { return TestSolution.NOT_APPLICABLE; } String attrValue=el.attr(TITLE_ATTR); if (StringUtils.isBlank(attrValue)) { return TestSolution.NOT_APPLICABLE; } if (StringUtils.equalsIgnoreCase(attrValue, linkText)) { return TestSolution.NOT_APPLICABLE; } ElementHandler<Element> elHandler = new ElementHandlerImpl(el); TestSolutionHandler tsHandler = new TestSolutionHandlerImpl(); titlePertinenceElementChecker.check(sspHandler, elHandler, tsHandler); return tsHandler.getTestSolution(); }
Example 2
Source File: Rgaa32016Rule060303.java From Asqatasun with GNU Affero General Public License v3.0 | 6 votes |
/** * * @param sspHandler * @param el * @param linkText * @return */ private TestSolution testTitleAttributeLink( SSPHandler sspHandler, Element el, String linkText) { // if the current has no title or has an empty title or has a title // content identical to the link text, returns not applicable. if (!el.hasAttr(TITLE_ATTR)) { return TestSolution.NOT_APPLICABLE; } String attrValue=el.attr(TITLE_ATTR); if (StringUtils.isBlank(attrValue)) { return TestSolution.NOT_APPLICABLE; } if (StringUtils.equalsIgnoreCase(attrValue, linkText)) { return TestSolution.NOT_APPLICABLE; } ElementHandler<Element> elHandler = new ElementHandlerImpl(el); TestSolutionHandler tsHandler = new TestSolutionHandlerImpl(); titlePertinenceElementChecker.check(sspHandler, elHandler, tsHandler); return tsHandler.getTestSolution(); }
Example 3
Source File: Rgaa32016Rule110102.java From Asqatasun with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Links each input element to its enclosing form in {@code inputFormMap}.
 *
 * Only inputs that carry none of the {@code TITLE_ATTR}, {@code ARIA_LABEL_ATTR}
 * and {@code ARIA_LABELLEDBY_ATTR} attributes are considered. For each of them the
 * ancestors are walked upwards until an element whose tag name equals
 * {@code FORM_ELEMENT} is found; the input is then added to that form's handler.
 * Inputs without such an ancestor are left out of the map.
 */
private void putInputElementHandlerIntoTheMap () {
    for (Element el : inputElementHandler.get()) {
        if (el.hasAttr(TITLE_ATTR)
                || el.hasAttr(ARIA_LABEL_ATTR)
                || el.hasAttr(ARIA_LABELLEDBY_ATTR)) {
            continue;
        }
        Element ancestor = el.parent();
        // parent() returns null once the top of the tree has been passed; stop
        // there instead of dereferencing it (an input outside any form used to
        // end in a NullPointerException).
        while (ancestor != null && StringUtils.isNotBlank(ancestor.tagName())) {
            if (ancestor.tagName().equals(FORM_ELEMENT)) {
                if (inputFormMap.containsKey(ancestor)) {
                    inputFormMap.get(ancestor).add(el);
                } else {
                    ElementHandler<Element> inputElement = new ElementHandlerImpl();
                    inputElement.add(el);
                    inputFormMap.put(ancestor, inputElement);
                }
                break;
            }
            ancestor = ancestor.parent();
        }
    }
}
Example 4
Source File: M2DocHTMLParser.java From M2Doc with Eclipse Public License 1.0 | 6 votes |
/**
 * Sets the unordered list numbering.
 *
 * The bullet symbol is chosen from the element's {@code TYPE_ATTR} attribute:
 * "square" and "circle" select their dedicated symbols, while "disc", any other
 * value, or a missing attribute select the disc symbol.
 *
 * @param context
 *            the {@link Context}
 * @param element
 *            the list {@link Element}
 */
private void setUnorderedListNumbering(Context context, Element element) {
    // Disc is both the explicit "disc" choice and the fallback.
    String bullet = DISC_SYMBOL;
    if (element.hasAttr(TYPE_ATTR)) {
        final String listType = element.attr(TYPE_ATTR);
        if ("square".equals(listType)) {
            bullet = SQUARE_SYMBOL;
        } else if ("circle".equals(listType)) {
            bullet = CIRCLE_SYMBOL;
        }
    }

    if (context.numbering == null) {
        createNumbering(context);
    }
    context.numberingLevel = incrementNumberingLevel(
            context.numbering, context.numberingLevel, STNumberFormat.BULLET, 1, bullet, false);
}
Example 5
Source File: Mf2Parser.java From indigenous-android with GNU General Public License v3.0 | 6 votes |
private String parseImpliedUrlRelative(Element elem) { // if a.h-x[href] or area.h-x[href] then use that [href] for url if (("a".equals(elem.tagName()) || "area".equals(elem.tagName())) && elem.hasAttr("href")) { return elem.attr("href"); } //else if .h-x>a[href]:only-of-type:not[.h-*] then use that [href] for url //else if .h-x>area[href]:only-of-type:not[.h-*] then use that [href] for url for (String childTag : Arrays.asList("a", "area")) { Elements children = filterByTag(elem.children(), childTag); if(children.size() == 1) { Element child = children.first(); if (!hasRootClass(child) && child.hasAttr("href")) { return child.attr("href"); } } } return null; }
Example 6
Source File: StockToStockWithCompanyInfoMapper.java From XueQiuSuperSpider with MIT License | 6 votes |
/**
 * Rebuilds {@code industryMap} from the page at {@code URLMapper.COMPREHENSIVE_PAGE}.
 *
 * Every {@code a} element found at the fixed position inside the second
 * "second-nav" block that has both a title and an href becomes one entry,
 * keyed by its title.
 *
 * @throws Exception if requesting the page fails
 */
private void initMap() throws Exception {
    industryMap = new HashMap<>();

    String pageUrl = URLMapper.COMPREHENSIVE_PAGE.toString();
    String html = request(new URL(pageUrl));
    Document page = Jsoup.parse(html);

    Elements industryLinks = page.getElementsByClass("second-nav")
            .get(1).children()
            .get(3).children()
            .get(3).children()
            .select("a");

    for (Element link : industryLinks) {
        // Both attributes are required to build an entry.
        if (link.hasAttr("title") && link.hasAttr("href")) {
            String href = link.attr("href");
            industryMap.put(link.attr("title"), new Industry(link.attr("title"), href));
        }
    }
}
Example 7
Source File: Aw22Rule06034.java From Asqatasun with GNU Affero General Public License v3.0 | 6 votes |
/** * * @param sspHandler * @param el * @param linkText * @return */ private TestSolution testTitleAttributeLink( SSPHandler sspHandler, Element el, String linkText) { // if the current has no title or has an empty title or has a title // content identical to the link text, returns not applicable. if (!el.hasAttr(TITLE_ATTR)) { return TestSolution.NOT_APPLICABLE; } String attrValue=el.attr(TITLE_ATTR); if (StringUtils.isBlank(attrValue)) { return TestSolution.NOT_APPLICABLE; } if (StringUtils.equalsIgnoreCase(attrValue, linkText)) { return TestSolution.NOT_APPLICABLE; } ElementHandler<Element> elHandler = new ElementHandlerImpl(el); TestSolutionHandler tsHandler = new TestSolutionHandlerImpl(); titlePertinenceElementChecker.check(sspHandler, elHandler, tsHandler); return tsHandler.getTestSolution(); }
Example 8
Source File: Page.java From WebCollector with GNU General Public License v3.0 | 5 votes |
/**
 * Collects the given attribute from every element of the page that matches
 * the given CSS selector.
 * For example, attrs("img[src]","abs:src") returns the links of all images
 * in the page.
 *
 * @param cssSelector the CSS selector used to pick elements
 * @param attrName the name of the attribute to read from each matched element
 * @return the attribute values of the matched elements that have the attribute,
 *         in selection order
 */
public ArrayList<String> attrs(String cssSelector, String attrName) {
    final ArrayList<String> values = new ArrayList<String>();
    for (Element matched : select(cssSelector)) {
        // Elements lacking the attribute contribute nothing.
        if (!matched.hasAttr(attrName)) {
            continue;
        }
        values.add(matched.attr(attrName));
    }
    return values;
}
Example 9
Source File: Rgaa30Rule090102.java From Asqatasun with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Runs the parent selection, then removes every selected element whose
 * "aria-level" attribute is present but does not match {@code PATTERN}.
 * Elements without an "aria-level" attribute are kept.
 *
 * @param sspHandler the handler passed on to the parent selection
 */
@Override
protected void select(SSPHandler sspHandler) {
    super.select(sspHandler);
    for (Iterator<Element> it = getElements().get().iterator(); it.hasNext();) {
        final Element candidate = it.next();
        final boolean invalidLevel = candidate.hasAttr("aria-level")
                && !PATTERN.matcher(candidate.attr("aria-level")).matches();
        if (invalidLevel) {
            it.remove();
        }
    }
}
Example 10
Source File: SankakuComplexRipper.java From ripme with MIT License | 5 votes |
@Override public Document getNextPage(Document doc) throws IOException { Element pagination = doc.select("div.pagination").first(); if (pagination.hasAttr("next-page-url")) { String nextPage = pagination.attr("abs:next-page-url"); // Only logged in users can see past page 25 // Trying to rip page 26 will throw a no images found error if (!nextPage.contains("page=26")) { LOGGER.info("Getting next page: " + pagination.attr("abs:next-page-url")); return Http.url(pagination.attr("abs:next-page-url")).cookies(cookies).get(); } } throw new IOException("No more pages"); }
Example 11
Source File: Elements.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Gets an attribute value from the first matched element that has the attribute.
 *
 * @param attributeKey The attribute key.
 * @return The attribute value from the first matched element that has the attribute.
 *         If no elements were matched (isEmpty() == true), or if none of the elements
 *         have the attribute, returns the empty string.
 * @see #hasAttr(String)
 */
public String attr(String attributeKey) {
    for (Element candidate : this) {
        if (!candidate.hasAttr(attributeKey)) {
            continue;
        }
        return candidate.attr(attributeKey);
    }
    return "";
}
Example 12
Source File: NatalieMuRipper.java From ripme with MIT License | 5 votes |
@Override public List<String> getURLsFromPage(Document page) { List<String> imageURLs = new ArrayList<>(); Pattern p; Matcher m; //select all album thumbnails for (Element span : page.select(".NA_articleGallery span")) { if (!span.hasAttr("style")) { continue; } String style = span.attr("style").trim(); p = Pattern.compile("background-image: url\\((.*list_thumb_inbox.*)\\);", Pattern.CASE_INSENSITIVE); m = p.matcher(style); if (m.find()) { String imgUrl = m.group(1); if (imgUrl.startsWith("//")) { imgUrl = "http:" + imgUrl; } if (imgUrl.startsWith("/")) { imgUrl = "http://" + this.url.getHost() + imgUrl; } //convert thumbnail url into fullsize url imgUrl = imgUrl.replace("list_thumb_inbox","xlarge"); // Don't download the same URL twice if (imageURLs.contains(imgUrl)) { LOGGER.debug("Already attempted: " + imgUrl); continue; } imageURLs.add(imgUrl); if (isThisATest()) { break; } } if (isStopped()) { break; } } return imageURLs; }
Example 13
Source File: Rgaa30Rule010204.java From Asqatasun with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Sorts svg elements into the handlers that correspond to the patterns
 * detected on them. An element may end up in several handlers.
 *
 * @param svgElements the elements to examine
 * @param svgElementsWithoutRoleImage receives elements whose {@code ROLE_ATTR}
 *        value is not "img" (ignoring case)
 * @param ariaAttrOnSvgOrChild receives elements that carry one of the aria label,
 *        labelledby or describedby attributes, or for which the matching
 *        selector queries return something
 * @param svgElementsWithDescOrTitleChild receives elements for which the
 *        not-empty title/desc selector queries return something
 * @param titleAttrOnSvgOrChild receives elements that have the
 *        {@code TITLE_ELEMENT} attribute or for which "[title]" selects something
 * @param wellFormedSvgElements receives elements on which none of the above was
 *        detected; may be {@code null}, in which case those elements are dropped
 */
private void extractMalformedPatternDetectedElements (
        ElementHandler<Element> svgElements,
        ElementHandler<Element> svgElementsWithoutRoleImage,
        ElementHandler<Element> ariaAttrOnSvgOrChild,
        ElementHandler<Element> svgElementsWithDescOrTitleChild,
        ElementHandler<Element> titleAttrOnSvgOrChild,
        ElementHandler<Element> wellFormedSvgElements) {
    for (Element svg : svgElements.get()) {
        final boolean roleIsNotImg =
                !StringUtils.equalsIgnoreCase(svg.attr(ROLE_ATTR), "img");
        if (roleIsNotImg) {
            svgElementsWithoutRoleImage.add(svg);
        }

        final boolean ariaFound = svg.hasAttr(ARIA_LABEL_ATTR)
                || svg.hasAttr(ARIA_LABELLEDBY_ATTR)
                || svg.hasAttr(ARIA_DESCRIBEDBY_ATTR)
                || !svg.select(ARIA_DESCRIBEDBY_CSS_LIKE_QUERY + ","
                        + ARIA_LABEL_CSS_LIKE_QUERY + ","
                        + ARIA_LABELLEDBY_CSS_LIKE_QUERY).isEmpty();
        if (ariaFound) {
            ariaAttrOnSvgOrChild.add(svg);
        }

        final boolean descOrTitleFound = !svg.select(
                NOT_EMPTY_ARIA_TITLE_CSS_LIKE_QUERY + ","
                        + NOT_EMPTY_ARIA_DESC_CSS_LIKE_QUERY).isEmpty();
        if (descOrTitleFound) {
            svgElementsWithDescOrTitleChild.add(svg);
        }

        final boolean titleAttrFound =
                svg.hasAttr(TITLE_ELEMENT) || !svg.select("[title]").isEmpty();
        if (titleAttrFound) {
            titleAttrOnSvgOrChild.add(svg);
        }

        // Anything that triggered no pattern at all counts as well formed.
        final boolean anyPattern =
                roleIsNotImg || ariaFound || descOrTitleFound || titleAttrFound;
        if (!anyPattern && wellFormedSvgElements != null) {
            wellFormedSvgElements.add(svg);
        }
    }
}
Example 14
Source File: Elements.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the value of the given attribute taken from the first matched
 * element that carries it.
 *
 * @param attributeKey The attribute key.
 * @return The attribute value from the first matched element that has the attribute.
 *         If no elements were matched (isEmpty() == true), or if none of the elements
 *         have the attribute, returns the empty string.
 * @see #hasAttr(String)
 */
public String attr(String attributeKey) {
    for (Element current : this) {
        final boolean carriesAttribute = current.hasAttr(attributeKey);
        if (carriesAttribute) {
            return current.attr(attributeKey);
        }
    }
    // Nothing matched, or no matched element has the attribute.
    return "";
}
Example 15
Source File: Mf2Parser.java From indigenous-android with GNU General Public License v3.0 | 4 votes |
private String parseImpliedName(Element elem) { if (("img".equals(elem.tagName()) || ("area".equals(elem.tagName())) && elem.hasAttr("alt"))) { return elem.attr("alt"); } if ("abbr".equals(elem.tagName()) && elem.hasAttr("title")) { return elem.attr("title"); } Elements children = elem.children(); if (children.size() == 1) { Element child = children.first(); // else if .h-x>img:only-child[alt]:not[.h-*] then use that img alt for name // else if .h-x>area:only-child[alt]:not[.h-*] then use that area alt for name if (!hasRootClass(child) && ("img".equals(child.tagName()) || "area".equals(child.tagName())) && child.hasAttr("alt")) { return child.attr("alt"); } // else if .h-x>abbr:only-child[title] then use that abbr title for name if ("abbr".equals(child.tagName()) && child.hasAttr("title")) { return child.attr("title"); } Elements grandChildren = child.children(); if (grandChildren.size() == 1) { Element grandChild = grandChildren.first(); // else if .h-x>:only-child>img:only-child[alt]:not[.h-*] then use that img alt for name // else if .h-x>:only-child>area:only-child[alt]:not[.h-*] then use that area alt for name if (!hasRootClass(grandChild) && ("img".equals(grandChild.tagName()) || "area".equals(grandChild.tagName())) && grandChild.hasAttr("alt")) { return grandChild.attr("alt"); } // else if .h-x>:only-child>abbr:only-child[title] use that abbr title for name if ("abbr".equals(grandChild.tagName()) && grandChild.hasAttr("c")) { return grandChild.attr("title"); } } } // else use the textContent of the .h-x for name // drop leading & trailing white-space from name, including nbsp return elem.text().trim(); }
Example 16
Source File: Evaluator.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Matches when {@code element} has the attribute named by {@code key}.
 *
 * @param root not used by this evaluator
 * @param element the element to test
 * @return {@code true} if the element has the attribute
 */
@Override
public boolean matches(Element root, Element element) {
    final boolean attributePresent = element.hasAttr(key);
    return attributePresent;
}
Example 17
Source File: Evaluator.java From jsoup-learning with MIT License | 4 votes |
/**
 * Tests whether {@code element} carries the attribute named by {@code key}.
 *
 * @param root not used by this evaluator
 * @param element the element to test
 * @return {@code true} if the element has the attribute, {@code false} otherwise
 */
@Override
public boolean matches(Element root, Element element) {
    if (element.hasAttr(key)) {
        return true;
    }
    return false;
}
Example 18
Source File: Evaluator.java From astor with GNU General Public License v2.0 | 4 votes |
/**
 * Matches when {@code element} has the attribute named by {@code key} and its
 * trimmed value equals {@code value}, ignoring case.
 *
 * @param root not used by this evaluator
 * @param element the element to test
 * @return {@code true} if the attribute is present with a matching value
 */
@Override
public boolean matches(Element root, Element element) {
    if (!element.hasAttr(key)) {
        return false;
    }
    final String actual = element.attr(key).trim();
    return value.equalsIgnoreCase(actual);
}
Example 19
Source File: UntisInfoParser.java From substitution-schedule-parser with Mozilla Public License 2.0 | 4 votes |
/**
 * Reads the rowspan of a table cell.
 *
 * @param cell the cell element
 * @return the integer value of the "rowspan" attribute, or 1 when the attribute
 *         is absent or is not a valid integer
 */
private int getRowspan(Element cell) {
    if (!cell.hasAttr("rowspan")) {
        return 1;
    }
    try {
        // parseInt avoids the needless boxing of Integer.valueOf; trimming
        // tolerates values padded with whitespace.
        return Integer.parseInt(cell.attr("rowspan").trim());
    } catch (NumberFormatException ignored) {
        // A malformed rowspan is treated like a missing one rather than
        // aborting the whole parse.
        return 1;
    }
}
Example 20
Source File: ThechiveRipper.java From ripme with MIT License | 4 votes |
private List<String> getUrlsFromThechive(Document doc) { /* * The image urls are stored in a <script> tag of the document. This script * contains a single array var by name CHIVE_GALLERY_ITEMS. * * We grab all the <img> tags from the particular script, combine them in a * string, parse it, and grab all the img/gif urls. * */ List<String> result = new ArrayList<>(); Elements scripts = doc.getElementsByTag("script"); for (Element script : scripts) { String data = script.data(); if (!data.contains("CHIVE_GALLERY_ITEMS")) { continue; } /* * We add all the <img/> tags in a single StringBuilder and parse as HTML for * easy sorting of img/ gifs. */ StringBuilder allImgTags = new StringBuilder(); Matcher matcher = imagePattern.matcher(data); while (matcher.find()) { // Unescape '\' from the img tags, which also unescape's img url as well. allImgTags.append(matcher.group(0).replaceAll("\\\\", "")); } // Now we parse and sort links. Document imgDoc = Jsoup.parse(allImgTags.toString()); Elements imgs = imgDoc.getElementsByTag("img"); for (Element img : imgs) { if (img.hasAttr("data-gifsrc")) { // For gifs. result.add(img.attr("data-gifsrc")); } else { // For jpeg images. result.add(img.attr("src")); } } } // strip all GET parameters from the links( such as quality, width, height as to // get the original image.). result.replaceAll(s -> s.substring(0, s.indexOf("?"))); return result; }