Java Code Examples for org.jsoup.select.Elements#first()
The following examples show how to use
org.jsoup.select.Elements#first().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JsoupAssociationRowTableExtractor.java From wandora with GNU General Public License v3.0 | 6 votes |
/**
 * Turns an HTML table into associations: the first row supplies the role
 * topics (one per non-empty {@code th} cell) and every following row is handed
 * to {@code handlePlayerRow} together with those roles.
 *
 * <p>A table without any {@code tr} rows is ignored; previously it failed with a
 * {@link NullPointerException} because {@code Elements.first()} returns
 * {@code null} for an empty selection.
 *
 * @param table the table element to parse
 * @throws Exception any exception raised while resolving the role topics is propagated
 */
private void parseTable(Element table) throws Exception {
    Elements rows = table.select("tr");
    if (rows.isEmpty()) {
        // No header row to read roles from, and subList(1, 0) would throw as well.
        return;
    }

    Element headerRow = rows.first();
    ArrayList<Topic> roles = new ArrayList<Topic>();
    for (Element headerCell : headerRow.select("th")) {
        String roleValue = headerCell.text().trim();
        if (roleValue.length() == 0) {
            continue;
        }
        roles.add(getOrCreateTopic(tm, null, roleValue));
    }

    // Every row after the header is a player row. A failure in one row is logged
    // and does not stop the remaining rows from being processed.
    for (Element playerRow : rows.subList(1, rows.size())) {
        try {
            handlePlayerRow(playerRow, roles);
        } catch (Exception e) {
            log(e.getMessage());
        }
    }
}
Example 2
Source File: BlacklistHelper.java From hipda with GNU General Public License v2.0 | 6 votes |
/**
 * Posts a blacklist-add request for {@code username} and interprets the response page.
 *
 * @param formhash value sent as the {@code formhash} form field
 * @param username value sent as the {@code user} form field
 * @return the text of the first {@code div.alert_error} block (after its links have
 *         been removed) when the response contains one; otherwise an empty string,
 *         returned after {@code HiSettingsHelper.addToBlacklist(username)} was called
 * @throws Exception propagated from the HTTP call
 */
public static String addBlacklist2(String formhash, String username) throws Exception {
    ParamsMap form = new ParamsMap();
    form.put("formhash", formhash);
    form.put("user", username);

    String body = OkHttpHelper.getInstance().post(HiUtils.AddBlackUrl, form);
    Elements alerts = Jsoup.parse(body).select("div.alert_error");

    if (alerts.isEmpty()) {
        // No error block in the response: record the user locally.
        HiSettingsHelper.getInstance().addToBlacklist(username);
        return "";
    }

    // Report the error text without the anchor elements embedded in it.
    Element alert = alerts.first();
    alert.select("a").remove();
    return alert.text();
}
Example 3
Source File: ElementTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Removes every sibling that precedes the {@code XXX} paragraph and checks that the
 * paragraph itself and the siblings after it are left in place.
 */
@Test
public void testRemoveBeforeIndex() {
    String html = "<html><body><div><p>before1</p><p>before2</p><p>XXX</p><p>after1</p><p>after2</p></div></body></html>";
    Document doc = Jsoup.parse(html, "");
    Element body = doc.select("body").first();

    // The paragraph whose own text matches XXX is the cut-off point.
    Element marker = body.select("p:matchesOwn(XXX)").first();
    Elements preceding = marker.parent().getElementsByIndexLessThan(marker.elementSiblingIndex());
    for (Element sibling : preceding) {
        sibling.remove();
    }

    String expected = "<body><div><p>XXX</p><p>after1</p><p>after2</p></div></body>";
    assertEquals(expected, TextUtil.stripNewlines(body.outerHtml()));
}
Example 4
Source File: NexusParser.java From Hentoid with Apache License 2.0 | 6 votes |
/**
 * Visits every reader page of {@code content} and collects the {@code src} of the
 * first {@code section a img} element found on each page.
 *
 * <p>Pages whose document could not be loaded, or that contain no matching image,
 * contribute nothing to the result; progress is still advanced for them.
 *
 * @param content the content whose reader pages are visited
 * @return the collected image URLs, in page order
 * @throws IOException declared by the overridden method
 */
@Override
protected List<String> parseImages(@NonNull Content content) throws IOException {
    List<String> imageUrls = new ArrayList<>();
    progressStart(content.getQtyPages());

    // Open each page in turn and grab the URL of the displayed image.
    for (int pageNumber = 1; pageNumber <= content.getQtyPages(); pageNumber++) {
        // The reader URL carries "001" as the page placeholder; swap in the current page
        // (presumably zero-padded to three digits by formatIntAsStr - confirm).
        String pageUrl = content.getReaderUrl().replace("001", Helper.formatIntAsStr(pageNumber, 3));
        Document page = getOnlineDocument(pageUrl);
        if (page != null) {
            Elements images = page.select("section a img");
            boolean hasImage = images != null && !images.isEmpty();
            if (hasImage) {
                imageUrls.add(images.first().attr("src"));
            }
        }
        progressPlus();
    }

    progressComplete();
    return imageUrls;
}
Example 5
Source File: JokeBean.java From Study_Android_Demo with Apache License 2.0 | 6 votes |
public JokeBean(Element element) { //内容 //得到内容,返回的是元素集合,然后再取第一个数据 Element tmpContent = element.getElementsByClass("content").first(); //取出文本 this.content = tmpContent.text(); //图片 //图片地址,有两种可能,有或没有 Elements tmpThumb = element.getElementsByClass("thumb"); //如果imgs为null,或者内容长度为0说明没有图片,否则有图片,取第一个即可 if(tmpThumb !=null && tmpThumb.size()>0){ //有图片,解析出图片地址,取出第一个元素 Element tmpImg = tmpThumb.first(); //得到img标签的选择器,src的属性值即为图片地址 this.img = tmpImg.select("img").attr("src"); } //链接地址 //得到class='contentHerf',取出第一个元素,得到a的选择器,取出href属性 Element tmpHerf = element.getElementsByClass("contentHerf").first(); this.contentHerf = tmpHerf.select("a").attr("href"); }
Example 6
Source File: HentaifoundryRipper.java From ripme with MIT License | 6 votes |
@Override public Document getNextPage(Document doc) throws IOException { if (!doc.select("li.next.hidden").isEmpty()) { // Last page throw new IOException("No more pages"); } Elements els = doc.select("li.next > a"); Element first = els.first(); try { String nextURL = first.attr("href"); nextURL = "https://www.hentai-foundry.com" + nextURL; return Http.url(nextURL) .referrer(url) .cookies(cookies) .get(); } catch (NullPointerException e) { throw new IOException("No more pages"); } }
Example 7
Source File: BakaTsukiParserAlternative.java From coolreader with MIT License | 6 votes |
/*** * Process li to chapter. * * @param li * @param parent * @param chapterOrder * @return */ private static PageModel processLI(Element li, String parent, int chapterOrder, String language) { PageModel p = null; Elements links = li.select("a"); if (links != null && links.size() > 0) { // TODO: need to handle multiple link in one list item Element link = links.first(); // skip if User_talk: if (link.attr("href").contains("User_talk:")) return null; p = processA(li.text(), parent, chapterOrder, link, language); } return p; }
Example 8
Source File: Mf2Parser.java From indigenous-android with GNU General Public License v3.0 | 6 votes |
private String parseImpliedUrlRelative(Element elem) { // if a.h-x[href] or area.h-x[href] then use that [href] for url if (("a".equals(elem.tagName()) || "area".equals(elem.tagName())) && elem.hasAttr("href")) { return elem.attr("href"); } //else if .h-x>a[href]:only-of-type:not[.h-*] then use that [href] for url //else if .h-x>area[href]:only-of-type:not[.h-*] then use that [href] for url for (String childTag : Arrays.asList("a", "area")) { Elements children = filterByTag(elem.children(), childTag); if(children.size() == 1) { Element child = children.first(); if (!hasRootClass(child) && child.hasAttr("href")) { return child.attr("href"); } } } return null; }
Example 9
Source File: SelectorFetcher.java From stevia with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * verify an element locator as unique * @param e * @param locator * @return * @throws Exception */ private static String verifyLocator( Element e, String locator) throws Exception { Element rootElement = e.parents().last(); if(!locator.startsWith("//")) { Elements selected = rootElement.select(locator); if (selected.size() == 1) { if (!uniqueLocators.containsKey(e)) { uniqueLocators.put(e, locator); } return locator + " UNIQUE = "+selected.first(); } else if (selected.size() > 1) { return locator + " NON-UNIQUE = "+selected; } else { return locator +" NOT FOUND - PROBLEM"; } } else if(locator.startsWith("//")) { //xpath XElements elements = Xsoup.select(rootElement, locator); if (elements.getElements().size() > 1) { return locator + " NON-UNIQUE!!! "; } else if (elements.getElements().size() == 0) { return locator +" NOT FOUND - PROBLEM"; } if (!uniqueLocators.containsKey(e)) { uniqueLocators.put(e, locator); } return locator + " UNIQUE = "+ elements.getElements().get(0); } return locator + " XPATH?"; }
Example 10
Source File: EHentaiParser.java From Hentoid with Apache License 2.0 | 5 votes |
/**
 * Reads the URL of the image displayed on a viewer page.
 *
 * @param doc the viewer page
 * @return the {@code src} of the first {@code img#img} element, or an empty string
 *         when the page has no such element
 */
private String getDisplayedImageUrl(@Nonnull Document doc) {
    Elements displayed = doc.select("img#img");
    return displayed.isEmpty() ? "" : displayed.first().attr("src");
}
Example 11
Source File: AbstractSpiderServer.java From Doctor with Apache License 2.0 | 5 votes |
/** * 症状并发症等含有通用词的 * * @param href * @return */ protected Map<String, Object> getBrief(String href, String word) throws Exception { String url = (word == null) ? (index + href) : (index + word + href.substring(href.lastIndexOf("/"))); //症状详情页 Document document = SpiderUtil.getDocument(url); Elements select = document.select("div.spider"); if (select.size() == 0) { if (document.select("div.jb-body").size()!=0){ select = document.select("div.jb-body"); }else{ logger.error("异常:详情页无详情 "+url); } } Element first = select.first(); //爬取所有描述 Map<String, Object> map = new HashMap<>(); map.put(ALL, first.text()); //判断是否有词 Elements elements = first.getElementsByTag("a"); if (elements.size()== 0) { // logger.warn("正常无spider<a> "+url); return map; } //遍历词 List<String> symptomList = new ArrayList<>(); for (Element element1 : elements) { symptomList.add(element1.text()); //新的词链接 String href1 = element1.attr("href"); //保存新词到本地txt文件 TexUtil.write(element1.text()+"\r\n"+href1+"\r\n",ProjectPath.getRootPath("/word_link.txt")); } map.put(WORD, symptomList); return map; }
Example 12
Source File: JsoupProcessor.java From AcgClub with MIT License | 5 votes |
/**
 * Returns the first descendant of {@code container} matching {@code query}.
 *
 * @param container element to search in
 * @param query selector to evaluate
 * @return the first match, or {@code null} when nothing matches
 */
private static Element element(Element container, String query) {
    Elements matches = container.select(query);
    return matches.size() == 0 ? null : matches.first();
}
Example 13
Source File: MyJsoup.java From frameworkAggregate with Apache License 2.0 | 5 votes |
/**
 * Downloads the flower encyclopedia index page and builds one category per child
 * node of its first {@code catelist} element.
 *
 * <p>Inside each such node, an {@code a} child supplies the URL ({@code href}) and
 * the image path ({@code src} of the anchor's child node at index 1), and an
 * {@code h2} child supplies the name ({@code title}).
 *
 * @return the categories found; empty when the page has no {@code catelist} element
 *         or the download fails with an {@link IOException}
 */
private static List<FlowerCategory> getCategoryList() {
    List<FlowerCategory> categories = new ArrayList<FlowerCategory>();
    try {
        Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
        Element cates = doc.getElementsByClass("catelist").first();
        if (cates == null) {
            // first() returns null for an empty selection; nothing to parse then.
            return categories;
        }

        for (Node node : cates.childNodes()) {
            List<Node> childs = node.childNodes();
            if (childs == null || childs.isEmpty()) {
                // Nodes without children (e.g. text nodes) carry no category data.
                continue;
            }

            FlowerCategory category = new FlowerCategory();
            for (Node child : childs) {
                if ("a".equals(child.nodeName())) {
                    category.setUrl(child.attr("href"));
                    category.setImgPath(child.childNode(1).attr("src"));
                } else if ("h2".equals(child.nodeName())) {
                    category.setName(child.attr("title"));
                }
            }
            categories.add(category);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return categories;
}
Example 14
Source File: TestJsoup.java From frameworkAggregate with Apache License 2.0 | 5 votes |
/**
 * Fetches the flower encyclopedia index page and converts the child nodes of its
 * first {@code catelist} element into categories.
 *
 * <p>For every child node that itself has children, the {@code a} child provides the
 * URL ({@code href}) and image path ({@code src} of its child node at index 1), while
 * the {@code h2} child provides the name ({@code title}).
 *
 * @return the parsed categories; an empty list when no {@code catelist} element
 *         exists or when fetching the page raises an {@link IOException}
 */
private static List<FlowerCategory> getCategoryList() {
    List<FlowerCategory> categories = new ArrayList<FlowerCategory>();
    try {
        Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
        Elements catelist = doc.getElementsByClass("catelist");
        if (catelist.isEmpty()) {
            // Without this guard first() yields null and childNodes() would throw.
            return categories;
        }

        List<Node> childNodes = catelist.first().childNodes();
        for (int i = 0; i < childNodes.size(); i++) {
            List<Node> childs = childNodes.get(i).childNodes();
            if (childs != null && childs.size() > 0) {
                FlowerCategory category = new FlowerCategory();
                for (int j = 0; j < childs.size(); j++) {
                    Node child = childs.get(j);
                    String nodeName = child.nodeName();
                    if ("a".equals(nodeName)) {
                        category.setUrl(child.attr("href"));
                        category.setImgPath(child.childNode(1).attr("src"));
                    } else if ("h2".equals(nodeName)) {
                        category.setName(child.attr("title"));
                    }
                }
                categories.add(category);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return categories;
}
Example 15
Source File: BaseTask.java From guanggoo-android with Apache License 2.0 | 5 votes |
/**
 * Detects a logged-in user card on the page and, when present, stores its user name
 * and avatar URL in {@code AuthInfoManager}.
 *
 * @param doc the page to inspect
 * @return {@code true} when a {@code div.usercard} element exists, {@code false} otherwise
 */
protected boolean checkAuth(Document doc) {
    Elements userCards = doc.select("div.usercard");
    if (userCards.isEmpty()) {
        return false;
    }

    Element card = userCards.first();
    // User name is the text of the card's first div.username element.
    AuthInfoManager.getInstance().setUsername(card.select("div.username").first().text());
    // Avatar is the src of the card's first img.avatar element.
    AuthInfoManager.getInstance().setAvatar(card.select("img.avatar").first().attr("src"));
    return true;
}
Example 16
Source File: JsoupParserIntegrationTest.java From tutorials with MIT License | 5 votes |
/**
 * Demonstrates the traversal API: picking elements out of a selection by position
 * and walking to an element's parents, children and siblings.
 */
@Test
public void examplesTraversing() {
    Elements sections = doc.select("section");

    Element firstSection = sections.first();
    Element lastSection = sections.last();
    // Elements is zero-indexed: the second section is at index 1 (index 2 is the third).
    Element secondSection = sections.get(1);

    Elements allParents = firstSection.parents();
    Element parent = firstSection.parent();
    Elements children = firstSection.children();
    Elements siblings = firstSection.siblingElements();

    sections.forEach(el -> System.out.println("section: " + el));
}
Example 17
Source File: ModifySamlResponseStepBuilder.java From keycloak with Apache License 2.0 | 4 votes |
/**
 * Processes a SAML POST-binding page: extracts the single SAMLResponse/SAMLRequest
 * form field, runs the decoded document through the transformer and builds the
 * follow-up request carrying the transformed document.
 *
 * @param currentResponse HTTP response whose body is the auto-submitting SAML form
 * @return the request created by {@code createRequest}, or {@code null} when the
 *         transformer returns {@code null}
 * @throws Exception propagated from reading the entity, decoding or transforming
 */
private HttpUriRequest handlePostBinding(CloseableHttpResponse currentResponse) throws Exception {
    assertThat(currentResponse, statusCodeIsHC(Status.OK));

    final String htmlBody = EntityUtils.toString(currentResponse.getEntity());
    assertThat(htmlBody, Matchers.containsString("SAML"));

    org.jsoup.nodes.Document theResponsePage = Jsoup.parse(htmlBody);
    Elements samlResponses = theResponsePage.select("input[name=SAMLResponse]");
    Elements samlRequests = theResponsePage.select("input[name=SAMLRequest]");
    Elements forms = theResponsePage.select("form");
    Elements relayStates = theResponsePage.select("input[name=RelayState]");

    // Exactly one SAML field (response or request) and exactly one form are expected.
    int size = samlResponses.size() + samlRequests.size();
    assertThat("Checking uniqueness of SAMLResponse/SAMLRequest input field in the page", size, is(1));
    assertThat("Checking uniqueness of forms in the page", forms, hasSize(1));

    Element respElement = samlResponses.isEmpty() ? samlRequests.first() : samlResponses.first();
    Element form = forms.first();

    String base64EncodedSamlDoc = respElement.val();
    String samlDoc;
    // try-with-resources closes the stream even when reading it throws.
    try (InputStream decoded = PostBindingUtil.base64DecodeAsStream(base64EncodedSamlDoc)) {
        samlDoc = IOUtils.toString(decoded, GeneralConstants.SAML_CHARSET);
    }

    String transformed = getTransformer().transform(samlDoc);
    if (transformed == null) {
        return null;
    }

    // An explicitly configured attribute name / target URI wins over what the page declares.
    final String attributeName = this.targetAttribute != null
            ? this.targetAttribute
            : respElement.attr("name");

    List<NameValuePair> parameters = new LinkedList<>();
    if (!relayStates.isEmpty()) {
        parameters.add(new BasicNameValuePair(GeneralConstants.RELAY_STATE, relayStates.first().val()));
    }

    URI locationUri = this.targetUri != null
            ? this.targetUri
            : URI.create(form.attr("action"));

    return createRequest(locationUri, attributeName, transformed, parameters);
}
Example 18
Source File: EHentaiParser.java From Hentoid with Apache License 2.0 | 4 votes |
/**
 * Builds the list of image files of a gallery: the cover first, then one entry per
 * gallery page whose displayed image URL could be read.
 *
 * <p>This instance is registered on the default {@code EventBus} for the duration of
 * the call and unregistered in a {@code finally} block. The {@code processHalted}
 * flag is not written in this method (presumably it is set by an event handler -
 * confirm); when it is set, the loops stop early and the method throws instead of
 * returning a partial result.
 *
 * <p>If the gallery page cannot be loaded, the result stays empty. If it contains no
 * {@code table.ptt a} links, the empty result is returned immediately, before
 * progress reporting is started.
 *
 * @param content the gallery to parse
 * @return the cover followed by the page images, each with status {@code SAVED}
 * @throws LimitReachedException when a displayed image URL contains {@code /509.gif}
 * @throws PreparationInterruptedException when {@code processHalted} is set at the end
 * @throws Exception anything else propagated from the helpers called here
 */
public List<ImageFile> parseImageList(@NonNull Content content) throws Exception {
    EventBus.getDefault().register(this);
    try {
        List<ImageFile> result = new ArrayList<>();
        boolean useHentoidAgent = Site.EHENTAI.canKnowHentoidAgent();
        // Reused for every image: "backupUrl" is overwritten before each serialization
        Map<String, String> downloadParams = new HashMap<>();
        int order = 1;
        /*
         * 1- Detect the number of pages of the gallery
         *
         * 2- Browse the gallery and fetch the URL for every page (since all of them have a different temporary key...)
         *
         * 3- Open all pages and grab the URL of the displayed image
         */
        // 1- Detect the number of pages of the gallery
        Element e;
        List<Pair<String, String>> headers = new ArrayList<>();
        headers.add(new Pair<>(HttpHelper.HEADER_COOKIE_KEY, "nw=1")); // nw=1 (always) avoids the Offensive Content popup (equivalent to clicking the "Never warn me again" link)
        Document doc = getOnlineDocument(content.getGalleryUrl(), headers, useHentoidAgent);
        if (doc != null) {
            Elements elements = doc.select("table.ptt a");
            if (null == elements || elements.isEmpty()) return result;
            // With a single link use it; otherwise use the second-to-last link
            // (presumably the last one is a "next" arrow rather than a page number - confirm)
            int tabId = (1 == elements.size()) ? 0 : elements.size() - 2;
            int nbGalleryPages = Integer.parseInt(elements.get(tabId).text());
            // Progress covers both the gallery index pages and the individual image pages
            progress.start(nbGalleryPages + content.getQtyPages());
            // 2- Browse the gallery and fetch the URL for every page (since all of them have a different temporary key...)
            List<String> pageUrls = new ArrayList<>();
            fetchPageUrls(doc, pageUrls);
            if (nbGalleryPages > 1) {
                // The first index page is already loaded; the remaining ones are ?p=1 .. ?p=n-1
                for (int i = 1; i < nbGalleryPages && !processHalted; i++) {
                    doc = getOnlineDocument(content.getGalleryUrl() + "/?p=" + i, headers, useHentoidAgent);
                    if (doc != null) fetchPageUrls(doc, pageUrls);
                    progress.advance();
                }
            }
            // 3- Open all pages and
            //    - grab the URL of the displayed image
            //    - grab the alternate URL of the "Click here if the image fails loading" link
            result.add(ImageFile.newCover(content.getCoverImageUrl(), StatusContent.SAVED));
            ImageFile img;
            for (String pageUrl : pageUrls) {
                if (processHalted) break;
                doc = getOnlineDocument(pageUrl, headers, useHentoidAgent);
                if (doc != null) {
                    // Displayed image
                    String imageUrl = getDisplayedImageUrl(doc).toLowerCase();
                    if (!imageUrl.isEmpty()) {
                        // If we have the 509.gif picture, it means the bandwidth limit for e-h has been reached
                        if (imageUrl.contains("/509.gif"))
                            throw new LimitReachedException("E-hentai download points regenerate over time or can be bought on e-hentai if you're in a hurry");
                        img = ParseHelper.urlToImageFile(imageUrl, order++, pageUrls.size(), StatusContent.SAVED);
                        result.add(img);
                        // "Click here if the image fails loading" link
                        elements = doc.select("#loadfail");
                        if (!elements.isEmpty()) {
                            e = elements.first();
                            String arg = e.attr("onclick");
                            // Get the argument between 's
                            int quoteBegin = arg.indexOf('\'');
                            int quoteEnd = arg.indexOf('\'', quoteBegin + 1);
                            arg = arg.substring(quoteBegin + 1, quoteEnd);
                            // Get the query URL (the loop variable is reused to build the backup URL)
                            if (pageUrl.contains("?")) pageUrl += "&";
                            else pageUrl += "?";
                            pageUrl += "nl=" + arg;
                            // Get the final URL
                            if (URLUtil.isValidUrl(pageUrl)) {
                                downloadParams.put("backupUrl", pageUrl);
                                String downloadParamsStr = JsonHelper.serializeToJson(downloadParams, JsonHelper.MAP_STRINGS);
                                img.setDownloadParams(downloadParamsStr);
                            }
                        }
                    }
                }
                progress.advance();
            }
        }
        progress.complete();
        // If the process has been halted manually, the result is incomplete and should not be returned as is
        if (processHalted) throw new PreparationInterruptedException();
        return result;
    } finally {
        EventBus.getDefault().unregister(this);
    }
}
Example 19
Source File: Mf2Parser.java From indigenous-android with GNU General Public License v3.0 | 4 votes |
private String parseImpliedName(Element elem) { if (("img".equals(elem.tagName()) || ("area".equals(elem.tagName())) && elem.hasAttr("alt"))) { return elem.attr("alt"); } if ("abbr".equals(elem.tagName()) && elem.hasAttr("title")) { return elem.attr("title"); } Elements children = elem.children(); if (children.size() == 1) { Element child = children.first(); // else if .h-x>img:only-child[alt]:not[.h-*] then use that img alt for name // else if .h-x>area:only-child[alt]:not[.h-*] then use that area alt for name if (!hasRootClass(child) && ("img".equals(child.tagName()) || "area".equals(child.tagName())) && child.hasAttr("alt")) { return child.attr("alt"); } // else if .h-x>abbr:only-child[title] then use that abbr title for name if ("abbr".equals(child.tagName()) && child.hasAttr("title")) { return child.attr("title"); } Elements grandChildren = child.children(); if (grandChildren.size() == 1) { Element grandChild = grandChildren.first(); // else if .h-x>:only-child>img:only-child[alt]:not[.h-*] then use that img alt for name // else if .h-x>:only-child>area:only-child[alt]:not[.h-*] then use that area alt for name if (!hasRootClass(grandChild) && ("img".equals(grandChild.tagName()) || "area".equals(grandChild.tagName())) && grandChild.hasAttr("alt")) { return grandChild.attr("alt"); } // else if .h-x>:only-child>abbr:only-child[title] use that abbr title for name if ("abbr".equals(grandChild.tagName()) && grandChild.hasAttr("c")) { return grandChild.attr("title"); } } } // else use the textContent of the .h-x for name // drop leading & trailing white-space from name, including nbsp return elem.text().trim(); }
Example 20
Source File: HiParser.java From hipda with GNU General Public License v2.0 | 4 votes |
private static SimpleListBean parseSearch(Document doc) { if (doc == null) { return null; } SimpleListBean list = new SimpleListBean(); int last_page = 1; //if this is the last page, page number is in <strong> Elements pagesES = doc.select("div.pages_btns div.pages a"); pagesES.addAll(doc.select("div.pages_btns div.pages strong")); String searchIdUrl; if (pagesES.size() > 0) { searchIdUrl = pagesES.first().attr("href"); list.setSearchId(Utils.getMiddleString(searchIdUrl, "searchid=", "&")); for (Node n : pagesES) { int tmp = Utils.getIntFromString(((Element) n).text()); if (tmp > last_page) { last_page = tmp; } } } list.setMaxPage(last_page); Elements tbodyES = doc.select("tbody"); for (int i = 0; i < tbodyES.size(); ++i) { Element tbodyE = tbodyES.get(i); SimpleListItemBean item = new SimpleListItemBean(); Elements subjectES = tbodyE.select("tr th.subject a"); if (subjectES.size() == 0) { continue; } Element titleLink = subjectES.first(); String href = titleLink.attr("href"); item.setTid(Utils.getMiddleString(href, "tid=", "&")); item.setTitle(titleLink.text()); Elements authorAES = tbodyE.select("tr td.author cite a"); if (authorAES.size() == 0) { continue; } item.setAuthor(authorAES.first().text()); String spaceUrl = authorAES.first().attr("href"); if (!TextUtils.isEmpty(spaceUrl)) { String uid = Utils.getMiddleString(spaceUrl, "uid=", "&"); item.setAvatarUrl(HiUtils.getAvatarUrlByUid(uid)); } Elements timeES = tbodyE.select("tr td.author em"); if (timeES.size() > 0) { item.setTime(timeES.first().text()); } Elements forumES = tbodyE.select("tr td.forum"); if (forumES.size() > 0) { item.setForum(forumES.first().text()); } list.add(item); } return list; }