Java Code Examples for org.jsoup.select.Elements#forEach()
The following examples show how to use
org.jsoup.select.Elements#forEach().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BangumiCrawlerService.java From Pixiv-Illustration-Collection-Backend with Apache License 2.0 | 6 votes |
private List<Integer> querySubjectId(Integer pageNum) throws IOException, InterruptedException { List<Integer> idList = new ArrayList<>(24); int currentIndex = 0; //开始查找id并添加到文件 for (; currentIndex < pageNum; currentIndex++) { System.out.println("开始爬取第" + currentIndex + "页"); HttpRequest request = HttpRequest.newBuilder() .uri(URI.create("https://bangumi.tv/anime/browser/?sort=date&page=" + currentIndex)).GET().build(); String body = httpClient.send(request, HttpResponse.BodyHandlers.ofString()).body(); //jsoup提取文本 Document doc = Jsoup.parse(body); Elements elements = doc.getElementsByClass("subjectCover cover ll"); elements.forEach(e -> { idList.add(Integer.parseInt(e.attr("href").replaceAll("\\D", "") + "\n")); }); } return idList; }
Example 2
Source File: ZIMuKuCommon.java From SubTitleSearcher with Apache License 2.0 | 5 votes |
/** * 获取下载网址列表 * @return */ public static JSONArray getDetailList(String url) { String result = httpGet(baseUrl+url); //System.out.println(result); Document doc = Jsoup.parse(result); Elements matchList = doc.select("#subtb tbody tr"); if(matchList.size() == 0)return new JSONArray(); //System.out.println(matchList.html()); JSONArray resList = new JSONArray(); for(int i = 0 ; i < matchList.size(); i++) { Element row = matchList.get(i); JSONObject resRow = new JSONObject(); resRow.put("url", row.selectFirst("a").attr("href")); resRow.put("title", row.selectFirst("a").attr("title")); resRow.put("ext", row.selectFirst(".label-info").text()); Elements authorInfos = row.select(".gray"); StringBuffer authorInfo = new StringBuffer(); authorInfos.forEach(element ->{ authorInfo.append(element.text() + ","); }); if(authorInfo.length() > 0) { resRow.put("authorInfo", authorInfo.toString().substring(0, authorInfo.length()-1)); }else { resRow.put("authorInfo", ""); } resRow.put("lang", row.selectFirst("img").attr("alt")); resRow.put("rate", row.selectFirst(".rating-star").attr("title").replace("字幕质量:", "")); resRow.put("downCount", row.select("td").get(3).text()); resList.add(resRow); } return resList; }
Example 3
Source File: Scraper.java From rxjava2-lab with Apache License 2.0 | 5 votes |
/**
 * Scrapes the character index of superherodb.com, retrieves every character via
 * {@code scrap}, and writes the collected list to
 * {@code src/main/resources/characters.json}.
 *
 * @param args unused
 * @throws IOException if the index page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("https://www.superherodb.com/characters/").get();
    System.out.println("Scraping " + doc.title());

    // Title -> relative link, in document order.
    Map<String, String> names = new LinkedHashMap<>();
    Elements links = doc.select("a[title]");
    links.forEach(link -> {
        String title = link.attr("title");
        if (title == null || title.trim().isEmpty() || isExcluded(title)) {
            return;
        }
        names.put(title, link.attr("href"));
    });
    System.out.println(names.size() + " superheros and villains found");

    Vertx vertx = Vertx.vertx();
    WebClient client = WebClient.create(vertx);
    AtomicInteger counter = new AtomicInteger();

    Flowable.fromIterable(names.entrySet())
        .flatMapSingle(e -> scrap(client, e.getKey(), "https://www.superherodb.com" + e.getValue()))
        .doOnNext(item ->
            System.out.println("Retrieved " + item + " (" + counter.incrementAndGet() + " / " + names.size() + ")"))
        .toList()
        .flatMapCompletable(all ->
            vertx.fileSystem()
                .rxWriteFile("src/main/resources/characters.json", new Buffer(Json.encodeToBuffer(all))))
        .subscribe(
            () -> System.out.println("Written " + names.size() + " super heroes and villains"),
            Throwable::printStackTrace);
}
Example 4
Source File: ZIMuKuCommon.java From SubTitleSearcher with Apache License 2.0 | 5 votes |
/** * 获取下载网址列表 * @return */ public static JSONArray getDetailList(String url) { String result = httpGet(baseUrl+url); //System.out.println(result); Document doc = Jsoup.parse(result); Elements matchList = doc.select("#subtb tbody tr"); if(matchList.size() == 0)return new JSONArray(); //System.out.println(matchList.html()); JSONArray resList = new JSONArray(); for(int i = 0 ; i < matchList.size(); i++) { Element row = matchList.get(i); JSONObject resRow = new JSONObject(); resRow.put("url", row.selectFirst("a").attr("href")); resRow.put("title", row.selectFirst("a").attr("title")); resRow.put("ext", row.selectFirst(".label-info").text()); Elements authorInfos = row.select(".gray"); StringBuffer authorInfo = new StringBuffer(); authorInfos.forEach(element ->{ authorInfo.append(element.text() + ","); }); if(authorInfo.length() > 0) { resRow.put("authorInfo", authorInfo.toString().substring(0, authorInfo.length()-1)); }else { resRow.put("authorInfo", ""); } resRow.put("lang", row.selectFirst("img").attr("alt")); resRow.put("rate", row.selectFirst(".rating-star").attr("title").replace("字幕质量:", "")); resRow.put("downCount", row.select("td").get(3).text()); resList.add(resRow); } return resList; }
Example 5
Source File: ApsvTimerTask.java From AlipayOrdersSupervisor-GUI with MIT License | 5 votes |
private ArrayList<ApsvOrder> findOrders(String html) { //logger.info("Html: {}", html); ArrayList<ApsvOrder> orders = new ArrayList<>(); Document doc = Jsoup.parse(html); Element ordersForm = doc.getElementById("J-submit-form"); if (ordersForm == null) { logger.error("Cannot find order list form, maybe cookie expires"); // 标记task status为异常 // TODO 弹窗提醒cookie异常 RunTasksModel.getInstance().MarkTaskException(task.id); return orders; } Elements tableBody = doc.select("#tradeRecordsIndex>tbody"); Elements orderRows = tableBody.select("tr"); orderRows.forEach(row -> { Elements timeNodes = row.select("td.time p"); String[] orderNoData = row.select("td.tradeNo p").text().split("\\|"); ApsvOrder order = new ApsvOrder(){ { taskId = task.id; time = timeNodes.get(0).text() + " " + timeNodes.get(timeNodes.size() - 1).text(); description = row.select(".memo-info").text(); memo = row.select("td.memo p").text(); tradeNo = orderNoData.length > 1 ? orderNoData[1].split(":")[1] : orderNoData[0].split(":")[1]; username = Unicode.unicodeToString(row.select("td.other p").text()); amount = Float.parseFloat(row.select("td.amount span").text().replaceAll("\\s+", "")); status = row.select("td.status p").text(); } }; order.sig = Order.Sign(order, task.pushSecret); orders.add(order); }); return orders; }
Example 6
Source File: QuietSpeculationDashboard.java From MtgDesktopCompanion with GNU General Public License v3.0 | 5 votes |
/**
 * Builds the price list of an edition from the Quiet Speculation set page.
 *
 * <p>Each {@code tr[id]} row inside the {@code SetCards} element becomes one
 * {@code CardShake}; a price that cannot be read is recorded as {@code 0.0}.
 *
 * @param ed edition whose set name selects the page
 * @return the shakers collected for that edition
 * @throws IOException declared by the overridden method
 */
@Override
protected EditionsShakers getOnlineShakesForEdition(MagicEdition ed) throws IOException {
    EditionsShakers shakers = new EditionsShakers();
    shakers.setEdition(ed);
    shakers.setDate(new Date());
    shakers.setProviderName(getName());

    String setName = ed.getSet().replace(" ", "%20");
    Document page = URLTools.extractHtml("https://www.quietspeculation.com/tradertools/prices/sets/" + setName);
    Elements rows = page.getElementById("SetCards").select("tr[id]");

    rows.forEach(row -> {
        Elements cells = row.getElementsByTag("td");
        CardShake shake = new CardShake();
        shake.setName(cells.get(0).text());

        double price;
        try {
            price = Double.parseDouble(cells.get(5).text().replaceAll("\\$", ""));
        } catch (Exception ex) {
            // Missing or unparsable price column: fall back to zero.
            price = 0.0;
        }
        shake.setPrice(price);

        shake.setEd(ed.getSet());
        shake.setDateUpdate(new Date());
        shake.setCurrency(Currency.getInstance("USD"));
        shakers.addShake(shake);
        notify(shake);
    });
    return shakers;
}
Example 7
Source File: ChannelFireballPricer.java From MtgDesktopCompanion with GNU General Public License v3.0 | 5 votes |
/**
 * Searches the shop for a card by name and returns the matching offers.
 *
 * <p>An entry is kept when its form text does not contain "Wishlist" and its first
 * link text starts with the card name, compared case-insensitively.
 *
 * @param me edition (not used by this lookup)
 * @param card card whose name is searched
 * @return the prices found, possibly empty
 * @throws IOException declared by the overridden method
 */
@Override
public List<MagicPrice> getLocalePrice(MagicEdition me, MagicCard card) throws IOException {
    List<MagicPrice> prices = new ArrayList<>();
    Document root = URLTools.extractHtml(baseUrl + "/products/search?query=" + URLTools.encode(card.getName()));
    Elements entries = root.select("ul.products li div.meta");

    entries.forEach(entry -> {
        if (entry.getElementsByTag("form").text().contains("Wishlist")) {
            return;
        }
        Elements anchors = entry.getElementsByTag("a");
        String title = anchors.first().text();
        if (!title.toLowerCase().startsWith(card.getName().toLowerCase())) {
            return;
        }

        MagicPrice price = new MagicPrice();
        price.setCountry("USA");
        price.setCurrency("USD");
        price.setSite(getName());
        price.setUrl(baseUrl + anchors.first().attr("href"));
        price.setSeller(anchors.get(1).text());
        String amount = entry.select("span[itemprop].price").first().text().replaceAll("\\$", "").trim();
        price.setValue(UITools.parseDouble(amount));
        price.setFoil(title.contains("- Foil"));
        prices.add(price);
    });

    logger.info(getName() + " found " + prices.size() + " item(s)");
    return prices;
}
Example 8
Source File: BootstrapHandlerTest.java From flow with Apache License 2.0 | 5 votes |
@Test public void getBootstrapPage_jsModulesDoNotContainDeferAttribute() throws ServiceException { List<DependencyFilter> filters = (List<DependencyFilter>) service .getDependencyFilters(); filters.add((list, context) -> { list.clear(); // remove everything return list; }); filters.add((list, context) -> { list.add(new Dependency(Dependency.Type.JS_MODULE, "//module.js", LoadMode.EAGER)); return list; }); initUI(testUI); BootstrapContext bootstrapContext = new BootstrapContext(request, null, session, testUI, this::contextRootRelativePath); Document page = pageBuilder.getBootstrapPage(bootstrapContext); Elements scripts = page.head().getElementsByTag("script"); scripts.forEach(s -> System.err.println(s.outerHtml())); Element element = scripts.stream() .filter(elem -> elem.attr("src").equals("//module.js")) .findFirst().get(); Assert.assertFalse(element.hasAttr("defer")); Element bundle = scripts.stream() .filter(el -> el.attr("src") .equals("./VAADIN/build/vaadin-bundle-1111.cache.js")) .findFirst().get(); Assert.assertFalse(bundle.hasAttr("defer")); }
Example 9
Source File: JsoupParserIntegrationTest.java From tutorials with MIT License | 5 votes |
/**
 * Demonstrates jsoup traversal calls on the {@code section} elements of {@code doc}:
 * first/last/indexed access, parents, children, siblings and iteration.
 */
@Test
public void examplesTraversing() {
    Elements sections = doc.select("section");

    // Positional access within the selection.
    Element first = sections.first();
    Element last = sections.last();
    Element thirdSection = sections.get(2); // index 2, i.e. the third match

    // Navigation relative to the first section.
    Elements ancestors = first.parents();
    Element directParent = first.parent();
    Elements childElements = first.children();
    Elements siblingElements = first.siblingElements();

    for (Element section : sections) {
        System.out.println("section: " + section);
    }
}
Example 10
Source File: SubHDCommon.java From SubTitleSearcher with Apache License 2.0 | 4 votes |
/** * 获取下载网址列表 * @return */ public static JSONArray getDetailList(String url) { String result = HtHttpUtil.http.get(baseUrl+url, HtHttpUtil.http.default_charset, HtHttpUtil.http._ua, baseUrl+url); Document doc = Jsoup.parse(result); Elements matchList = doc.select(".d_table tr"); //System.out.println(matchList.html()); JSONArray detailList = new JSONArray(); for (Element matchRow : matchList) { if(matchRow.select(".dt_edition").size() == 0)continue; String html = matchRow.html(); String htmlLower = html.toLowerCase(); String downUrl = matchRow.select(".dt_down a").attr("href"); String title = matchRow.select(".dt_edition a").text().trim(); int downCount = Integer.valueOf(RegexUtil.getMatchStr(matchRow.select(".dt_count").text(), "([\\d]+)")); String ext = ""; for(String extName : AppConfig.subExtNames) { //if(StrUtil.isNotEmpty(RegexUtil.getMatchStr(html, "(>"+extName+"<)", Pattern.CASE_INSENSITIVE))) { if(htmlLower.contains(">"+extName+"<")) { ext += extName; ext += ","; } } if(ext.endsWith(",")) { ext=ext.substring(0, ext.length()-1); }else { ext="其它"; } String lang = ""; String[] langList = new String[] {"双语", "简体", "繁体", "英文"}; for(String langName : langList) { if(htmlLower.contains(">"+langName+"<")) { lang += langName; lang += ","; } } if(lang.endsWith(",")) { lang=lang.substring(0, lang.length()-1); }else { lang="其它"; } Elements labels = matchRow.select(".label"); StringBuffer labelInfo = new StringBuffer(); labels.forEach(element ->{ labelInfo.append(element.text() + ","); }); if(labelInfo.length() > 0) { labelInfo.delete(labelInfo.length()-1, labelInfo.length()); } String zimuzu = matchRow.select("a.gray").text(); JSONObject dataRow = new JSONObject(); dataRow.put("url", downUrl); dataRow.put("title", title); dataRow.put("ext", ext); dataRow.put("lang",lang); dataRow.put("rate", "-"); dataRow.put("downCount", downCount); dataRow.put("labelInfo", labelInfo); dataRow.put("zimuzu", zimuzu); detailList.add(dataRow); } return detailList; }
Example 11
Source File: SubHDCommon.java From SubTitleSearcher with Apache License 2.0 | 4 votes |
/** * 获取下载网址列表 * @return */ public static JSONArray getDetailList(String url) { String result = HtHttpUtil.http.get(baseUrl+url, HtHttpUtil.http.default_charset, HtHttpUtil.http._ua, baseUrl+url); Document doc = Jsoup.parse(result); Elements matchList = doc.select(".d_table tr"); //System.out.println(matchList.html()); JSONArray detailList = new JSONArray(); for (Element matchRow : matchList) { if(matchRow.select(".dt_edition").size() == 0)continue; String html = matchRow.html(); String htmlLower = html.toLowerCase(); String downUrl = matchRow.select(".dt_down a").attr("href"); String title = matchRow.select(".dt_edition a").text().trim(); int downCount = Integer.valueOf(RegexUtil.getMatchStr(matchRow.select(".dt_count").text(), "([\\d]+)")); String ext = ""; for(String extName : AppConfig.subExtNames) { //if(StrUtil.isNotEmpty(RegexUtil.getMatchStr(html, "(>"+extName+"<)", Pattern.CASE_INSENSITIVE))) { if(htmlLower.contains(">"+extName+"<")) { ext += extName; ext += ","; } } if(ext.endsWith(",")) { ext=ext.substring(0, ext.length()-1); }else { ext="其它"; } String lang = ""; String[] langList = new String[] {"双语", "简体", "繁体", "英文"}; for(String langName : langList) { if(htmlLower.contains(">"+langName+"<")) { lang += langName; lang += ","; } } if(lang.endsWith(",")) { lang=lang.substring(0, lang.length()-1); }else { lang="其它"; } Elements labels = matchRow.select(".label"); StringBuffer labelInfo = new StringBuffer(); labels.forEach(element ->{ labelInfo.append(element.text() + ","); }); if(labelInfo.length() > 0) { labelInfo.delete(labelInfo.length()-1, labelInfo.length()); } String zimuzu = matchRow.select("a.gray").text(); JSONObject dataRow = new JSONObject(); dataRow.put("url", downUrl); dataRow.put("title", title); dataRow.put("ext", ext); dataRow.put("lang",lang); dataRow.put("rate", "-"); dataRow.put("downCount", downCount); dataRow.put("labelInfo", labelInfo); dataRow.put("zimuzu", zimuzu); detailList.add(dataRow); } return detailList; }
Example 12
Source File: EchoMTGDashBoard.java From MtgDesktopCompanion with GNU General Public License v3.0 | 4 votes |
/**
 * Builds the price variations of an edition from its EchoMTG set page.
 *
 * <p>The first and last rows of {@code table#set-table} are dropped before the
 * remaining rows are converted to {@code CardShake} entries.
 *
 * @param ed edition whose id and set name form the page address
 * @return the shakers collected for that edition
 * @throws IOException declared by the overridden method
 */
@Override
protected EditionsShakers getOnlineShakesForEdition(MagicEdition ed) throws IOException {
    EditionsShakers variations = new EditionsShakers();
    variations.setDate(new Date());
    variations.setEdition(ed);
    variations.setProviderName(getName());

    String setUrl = EchoMTGExport.BASE_URL + "/set/" + ed.getId().toUpperCase() + "/"
            + ed.getSet().replace(" ", "-").toLowerCase() + "/";
    Document page = RequestBuilder.build()
            .method(METHOD.GET)
            .setClient(client)
            .url(setUrl)
            .addHeader(URLTools.HOST, WEBSITE)
            .addHeader(URLTools.REFERER, EchoMTGExport.BASE_URL)
            .toHtml();

    Elements rows = page.select("table#set-table tr");
    rows.remove(rows.first());
    rows.remove(rows.last());

    rows.forEach(row -> {
        Elements cells = row.getElementsByTag("td");
        CardShake shake = new CardShake();
        shake.setEd(ed.getId());
        shake.setName(cells.get(2).getElementsByTag("a").first().text());

        double price = Double.parseDouble(cells.get(4).getElementsByTag("a").first().attr("data-price"));
        double lastWeekPrice = price;
        String changeText = cells.get(3).text();
        if (!changeText.isEmpty()) {
            // Column 3 holds a percentage; subtract that share of the current price.
            double ratio = Double.parseDouble(changeText.replace("%", "")) / 100;
            lastWeekPrice = price - (price * ratio);
        }

        shake.init(price, price, lastWeekPrice);
        shake.setCurrency(getCurrency());
        variations.addShake(shake);
    });
    return variations;
}
Example 13
Source File: EuropeanGrader.java From MtgDesktopCompanion with GNU General Public License v3.0 | 4 votes |
/**
 * Looks up a certificate on the grader's card-verifier page and maps the result
 * table to a {@code Grading}.
 *
 * @param identifier certificate number to look up
 * @return the grading, or {@code null} when {@code table.center} has no rows
 * @throws IOException declared by the overridden method
 */
@Override
public Grading loadGrading(String identifier) throws IOException {
    String url = getWebSite() + "/en/card-verifier.html";
    Document page = RequestBuilder.build()
            .method(METHOD.GET)
            .setClient(URLTools.newClient())
            .url(url)
            .addContent("certificate", identifier)
            .toHtml();

    Elements rows = page.select("table.center tr");
    if (rows.isEmpty()) {
        return null;
    }

    Grading grad = new Grading();
    grad.setGraderName(getName());
    grad.setNumberID(identifier);
    grad.setUrlInfo(url + "?certificate=" + identifier);

    logger.debug("Found " + rows.text());

    rows.forEach(row -> {
        String text = row.text();

        // Each row is "<label> : <value>"; values use a decimal comma.
        if (text.startsWith("Centring")) {
            grad.setCentering(Double.parseDouble(text.replace("Centring grade : ", "").replace(',', '.').trim()));
        }
        if (text.startsWith("Corner")) {
            grad.setCorners(Double.parseDouble(text.replace("Corner grade : ", "").replace(',', '.').trim()));
        }
        if (text.startsWith("Edges")) {
            grad.setEdges(Double.parseDouble(text.replace("Edges grade : ", "").replace(',', '.').trim()));
        }
        if (text.startsWith("Surface")) {
            grad.setSurface(Double.parseDouble(text.replace("Surface grade : ", "").replace(',', '.').trim()));
        }
        if (text.startsWith("Final")) {
            grad.setGradeNote(Double.parseDouble(text.replace("Final grade : ", "").replace(',', '.').trim()));
        }
        if (text.startsWith("Grading date")) {
            String dateText = text.replace("Grading date : ", "").replace(',', '.').trim();
            try {
                grad.setGradeDate(new SimpleDateFormat("dd/MM/yyyy").parse(dateText));
            } catch (ParseException e) {
                logger.error(e);
            }
        }
    });
    return grad;
}
Example 14
Source File: BeckettGrader.java From MtgDesktopCompanion with GNU General Public License v3.0 | 4 votes |
@Override public Grading loadGrading(String identifier) throws IOException { URLToolsClient c = URLTools.newClient(); String urlLogin = getWebSite()+"/login?utm_content=bkthp&utm_term=login"; String urlCheking = getWebSite()+"/grading/card-lookup"; Document d = RequestBuilder.build().url(urlLogin).setClient(c).method(METHOD.GET).toHtml(); String token = d.select("input[name='login_token']").first().attr("value"); d=RequestBuilder.build().url(urlLogin).setClient(c).method(METHOD.POST) .addContent("redirect_url", getWebSite()+"/account") .addContent("login_token", token) .addContent("email",getString("EMAIL")) .addContent("password", getString("PASS")) .toHtml(); boolean connected = !d.getElementsByTag("title").html().equalsIgnoreCase("Member Login"); if(!connected) throw new IOException("Error when login to website"); d=RequestBuilder.build().url(urlCheking).setClient(c).method(METHOD.GET) .addContent("item_type", "BGS") .addContent("item_id", identifier) .toHtml(); Element table = d.select("table.cardDetail").first(); if(table==null) return null; Elements trs=table.select("tr"); Grading grad = new Grading(); grad.setGraderName(getName()); grad.setNumberID(identifier); grad.setUrlInfo(getWebSite()+"?item_id="+identifier); trs.forEach(tr->{ if(tr.text().startsWith("Centering")) grad.setCentering(Double.parseDouble(tr.text().replace("Centering Grade : ","").trim())); if(tr.text().startsWith("Corner")) grad.setCorners(Double.parseDouble(tr.text().replace("Corner Grade : ","").trim())); if(tr.text().startsWith("Edges")) grad.setEdges(Double.parseDouble(tr.text().replace("Edges Grade : ","").trim())); if(tr.text().startsWith("Surfaces")) grad.setSurface(Double.parseDouble(tr.text().replace("Surfaces Grade : ","").trim())); if(tr.text().startsWith("Final")) grad.setGradeNote(Double.parseDouble(tr.text().replace("Final Grade : ","").trim())); if(tr.text().startsWith("Date")) { try { grad.setGradeDate(new SimpleDateFormat("EEEEE, MMMMM dd, 
yyyy",Locale.US).parse(tr.text().replace("Date Graded : ","").trim())); } catch(ParseException e) { logger.error(e); } } }); return grad; }