Java Code Examples for org.jsoup.nodes.Document#getElementsByClass()
The following examples show how to use
org.jsoup.nodes.Document#getElementsByClass().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BangumiCrawlerService.java From Pixiv-Illustration-Collection-Backend with Apache License 2.0 | 6 votes |
private List<Integer> querySubjectId(Integer pageNum) throws IOException, InterruptedException { List<Integer> idList = new ArrayList<>(24); int currentIndex = 0; //开始查找id并添加到文件 for (; currentIndex < pageNum; currentIndex++) { System.out.println("开始爬取第" + currentIndex + "页"); HttpRequest request = HttpRequest.newBuilder() .uri(URI.create("https://bangumi.tv/anime/browser/?sort=date&page=" + currentIndex)).GET().build(); String body = httpClient.send(request, HttpResponse.BodyHandlers.ofString()).body(); //jsoup提取文本 Document doc = Jsoup.parse(body); Elements elements = doc.getElementsByClass("subjectCover cover ll"); elements.forEach(e -> { idList.add(Integer.parseInt(e.attr("href").replaceAll("\\D", "") + "\n")); }); } return idList; }
Example 2
Source File: NewService.java From Pixiv-Illustration-Collection-Backend with Apache License 2.0 | 6 votes |
/**
 * Downloads the acg17.com news category page, converts every {@code item-list} element into an
 * {@link ACGNew} and hands the resulting list to {@code process}.
 *
 * @throws IOException if sending the request fails
 * @throws InterruptedException if the thread is interrupted while waiting for the response
 */
private void pullACG17News() throws IOException, InterruptedException {
    HttpRequest newsPageRequest = HttpRequest.newBuilder()
            .uri(URI.create("http://acg17.com/category/news/"))
            .GET()
            .build();
    String html = httpClient.send(newsPageRequest, HttpResponse.BodyHandlers.ofString()).body();
    List<ACGNew> acgNewList = Jsoup.parse(html)
            .getElementsByClass("item-list")
            .stream()
            .map(this::toAcg17News)
            .collect(Collectors.toList());
    process(acgNewList, "class", "entry");
}

/** Builds one news entry from a single {@code item-list} element of the acg17 listing. */
private ACGNew toAcg17News(Element item) {
    // The cover image URL is embedded in the inline style as url(...).
    String inlineStyle = item
            .getElementsByClass("attachment-tie-medium size-tie-medium wp-post-image")
            .get(0)
            .attr("style");
    String cover = inlineStyle.substring(inlineStyle.indexOf("url(") + 4, inlineStyle.indexOf(")"));
    Element titleLink = item.getElementsByClass("post-box-title").get(0).child(0);
    // Rewrite the 年/月/日 separators into a dash-separated date before parsing.
    String dateText = item.getElementsByClass("tie-date").get(0).text()
            .replaceAll("[年月]", "-")
            .replace("日", "");
    LocalDate createDate = LocalDate.parse(dateText);
    String intro = item.getElementsByClass("entry").get(0).child(0).text();
    return new ACGNew(titleLink.text(), intro, NewsCrawlerConstant.ACG17, cover,
            titleLink.attr("href"), createDate, NewsCrawlerConstant.ACG17);
}
Example 3
Source File: CityStats.java From zuihou-admin-boot with Apache License 2.0 | 6 votes |
/**
 * Fetches {@code url}, reads every {@code villagetr} row that has exactly three {@code td}
 * cells and stores the resulting areas as the children of {@code countyArea}.
 *
 * @param url        page to download and parse; also recorded as each area's source
 * @param countyArea parent area whose full name prefixes each child and whose children are replaced
 */
public static void parseVillagetr(String url, Area countyArea) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));
    List<Area> villages = new LinkedList<Area>();
    int sort = 1;
    for (Element row : page.getElementsByClass("villagetr")) {
        Elements cells = row.getElementsByTag("td");
        boolean hasThreeCells = cells != null && cells.size() == 3;
        if (hasThreeCells) {
            // First cell is used as the code, third cell as the name; the middle cell is unused.
            String code = cells.get(0).text();
            String name = cells.get(2).text();
            Area village = Area.builder()
                    .code(code)
                    .label(name)
                    .source(url)
                    .sortValue(sort++)
                    .level(new RemoteData<>("VILLAGETR"))
                    .fullName(countyArea.getFullName() + name)
                    .build();
            StaticLog.info("    村级数据:  {}  ", village);
            villages.add(village);
        }
    }
    countyArea.setChildren(villages);
}
Example 4
Source File: CityParser.java From zuihou-admin-boot with Apache License 2.0 | 6 votes |
private List<Area> parseCity(String provinceName, String url) { String htmlStr = HttpUtil.get(url, CHARSET); Document document = Jsoup.parse(htmlStr); Elements trs = document.getElementsByClass("citytr"); List<Area> cities = new LinkedList<Area>(); int sort = 1; for (Element tr : trs) { Elements links = tr.getElementsByTag("a"); String href = links.get(0).attr("href"); String cityCode = links.get(0).text(); // String cityCode = links.get(0).text().substring(0, 4); String cityName = links.get(1).text(); Area cityArea = Area.builder() .label(cityName).code(cityCode).source(url).sortValue(sort++) .level(new RemoteData<>("CITY")) .fullName(provinceName + cityName) .build(); cityArea.setChildren(parseCounty(provinceName + cityName, COMMON_URL + href)); StaticLog.info(" 市级数据: {} ", cityArea); cities.add(cityArea); } return cities; }
Example 5
Source File: NewDevDbApi.java From 4pdaClient-plus with Apache License 2.0 | 6 votes |
/**
 * Downloads {@code devicesTypeUrl + "all"} and turns every {@code li} found under the
 * {@code word-list} elements into a brand catalog entry.
 *
 * @param client         HTTP client used to fetch the page
 * @param devicesTypeUrl base URL of the device type; {@code "all"} is appended to it
 * @return one {@link DevCatalog} of type {@code DEVICE_BRAND} per list item, in document order
 * @throws Throwable whatever the HTTP client or the parser throws
 */
public static ArrayList<DevCatalog> parseBrands(IHttpClient client, String devicesTypeUrl) throws Throwable {
    String html = client.performGet(devicesTypeUrl + "all").getResponseBody();
    Elements brandItems = Jsoup.parse(html).getElementsByClass("word-list").select("li");

    ArrayList<DevCatalog> brands = new ArrayList<>();
    for (Element item : brandItems) {
        // Link comes from the item's anchor, the display name from the item's whole text.
        DevCatalog brand = new DevCatalog(item.getElementsByTag("a").attr("href"), item.text());
        brand.setType(DevCatalog.DEVICE_BRAND);
        brands.add(brand);
    }
    return brands;
}
Example 6
Source File: HTTPStudy.java From newblog with Apache License 2.0 | 5 votes |
/**
 * Runs a search for {@code keyword} by substituting it into {@code baseURL}, then requests the
 * target link of every result whose displayed URL contains {@code www.wenzhihuai.com}.
 *
 * @param keyword search term inserted verbatim in place of the {@code keyword} placeholder;
 *                NOTE(review): it is not URL-encoded here — confirm whether callers encode it
 * @throws Exception whatever the HTTP helper throws
 */
public static void baidu(String keyword) throws Exception {
    // String.replace substitutes literally. replaceAll would interpret '$' and '\' inside the
    // keyword as group references/escapes and either corrupt the URL or throw.
    String content = HttpHelper.getInstance().get(baseURL.replace("keyword", keyword));
    Document jsoup = Jsoup.parse(content);
    Elements elements = jsoup.getElementsByClass("result");
    for (Element element : elements) {
        String str = element.select(".c-showurl").text();
        if (str.contains("www.wenzhihuai.com")) {
            String wenzhihuai = element.select(".t").select("a").attr("href");
            HttpHelper.getInstance().get(wenzhihuai);
            logger.info("百度->温志怀URL:" + wenzhihuai);
        }
    }
}
Example 7
Source File: JsoupUtils.java From EhViewer with Apache License 2.0 | 5 votes |
/**
 * Returns the first element of {@code doc} that carries the given class.
 *
 * @param doc       document to search
 * @param className class name to look up
 * @return the first match, or {@code null} when nothing matches
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements matches = doc.getElementsByClass(className);
    if (matches == null || matches.size() <= 0) {
        return null;
    }
    return matches.get(0);
}
Example 8
Source File: SearchArticleVideoViewBinder.java From Toutiao with Apache License 2.0 | 5 votes |
/**
 * Extracts the video id and poster URL from the first {@code tt-video-box} element of the
 * given HTML fragment.
 *
 * @param content HTML to parse
 * @return a map holding {@code "id"} and/or {@code "imageUrl"} for each attribute that is
 *         present and non-empty; empty when the markup has no {@code tt-video-box} element
 */
private Map<String, String> parseJson(String content) {
    Map<String, String> map = new HashMap<>();
    Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementsByClass("tt-video-box");
    if (elements.isEmpty()) {
        // Nothing to read; previously get(0) threw IndexOutOfBoundsException here.
        return map;
    }
    String id = elements.get(0).attr("tt-videoid");
    String imageUrl = elements.get(0).attr("tt-poster");
    if (!TextUtils.isEmpty(id)) {
        map.put("id", id);
    }
    if (!TextUtils.isEmpty(imageUrl)) {
        map.put("imageUrl", imageUrl);
    }
    return map;
}
Example 9
Source File: JsoupUtils.java From MHViewer with Apache License 2.0 | 5 votes |
/**
 * Looks up {@code className} in {@code doc} and yields the first matching element.
 *
 * @param doc       document to search
 * @param className class name to look up
 * @return the first matching element, or {@code null} if there is none
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements found = doc.getElementsByClass(className);
    boolean hasMatch = found != null && found.size() > 0;
    return hasMatch ? found.get(0) : null;
}
Example 10
Source File: MoikrugStrategy.java From JavaRushTasks with MIT License | 5 votes |
/**
 * Collects vacancies for {@code searchString}, requesting result pages starting at page 0
 * until a page contains no {@code job} elements.
 *
 * <p>Paging also stops when a page cannot be loaded. Otherwise the loop would dereference a
 * {@code null} document on the first page, or keep re-reading the previously loaded page.
 *
 * @param searchString query passed to {@code getDocument}
 * @return the vacancies gathered from all pages that were loaded successfully
 */
@Override
public List<Vacancy> getVacancies(String searchString) {
    List<Vacancy> result = new ArrayList<>();
    int pageNum = 0;
    while (true) {
        Document doc;
        try {
            doc = getDocument(searchString, pageNum);
        } catch (IOException e) {
            e.printStackTrace();
            // Give up on paging; return what has been collected so far.
            break;
        }
        if (doc == null) {
            break;
        }

        Elements vacancies = doc.getElementsByClass("job");
        if (vacancies.isEmpty()) {
            break;
        }

        for (Element element : vacancies) {
            if (element == null) {
                continue;
            }
            Vacancy vac = new Vacancy();
            vac.setTitle(element.getElementsByAttributeValue("class", "title").text());
            vac.setCompanyName(element.getElementsByAttributeValue("class", "company_name").text());
            vac.setSiteName(URL_FORMAT);
            vac.setUrl("https://moikrug.ru" + element.select("a[class=job_icon]").attr("href"));
            // text() yields "" when nothing matches, so no extra empty-string mapping is needed.
            vac.setSalary(element.getElementsByAttributeValue("class", "salary").text());
            vac.setCity(element.getElementsByAttributeValue("class", "location").text());
            result.add(vac);
        }
        pageNum++;
    }
    return result;
}
Example 11
Source File: WikiCorpusTask.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
private String retrieveLinkInLanguage(Document document, String language) { Elements elementsByClass = document.getElementsByClass("interwiki-" + language); if (elementsByClass == null || elementsByClass.isEmpty()) { // logger.info("link in " + language + " was not found"); return null; } return elementsByClass.first().child(0).attr("href"); }
Example 12
Source File: CityStats.java From zuihou-admin-boot with Apache License 2.0 | 5 votes |
/**
 * Fetches {@code url}, builds one town-level {@link Area} per {@code towntr} row that has
 * exactly two links, loads the villages of each town and stores the towns as the children of
 * {@code countyArea}.
 *
 * <p>The first link of a row supplies the relative href and the code (first nine characters of
 * its text); the second link supplies the name.
 *
 * @param url        page to download and parse; also recorded as each area's source
 * @param countyArea parent area whose full name prefixes each town and whose children are replaced
 */
public static void parseTowntr(String url, Area countyArea) {
    String htmlStr = HttpUtil.get(url, CHARSET);
    Document document = Jsoup.parse(htmlStr);
    Elements trs = document.getElementsByClass("towntr");

    List<Area> counties = new LinkedList<Area>();
    int sort = 1;
    for (Element tr : trs) {
        Elements links = tr.getElementsByTag("a");
        if (links == null || links.size() != 2) {
            continue;
        }
        String href = links.get(0).attr("href");
        String towntrCode = links.get(0).text().substring(0, 9);
        String towntrName = links.get(1).text();

        Area towntrArea = Area.builder().label(towntrName).code(towntrCode).source(url)
                .sortValue(sort++).level(new RemoteData<>("TOWNTR")).fullName(countyArea.getFullName() + towntrName)
                .build();
        StaticLog.info("    乡镇级数据:  {}  ", towntrArea);

        // Attach the villages to the town itself. Passing countyArea made parseVillagetr set
        // them as the county's children, which the setChildren call below then overwrote.
        String villageUrl = COMMON_URL + href.substring(2, 5) + "/" + href.substring(5, 7) + "/" + href;
        parseVillagetr(villageUrl, towntrArea);

        counties.add(towntrArea);
    }
    countyArea.setChildren(counties);
}
Example 13
Source File: ConfluenceServerRCE.java From TrackRay with GNU General Public License v3.0 | 5 votes |
/**
 * Interactive loop: prompts for a file path, sends the file-read payload built from it through
 * {@code attack}, and prints the escaped HTML of the {@code wiki-content} elements of the
 * response. Typing {@code exit} ends the loop.
 *
 * @return always the empty string
 */
@Override
public Object start() {
    println("请输入要读取的文件 如/etc/passwd,输入exit退出");
    for (String input = getInput(); !input.equals("exit"); input = getInput()) {
        // Drop one leading slash; the payload already supplies the "file:///" prefix.
        String path = input.startsWith("/") ? input.substring(1) : input;
        Document response = attack(String.format(readFilePayload, "file:///".concat(path)));
        if (response == null) {
            continue;
        }
        Elements wiki = response.getElementsByClass("wiki-content");
        if (wiki == null || !wiki.hasText()) {
            continue;
        }
        String html = wiki.html();
        println("=========================");
        sendColorMsg(Message.RED(HtmlUtils.htmlEscape(html)));
        println("=========================");
    }
    return "";
}
Example 14
Source File: CityStats.java From zuihou-admin-cloud with Apache License 2.0 | 5 votes |
public static void parseProvince(String url) { String htmlStr = HttpUtil.get(url, CHARSET); Document document = Jsoup.parse(htmlStr); // 获取 class='provincetr' 的元素 Elements elements = document.getElementsByClass("provincetr"); List<Area> provinces = new LinkedList<Area>(); int sort = 1; for (Element element : elements) { // 获取 elements 下属性是 href 的元素 Elements links = element.getElementsByAttribute("href"); for (Element link : links) { String provinceName = link.text(); String href = link.attr("href"); String provinceCode = href.substring(0, 2); StaticLog.info("provinceName: {} , provinceCode: {} .", provinceName, provinceCode); Area provinceArea = Area.builder().code(provinceCode).label(provinceName).source(url) .sortValue(sort++).fullName(provinceName).level(new RemoteData<>("PROVINCE")) .build(); StaticLog.info("省级数据: {} ", provinceArea); parseCity(COMMON_URL + href, provinceArea); provinces.add(provinceArea); } } StaticLog.info(JSONUtil.toJsonPrettyStr(provinces)); }
Example 15
Source File: CityParser.java From zuihou-admin-cloud with Apache License 2.0 | 5 votes |
/** * 乡镇级数据 * * @param url * @return */ public List<Area> parseTowntr(String fullName, String url) { String htmlStr = HttpUtil.get(url, CHARSET); Document document = Jsoup.parse(htmlStr); Elements trs = document.getElementsByClass("towntr"); List<Area> counties = new LinkedList<Area>(); int sort = 1; for (Element tr : trs) { Elements links = tr.getElementsByTag("a"); if (links == null || links.size() != 2) { continue; } String href = links.get(0).attr("href"); String towntrCode = links.get(0).text(); // String towntrCode = links.get(0).text().substring(0, 6); String towntrName = links.get(1).text(); Area towntrArea = Area.builder() .label(towntrName).code(towntrCode).source(url) .fullName(fullName + towntrName) .level(new RemoteData<>("TOWNTR")) .sortValue(sort++) // .nodes(parseVillagetr(fullName + towntrName, COMMON_URL + href.subSequence(2, 5).toString() + "/" + href.substring(5, 7) + "/" + href)) .build(); StaticLog.info(" 乡镇级数据: {} ", towntrArea); counties.add(towntrArea); } return counties; }
Example 16
Source File: Character.java From KaellyBot with GNU General Public License v3.0 | 4 votes |
public static Character getCharacter(String url, Language lg) throws IOException { Document doc = JSoupManager.getDocument(url); String bigSkinURL = doc.getElementsByClass("ak-entitylook").first().attr("style"); bigSkinURL = bigSkinURL.substring(bigSkinURL.indexOf("https://"), bigSkinURL.indexOf(")")); String littleSkinURL = doc.getElementsByClass("ak-entitylook").last().toString(); littleSkinURL = littleSkinURL.substring(littleSkinURL.indexOf("https://"), littleSkinURL.indexOf(")")); String pseudo = doc.getElementsByClass("ak-return-link").first().text(); String level = doc.getElementsByClass("ak-directories-level").first().text() .replace(Translator.getLabel(lg, "whois.extract.level"), "").trim(); String classe = doc.getElementsByClass("ak-directories-breed").first().text(); String server = doc.getElementsByClass("ak-directories-server-name").first().text(); String score = doc.getElementsByClass("ak-score-text").first().text() + " (" + doc.getElementsByClass("ak-progress-bar-text").first().text() + ")"; // Optional String guildName = null; String guildUrl = null; String alliName = null; String alliUrl = null; Elements elem = doc.getElementsByClass("ak-infos-guildname"); if (!elem.isEmpty()) { guildName = elem.first().text(); guildUrl = elem.first().select("a").attr("abs:href"); elem = doc.getElementsByClass("ak-infos-alliancename"); if (!elem.isEmpty()) { alliName = elem.first().text(); alliUrl = elem.first().select("a").attr("abs:href"); } } StringBuilder ladderXP = new StringBuilder(); StringBuilder ladderKoli = new StringBuilder(); StringBuilder ladderSuccess = new StringBuilder(); elem = doc.getElementsByClass("ak-container ak-table ak-responsivetable"); if (!elem.isEmpty()) { ladderXP.append(doc.getElementsByClass("ak-total-xp").first().text()).append("\n"); for(Element cote : doc.getElementsByClass("ak-total-kolizeum")) if (! 
cote.text().endsWith("-1")) ladderKoli.append(cote.text().replace(Translator.getLabel(lg, "whois.extract.koli"), "").trim()).append("\n"); Elements trs = elem.first().getElementsByTag("tbody").first().getElementsByTag("tr"); for (Element tr : trs) { String ladderText = tr.getElementsByTag("td").first().text() + " : "; tr.getElementsByTag("td").first().remove(); if (!tr.getElementsByTag("td").first().text().equals("-")) ladderXP.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").first().text())).append("\n"); if (!tr.getElementsByTag("td").get(1).text().equals("-")) ladderKoli.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").get(1).text())).append("\n"); if (!tr.getElementsByTag("td").last().text().equals("-")) ladderSuccess.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").last().text())).append("\n"); } } return new Character(pseudo, level, classe, server, score, guildName, guildUrl, alliName, alliUrl, littleSkinURL, bigSkinURL, url, ladderXP.toString(), ladderKoli.toString(), ladderSuccess.toString()); }
Example 17
Source File: OneKeyWifi.java From zhangshangwuda with Apache License 2.0 | 4 votes |
/**
 * Parses {@code html} and returns the combined text of all elements with class {@code msg}.
 *
 * @param html markup to parse
 * @return the text of the {@code msg} elements
 */
public static String getErrorMessage(String html) {
    return Jsoup.parse(html).getElementsByClass("msg").text();
}
Example 18
Source File: addPingLun.java From xmpp with Apache License 2.0 | 4 votes |
public static void main(String[] args) { File in = new File("index.html"); News_pinglunDaoImpl ndi=new News_pinglunDaoImpl(); try { Document doc = Jsoup.parse(in, "UTF-8", ""); Elements e1 = doc.getElementsByClass("comment_item"); for (int i = e1.size()-1; i>=0; i--) { String ptime=e1.get(i).getElementsByClass("ptime").text(); ptime=ptime.replaceAll("����", ""); System.out.println(ptime + "\t" +e1.get(i).getElementsByClass("username") .text() + "\t" + (e1.get(i).getElementsByTag("img").attr("src")) + "\t" + e1.get(i).getElementsByClass("comment_body").text()); int id = 30;//����id String user = e1.get(i).getElementsByClass("username").text()+ ";" + (e1.get(i).getElementsByTag("img").attr("src")); String plocation = ""; String pcontent = e1.get(i).getElementsByClass("comment_body").text(); String zan = "0"; News_pinglun news = new News_pinglun(id, user, plocation, ptime, pcontent, zan); if (ndi.save(news)) { } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
Example 19
Source File: CustomVRaptorIntegration.java From mamute with Apache License 2.0 | 4 votes |
/**
 * Parses {@code html} and returns every element carrying {@code cssClass}.
 *
 * @param html     markup to parse
 * @param cssClass class name to look up
 * @return the matching elements
 */
protected Elements getElementsByClass(String html, String cssClass) {
    return Jsoup.parse(html).getElementsByClass(cssClass);
}
Example 20
Source File: TCGPlayerDeckSniffer.java From MtgDesktopCompanion with GNU General Public License v3.0 | 4 votes |
/**
 * Reads the deck search result pages {@code 1..MAX_PAGE} for the configured format and turns
 * every {@code gradeA} row into a {@link RetrievableDeck}.
 *
 * <p>Per row, the cells are read by position: 0 = mana colours, 1 = deck name and link,
 * 2 = player, 3 = description.
 *
 * @return the decks found, in page and document order
 * @throws IOException if a page cannot be read
 */
@Override
public List<RetrievableDeck> getDeckList() throws IOException {
    String url = getString(URL) + "/magic/deck/search?format=" + getString(FORMAT);
    logger.debug("get List deck at " + url);

    // CSS marker found in the mana cell -> colour symbol, in output order.
    String[][] manaSymbols = {
        {"white-mana", "{W}"},
        {"blue-mana", "{U}"},
        {"black-mana", "{B}"},
        {"red-mana", "{R}"},
        {"green-mana", "{G}"},
    };

    List<RetrievableDeck> list = new ArrayList<>();
    int maxPage = getInt(MAX_PAGE);
    for (int page = 1; page <= maxPage; page++) {
        url = getString(URL) + "/magic/deck/search?format=" + getString(FORMAT) + "&page=" + page;
        Document d = Jsoup.parse(IncapsulaParser.readUrl(url));

        for (Element tr : d.getElementsByClass("gradeA")) {
            RetrievableDeck deck = new RetrievableDeck();

            String manaCellHtml = tr.getElementsByTag(MTGConstants.HTML_TAG_TD).get(0).toString();
            StringBuilder mana = new StringBuilder();
            for (String[] symbol : manaSymbols) {
                if (manaCellHtml.contains(symbol[0])) {
                    mana.append(symbol[1]);
                }
            }

            Element nameCell = tr.getElementsByTag(MTGConstants.HTML_TAG_TD).get(1);
            String deckName = nameCell.text();
            String link = getString(URL) + nameCell.getElementsByTag("a").attr("href");
            String deckPlayer = tr.getElementsByTag(MTGConstants.HTML_TAG_TD).get(2).text();
            String deckDesc = tr.getElementsByTag(MTGConstants.HTML_TAG_TD).get(3).text();

            deck.setColor(mana.toString());
            deck.setAuthor(deckPlayer);
            deck.setName(deckName);
            deck.setDescription(deckDesc);
            try {
                deck.setUrl(new URI(link));
            } catch (URISyntaxException e) {
                // An unparsable link leaves the deck without a URL.
                deck.setUrl(null);
            }
            list.add(deck);
        }
    }
    return list;
}