Java Code Examples for org.jsoup.nodes.Element#select()
The following examples show how to use
org.jsoup.nodes.Element#select().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SpiderService.java From Doctor with Apache License 2.0 | 6 votes |
/** * 获取疾病属性 * @param attr * @return * @throws IOException */ private List<Propertiy> queryPropertiys(String attr){ List<Propertiy> propertiyList = new ArrayList<>(); Propertiy propertiy; Document document = getDocument(attr); Elements elements = document.select(div_content); for (Element element : elements){ propertiy = new Propertiy(); if (element.select(span_con)!=null && element.select(span_con).first()!=null) { propertiy.setName(element.select(span_con).first().text()); }else{ propertiy.setName(p_default); } //爬取属性 propertiy.setId(-1); propertiy.setNumber(propertiyList.size()+1); propertiy.setValue(getValue(element)); propertiyList.add(propertiy); } return propertiyList; }
Example 2
Source File: LeaveOneOutCV.java From NLIWOD with GNU Affero General Public License v3.0 | 6 votes |
public static ArrayList<String> loadSystemP(String system){ Path datapath = Paths.get("./src/main/resources/QALD6MultilingualLogs/multilingual_" + system + ".html"); ArrayList<String> result = Lists.newArrayList(); try{ String loadedData = Files.lines(datapath).collect(Collectors.joining()); Document doc = Jsoup.parse(loadedData); Element table = doc.select("table").get(5); Elements tableRows = table.select("tr"); for(Element row: tableRows){ Elements tableEntry = row.select("td"); result.add(tableEntry.get(2).ownText()); } result.remove(0); //remove the head of the table return result; }catch(IOException e){ e.printStackTrace(); log.debug("loading failed."); return result; } }
Example 3
Source File: HiParser.java From hipda with GNU General Public License v2.0 | 6 votes |
private static SimpleListItemBean parseFriendInfo(Element root) { SimpleListItemBean item = new SimpleListItemBean(); item.setTitle("好友信息"); Elements aES = root.select("a"); if (aES.size() > 0) { String uid = Utils.getMiddleString(aES.first().attr("href"), "uid=", "&"); item.setAvatarUrl(HiUtils.getAvatarUrlByUid(uid)); item.setUid(uid); item.setAuthor(aES.first().text()); } // new Elements imgES = root.select("img"); if (imgES.size() > 0) { if (imgES.first().attr("src").contains(HiUtils.NewPMImage)) { item.setNew(true); } } //remove add friend link/text if (aES.size() > 1) { aES.get(1).remove(); } item.setInfo(root.text()); return item; }
Example 4
Source File: CDTClassifierEvaluation.java From NLIWOD with GNU Affero General Public License v3.0 | 6 votes |
public static ArrayList<String> loadSystemR(String system){ Path datapath = Paths.get("./src/main/resources/QALD6MultilingualLogs/multilingual_" + system + ".html"); ArrayList<String> result = Lists.newArrayList(); try{ String loadedData = Files.lines(datapath).collect(Collectors.joining()); Document doc = Jsoup.parse(loadedData); Element table = doc.select("table").get(5); Elements tableRows = table.select("tr"); for(Element row: tableRows){ Elements tableEntry = row.select("td"); result.add(tableEntry.get(1).ownText()); } result.remove(0); //remove the head of the table return result; }catch(IOException e){ e.printStackTrace(); log.debug("loading failed."); return result; } }
Example 5
Source File: SelectorTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void descendant() { String h = "<div class=head><p class=first>Hello</p><p>There</p></div><p>None</p>"; Document doc = Jsoup.parse(h); Element root = doc.getElementsByClass("head").first(); Elements els = root.select(".head p"); assertEquals(2, els.size()); assertEquals("Hello", els.get(0).text()); assertEquals("There", els.get(1).text()); Elements p = root.select("p.first"); assertEquals(1, p.size()); assertEquals("Hello", p.get(0).text()); Elements empty = root.select("p .first"); // self, not descend, should not match assertEquals(0, empty.size()); Elements aboveRoot = root.select("body div.head"); assertEquals(0, aboveRoot.size()); }
Example 6
Source File: MovieServiceImpl.java From albert with MIT License | 5 votes |
@Override public List<Movie> getPage(PageInfo page) { Document doc=null; try { doc = getConnect(getPageUrl(page.getCurPageNo())).get(); } catch (IOException e) { throw new RuntimeException(e); } Elements els = doc.select("#post_container > li"); if(els != null){ List<Movie> Movies = new ArrayList<>(); for(int i = 0;i<els.size();i++){ Element li = els.get(i); Elements as = li.select(".thumbnail a"); for(Element a:as){ String href = a.attr("href"); Elements img = a.select(" > img"); String imgUrl = img.attr("src"); String title = a.attr("title"); if(!checkHave(title)){ Movie vo = getRecord(href); vo.setName(title); vo.setImg(imgUrl); movieMapper.addMovie(vo); logger.info("插入:"+vo.getName()); Movie vi = movieMapper.getMovieDetailbyName(title); vi.setReviewNum(0); vi.setAddTime(new Date()); vi.setUpdateTime(new Date()); super.sendMessage(vi, Constants.Cache.Type.save); Movies.add(vo); } } // if(i==2)break; } return Movies; } return null; }
Example 7
Source File: EudicSentence.java From ankihelper with GNU General Public License v3.0 | 5 votes |
/**
 * Returns the first match of {@code query} below {@code soup}.
 *
 * @param soup     element to search in
 * @param query    CSS query passed to {@code select}
 * @param toString {@code true} for the match's {@code toString()} form, {@code false} for
 *                 its text
 * @return the requested form of the first match, or {@code ""} when nothing matches
 */
static String getSingleQueryResult(Element soup, String query, boolean toString) {
    Elements matches = soup.select(query);
    if (matches.isEmpty()) {
        return "";
    }
    Element firstMatch = matches.get(0);
    return toString ? firstMatch.toString() : firstMatch.text();
}
Example 8
Source File: indianExpress.java From Gazetti_Newspaper_Reader with MIT License | 5 votes |
/**
 * Updates {@code mImageURL} from the first element matching the configured Indian Express
 * image selector and returns it.
 *
 * @param bodyElement element to search in
 * @return the {@code src} of the first match, or the previous {@code mImageURL} value when
 *         nothing matches
 */
private String getImageURL(Element bodyElement) {
    String imageSelector = ConfigService.getInstance().getIndianExpressImage();
    Elements candidates = bodyElement.select(imageSelector);
    if (!candidates.isEmpty()) {
        mImageURL = candidates.first().attr("src");
    }
    return mImageURL;
}
Example 9
Source File: AppsGamesCatalogApi.java From 4pdaClient-plus with Apache License 2.0 | 5 votes |
/**
 * Loads the topics listed under one entry of the apps catalog page.
 *
 * <p>The page at {@code APPS_CATALOG_URL} is fetched, the section following the
 * {@code entry<catalogId>} anchor is cut out with a regular expression and parsed. Every
 * {@code ol[type=1]} list is treated as a sub-category whose title is its first
 * {@code span}; each {@code li} containing a link becomes a {@code Topic}.
 *
 * @param client    HTTP client used to fetch the catalog page
 * @param catalogId id appended to {@code "entry"} to locate the section
 * @return the topics found; empty when the section is not present
 * @throws IOException declared for failures while fetching the page
 */
public static ArrayList<Topic> loadCategoryThemes(IHttpClient client, String catalogId) throws IOException {
    String pageBody = client.performGet(APPS_CATALOG_URL).getResponseBody();
    ArrayList<Topic> res = new ArrayList<>();

    // NOTE(review): catalogId is concatenated into the regex unescaped; presumably it is
    // always a plain id without regex metacharacters - confirm against callers.
    Pattern sectionPattern = Pattern.compile("<a name=\"entry" + catalogId + "\">([\\s\\S]*?)</div>(?:<!--Begin Msg Number|<!-- TABLE FOOTER)", Pattern.CASE_INSENSITIVE);
    Matcher sectionMatcher = sectionPattern.matcher(pageBody);
    if (!sectionMatcher.find()) {
        return res;
    }

    // Loop-invariant: compile once instead of once per topic.
    Pattern descriptionPattern = Pattern.compile("</a>(?:\\s*</b>\\s*-\\s*)(.*)?(?:<br\\s*/>|$)", Pattern.CASE_INSENSITIVE);

    Document doc = Jsoup.parse(sectionMatcher.group(1));
    for (Element subCategoryElement : doc.select("ol[type=1]")) {
        String subCategoryTitle = "";
        Elements titleElements = subCategoryElement.select("span");
        if (titleElements.size() > 0) {
            subCategoryTitle = titleElements.first().text();
        }

        for (Element topicElement : subCategoryElement.select("li")) {
            Elements links = topicElement.select("a");
            if (links.size() == 0) {
                continue;
            }
            Element link = links.get(0);
            Uri uri = Uri.parse(link.attr("href"));
            Topic topic = new Topic(uri.getQueryParameter("showtopic"), link.text());

            // The description is whatever follows the link in the item's HTML.
            Matcher descriptionMatcher = descriptionPattern.matcher(topicElement.html());
            if (descriptionMatcher.find()) {
                topic.setDescription(descriptionMatcher.group(1));
            }
            topic.setForumTitle(subCategoryTitle);
            res.add(topic);
        }
    }
    return res;
}
Example 10
Source File: OutputFormatter.java From JumpGo with Mozilla Public License 2.0 | 5 votes |
private int append(Element node, StringBuilder sb, String tagName) { int countOfP = 0; // Number of P elements in the article int paragraphWithTextIndex = 0; // is select more costly then getElementsByTag? MAIN: for (Element e : node.select(tagName)) { Element tmpEl = e; // check all elements until 'node' while (tmpEl != null && !tmpEl.equals(node)) { if (unlikely(tmpEl)) continue MAIN; tmpEl = tmpEl.parent(); } String text = node2Text(e); if (text.isEmpty() || text.length() < getMinParagraph(paragraphWithTextIndex) || text.length() > SHelper.countLetters(text) * 2) { continue; } if (e.tagName().equals("p")) { countOfP++; } sb.append(text); sb.append("\n\n"); paragraphWithTextIndex += 1; } return countOfP; }
Example 11
Source File: SelectorTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks a three-level descendant selector evaluated from the first {@code .head}
 * element, and that a selector anchored above the context element matches nothing.
 */
@Test
public void deeperDescendant() {
    String html = "<div class=head><p><span class=first>Hello</div><div class=head><p class=first><span>Another</span><p>Again</div>";
    Element root = Jsoup.parse(html).getElementsByClass("head").first();

    Elements matches = root.select("div p .first");
    assertEquals(1, matches.size());
    Element onlyMatch = matches.first();
    assertEquals("Hello", onlyMatch.text());
    assertEquals("span", onlyMatch.tagName());

    Elements aboveRoot = root.select("body p .first");
    assertEquals(0, aboveRoot.size());
}
Example 12
Source File: SynonymDiscriminationExtractor.java From superword with Apache License 2.0 | 5 votes |
/**
 * Parses synonym-discrimination entries from an HTML page.
 *
 * <p>Each element matching {@code SYNONYM_DISCRIMINATION_CSS_PATH} yields one entry made of
 * its title, the first of exactly two description elements, and the words listed under
 * {@code WORDS}. Entries without exactly two description elements are logged and skipped.
 * Any exception is logged and ends parsing; entries collected so far are still returned.
 *
 * @param html page source to parse
 * @return the parsed entries, possibly empty
 */
public static Set<SynonymDiscrimination> parseSynonymDiscrimination(String html) {
    Set<SynonymDiscrimination> data = new HashSet<>();
    try {
        Elements entries = Jsoup.parse(html).select(SYNONYM_DISCRIMINATION_CSS_PATH);
        for (Element entry : entries) {
            String title = entry.select(TITLE).text().trim();
            Elements descriptions = entry.select(DES);
            if (descriptions.size() != 2) {
                LOGGER.error("解析描述信息出错,elements.size=" + descriptions.size());
                continue;
            }
            String des = descriptions.get(0).text()
                    .replace("“ ”", "")
                    .replace("“ ", "“")
                    .trim();

            SynonymDiscrimination synonymDiscrimination = new SynonymDiscrimination();
            synonymDiscrimination.setTitle(title);
            synonymDiscrimination.setDes(des);

            for (Element wordElement : entry.select(WORDS)) {
                String word = wordElement.text();
                // String.split never returns null, so only the part count is checked.
                String[] parts = word.split(":");
                if (parts.length != 2) {
                    LOGGER.error("解析词义信息出错:" + word);
                    continue;
                }
                synonymDiscrimination.addWord(new Word(parts[0].trim(), parts[1].trim()));
            }

            data.add(synonymDiscrimination);
            LOGGER.info("解析出同义词辨析:" + synonymDiscrimination);
        }
    } catch (Exception e) {
        LOGGER.error("解析同义词辨析出错", e);
    }
    return data;
}
Example 13
Source File: MTGoldFishDashBoard.java From MtgDesktopCompanion with GNU General Public License v3.0 | 5 votes |
@Override public List<CardDominance> getBestCards(MagicFormat.FORMATS f, String filter) throws IOException { // spells, creatures, all, lands String u = getString(WEBSITE) + "/format-staples/" + f.name().toLowerCase() + "/full/" + filter; if(f == MagicFormat.FORMATS.COMMANDER) u=getString(WEBSITE) + "/format-staples/commander_1v1/full/" + filter; Document doc = URLTools.extractHtml(u); logger.debug("get best cards : " + u); Elements trs = doc.select("table tr"); trs.remove(0); trs.remove(0); List<CardDominance> ret = new ArrayList<>(); for (Element e : trs) { Elements tds = e.select(MTGConstants.HTML_TAG_TD); try { int correct = filter.equalsIgnoreCase("lands") ? 1 : 0; CardDominance d = new CardDominance(); d.setPosition(Integer.parseInt(tds.get(0).text())); d.setCardName(tds.get(1).text()); d.setDecksPercent(Double.parseDouble(tds.get(3 - correct).text().replaceAll("\\%", ""))); d.setPlayers(Double.parseDouble(tds.get(4 - correct).text().replaceAll("\\%", ""))); ret.add(d); } catch (Exception ex) { logger.error("Error parsing " + tds, ex); } } return ret; }
Example 14
Source File: LessonsTool.java From zhangshangwuda with Apache License 2.0 | 4 votes |
public static List<Map<String, String>> getLessonsList(Context context, String html) { Document doc = null; thtml = html; if (StringUtils.isEmpty(html)) { return null; } doc = Jsoup.parse(thtml); if (doc == null) { return null; } List<Map<String, String>> list = new ArrayList<Map<String, String>>(); Elements lessons = doc.select("tr[align=center]"); for (Element lesson : lessons) { Elements times = lesson.select("td[width=113]"); int weekday = 0; for (Element time : times) { String tinfo = time.text(); if (tinfo.length() < 2) { ++weekday; continue; } else { Map<String, String> map = new HashMap<String, String>(); Integer tid = LessonsSharedPreferencesTool .getLessonsId(context); ++tid; LessonsSharedPreferencesTool.setLessonsId(context, tid); // 设置课程ID map.put("id", String.valueOf(tid)); // 提取课程名 map.put("name", lesson.select("td[width=80]").text()); // 提取教师名 map.put("teacher", lesson.select("td[width=52]").text()); // 提取第几星期上课 ++weekday; map.put("day", Integer.toString(weekday)); // 提取起止周数 int tpos = tinfo.indexOf("周"); map.put("ste", tinfo.substring(0, tpos)); // 提取每几周 tinfo = tinfo.substring(tpos + 3); map.put("mjz", tinfo.substring(0, 1)); // 提取第几节上课 tinfo = tinfo.substring(4); tpos = tinfo.indexOf("节"); map.put("time", tinfo.substring(0, tpos)); // 提取上课地点 if (tinfo.length() > tpos + 2) { tinfo = tinfo.substring(tpos + 2); map.put("place", tinfo.substring(0)); } else { map.put("place", ""); } // 提取备注信息 map.put("other", lesson.select("td[width=100]").text()); list.add(map); } } } return list; }
Example 15
Source File: Utils.java From SteamGifts with MIT License | 4 votes |
/** * Loads giveaways from a list page. * <p>This is not suitable for loading individual giveaway instances from the featured list, as the HTML layout differs (see {@link LoadGiveawayDetailsTask#loadGiveaway(Document, Uri)}</p> * * @param document the loaded document * @return list of giveaways */ public static List<Giveaway> loadGiveawaysFromList(Document document) { Elements giveaways = document.select(".giveaway__row-inner-wrap"); List<Giveaway> giveawayList = new ArrayList<>(); for (Element element : giveaways) { // Basic information Element link = element.select("h2 a").first(); Giveaway giveaway = null; if (link.hasAttr("href")) { Uri linkUri = Uri.parse(link.attr("href")); String giveawayLink = linkUri.getPathSegments().get(1); String giveawayName = linkUri.getPathSegments().get(2); giveaway = new Giveaway(giveawayLink); giveaway.setName(giveawayName); } else { giveaway = new Giveaway(null); giveaway.setName(null); } giveaway.setTitle(link.text()); giveaway.setCreator(element.select(".giveaway__username").text()); // Entries, would usually have comment count too... but we don't display that anywhere. Elements links = element.select(".giveaway__links a span"); giveaway.setEntries(parseInt(links.first().text().split(" ")[0])); giveaway.setEntered(element.hasClass("is-faded")); // More details Elements icons = element.select("h2 a"); Element icon = icons.size() < 2 ? null : icons.get(icons.size() - 2); Uri uriIcon = icon == link || icon == null ? null : Uri.parse(icon.attr("href")); Utils.loadGiveaway(giveaway, element, "giveaway", "giveaway__heading__thin", uriIcon); giveawayList.add(giveaway); } return giveawayList; }
Example 16
Source File: FormatUtil.java From wlmedia with Apache License 2.0 | 4 votes |
/**
 * Converts the home page into a JSON array of link groups.
 *
 * <p>Every element with class {@code bx-sya} becomes one object of the form
 * {@code {"type": <text of its first "hd" element>, "values": [...]}}, where each anchor
 * inside its first {@code bd} element contributes
 * {@code {"name": <text of the anchor's "i" children>, "url": uri + href}}.
 *
 * <p>All inserted strings are JSON-escaped, so quotes, backslashes or control characters
 * in the page no longer produce malformed output.
 *
 * @param html page source to parse
 * @param uri  prefix prepended to every link's {@code href}
 * @return the JSON text; {@code "[]"} when the page has no {@code bx-sya} element
 */
public static String formatHomePage(String html, String uri) {
    StringBuilder json = new StringBuilder("[");
    Document document = Jsoup.parse(html);
    Elements sections = document.getElementsByClass("bx-sya");
    for (int i = 0; i < sections.size(); i++) {
        if (i > 0) {
            json.append(",");
        }
        Element section = sections.get(i);
        Element heading = section.getElementsByClass("hd").get(0);
        json.append("{\"type\":\"")
                .append(escapeJsonString(heading.text()))
                .append("\",\"values\":[");

        Elements links = section.getElementsByClass("bd").get(0).select("a");
        for (int j = 0; j < links.size(); j++) {
            if (j > 0) {
                json.append(",");
            }
            Element link = links.get(j);
            json.append("{\"name\":\"")
                    .append(escapeJsonString(link.select("i").text()))
                    .append("\",\"url\":\"")
                    .append(escapeJsonString(uri + link.attr("href")))
                    .append("\"}");
        }
        json.append("]}");
    }
    json.append("]");
    return json.toString();
}

/** Escapes quotes, backslashes and control characters for use inside a JSON string. */
private static String escapeJsonString(String value) {
    StringBuilder escaped = new StringBuilder(value.length() + 8);
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        switch (c) {
            case '"':
                escaped.append("\\\"");
                break;
            case '\\':
                escaped.append("\\\\");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\t':
                escaped.append("\\t");
                break;
            default:
                if (c < 0x20) {
                    // Remaining control characters need the four-digit hex form.
                    escaped.append(String.format("\\u%04x", (int) c));
                } else {
                    escaped.append(c);
                }
                break;
        }
    }
    return escaped.toString();
}
Example 17
Source File: ParseV9PronVideo.java From v9porn with MIT License | 4 votes |
private static List<V9PornItem> parserByDivContainer(Element container) { List<V9PornItem> v9PornItemList = new ArrayList<>(); Elements select = container.select("div.row>div.col-sm-12>div.row>div"); for (Element item : select) { Element a = item.selectFirst("a"); if (a == null) { continue; } V9PornItem v9PornItem = new V9PornItem(); String title = a.getElementsByClass("video-title").first().text().trim(); v9PornItem.setTitle(title); Element imgEle = a.selectFirst("img.img-responsive"); if (imgEle != null) { v9PornItem.setImgUrl(imgEle.attr("src")); } Element durationEle = a.selectFirst("span.duration"); if (durationEle != null) { v9PornItem.setDuration(durationEle.text().trim()); } else { v9PornItem.setDuration("00:00"); } String contentUrl = a.attr("href"); String viewKey = contentUrl.substring(contentUrl.indexOf("?") + 1); v9PornItem.setViewKey(viewKey); String allInfo = item.text(); // Added: / 添加時間: / 添加时间: int start = allInfo.indexOf("添加时间:"); if (start == -1) { start = allInfo.indexOf("Added:"); if (start == -1) { start = allInfo.indexOf("添加時間:"); } } String info = allInfo.substring(start); try { if (TextUtils.equals(v9PornItem.getDuration(), "00:00")) { String duration = allInfo.substring(allInfo.indexOf("时长:") + 3, allInfo.indexOf("查看")); v9PornItem.setDuration(duration); } } catch (Exception e) { e.printStackTrace(); } v9PornItem.setInfo(info); // Logger.d(info); v9PornItemList.add(v9PornItem); } return v9PornItemList; }
Example 18
Source File: OutputFormatter.java From JumpGo with Mozilla Public License 2.0 | 4 votes |
/**
 * Numbers every element matching {@code tagName} below {@code node} by writing its
 * zero-based position in the match list into a {@code paragraphIndex} attribute.
 *
 * @param node    element to search below
 * @param tagName query passed to {@code select}
 */
private static void setParagraphIndex(Element node, String tagName) {
    Elements matches = node.select(tagName);
    for (int index = 0; index < matches.size(); index++) {
        matches.get(index).attr("paragraphIndex", Integer.toString(index));
    }
}
Example 19
Source File: CssSelector.java From webmagic with Apache License 2.0 | 4 votes |
/**
 * Selects the elements below {@code element} that match {@code selectorText}.
 *
 * @param element element to search in
 * @return the matching elements
 */
@Override
public List<Element> selectElements(Element element) {
    List<Element> matches = element.select(selectorText);
    return matches;
}
Example 20
Source File: firstPost.java From Gazetti_Newspaper_Reader with MIT License | 3 votes |
/**
 * Updates {@code mImageURL} from the second element matching the configured First Post
 * image selector and returns it.
 *
 * @param bodyElement element to search in
 * @return the {@code src} of the second match, or the previous {@code mImageURL} value
 *         when fewer than two elements match
 */
private String getImageURL(Element bodyElement) {
    Elements mainImageElement = bodyElement.select(ConfigService.getInstance().getFirstPostImage());
    // The element at index 1 is read, so at least two matches are required; the former
    // "size() != 0" guard let a single match through and get(1) then threw
    // IndexOutOfBoundsException.
    if (mainImageElement.size() > 1) {
        mImageURL = mainImageElement.get(1).attr("src");
    }
    return mImageURL;
}