Java Code Examples for org.jsoup.nodes.Element#selectFirst()
The following examples show how to use
org.jsoup.nodes.Element#selectFirst().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GetRatings.java From schedge with MIT License | 6 votes |
/**
 * Extracts the instructor rating from a raw HTML page.
 *
 * <p>Walks a fixed chain of CSS selectors down to the element holding the
 * rating numerator and parses its inner HTML as a float.
 *
 * @param rawData raw HTML of the page; may be {@code null}
 * @return the parsed rating, or {@code null} when the input is null/empty,
 *         any element in the selector chain is missing, or the rating text
 *         is not a valid number
 */
private static Float parseRating(String rawData) {
    // The null check must come before trim(); the original trimmed first, so
    // a null input threw NullPointerException and the null check was dead code.
    if (rawData == null || rawData.trim().isEmpty()) {
        logger.warn("Got bad data: empty string");
        return null;
    }

    String[] selectorChain = {
        "div#root",
        "div.TeacherInfo__StyledTeacher-ti1fio-1.fIlNyU",
        "div",
        "div.RatingValue__AvgRating-qw8sqy-1.gIgExh",
        "div.RatingValue__Numerator-qw8sqy-2.gxuTRq"
    };

    // selectFirst returns null when nothing matches, so every step is guarded
    // instead of dereferencing the result blindly.
    Element current = Jsoup.parse(rawData.trim());
    for (String selector : selectorChain) {
        current = current.selectFirst(selector);
        if (current == null) {
            return null;
        }
    }

    String ratingValue = current.html().trim();
    try {
        return Float.parseFloat(ratingValue);
    } catch (NumberFormatException exception) {
        logger.warn("The instructor exist but having N/A rating");
        return null;
    }
}
Example 2
Source File: ApkVersionHelper.java From XposedSmsCode with GNU General Public License v3.0 | 6 votes |
/**
 * Builds an {@code ApkVersion} from the HTML of a CoolApk page.
 *
 * <p>The version name is the first {@code digit(.digit)+} match in the page
 * title and defaults to {@code "-1"}. The version info is taken from the
 * {@code .apk_left_title_info} element inside the {@code .apk_left_title}
 * block containing the text "新版特性" ("new version features") and defaults
 * to {@code null}.
 *
 * @param html page HTML
 * @return an {@code ApkVersion} holding the extracted name and info
 */
static ApkVersion parseFromCoolApk(String html) {
    String versionName = "-1";
    String versionInfo = null;

    Document document = Jsoup.parse(html);
    if (document == null) {
        return new ApkVersion(versionName, versionInfo);
    }

    Element titleElement = document.selectFirst("title");
    if (titleElement != null) {
        Matcher versionMatcher = Pattern.compile("\\d(\\.\\d)+").matcher(titleElement.text());
        if (versionMatcher.find()) {
            versionName = versionMatcher.group();
        }
    }

    Element featuresBlock = document.selectFirst(".apk_left_title:contains(新版特性)");
    Element featuresInfo = featuresBlock == null
            ? null
            : featuresBlock.selectFirst(".apk_left_title_info");
    if (featuresInfo != null) {
        versionInfo = HtmlCompat
                .fromHtml(featuresInfo.toString(), HtmlCompat.FROM_HTML_MODE_COMPACT)
                .toString()
                .trim();
    }

    return new ApkVersion(versionName, versionInfo);
}
Example 3
Source File: GetRatings.java From schedge with MIT License | 5 votes |
private static String parseLink(String rawData) { logger.debug("parsing raw RMP data to link..."); rawData = rawData.trim(); if (rawData == null || rawData.equals("")) { logger.warn("Got bad data: empty string"); return null; } Document doc = Jsoup.parse(rawData); Element body = doc.selectFirst("body.search_results"); Element container = body.selectFirst("div#container"); Element innerBody = container.selectFirst("div#body"); Element mainContent = innerBody.selectFirst("div#mainContent"); Element resBox = mainContent.selectFirst("div#searchResultsBox"); Element listings = resBox.selectFirst("div.listings-wrap"); if (listings == null) { return null; } Element innerListings = listings.selectFirst("ul.listings"); Elements professors = innerListings.select("li.listing.PROFESSOR"); for (Element element : professors) { String school = element.selectFirst("span.sub").toString(); //<- Bugs at this line if (school.contains("New York University") || school.contains("NYU")) { return element.selectFirst("a").attr("href").split("=")[1]; } } return null; }
Example 4
Source File: ParseSection.java From schedge with MIT License | 5 votes |
public static SectionAttribute parse(@NotNull String rawData) { logger.debug("parsing raw catalog section data into SectionAttribute..."); rawData = rawData.trim(); if (rawData.equals("")) { logger.warn("Got bad data: empty string"); return null; // the course doesn't exist } Document doc = Jsoup.parse(rawData); Element failed = doc.selectFirst("div.alert.alert-info"); if (failed != null) { logger.warn("Got bad data: " + failed.text()); return null; // the course doesn't exist } Elements elements = doc.select("a"); String link = null; for (Element element : elements) { String el = element.attr("href"); if (el.contains("mapBuilding")) { link = el; } } doc.select("a").unwrap(); doc.select("i").unwrap(); doc.select("b").unwrap(); Element outerDataSection = doc.selectFirst("body > section.main"); Element innerDataSection = outerDataSection.selectFirst("> section"); Element courseNameDiv = innerDataSection.selectFirst("> div.primary-head"); String courseName = courseNameDiv.text(); Elements dataDivs = innerDataSection.select("> div.section-content.clearfix"); Map<String, String> secData = parseSectionAttributes(dataDivs); return parsingElements(secData, courseName, link); }
Example 5
Source File: ParseEnroll.java From schedge with MIT License | 5 votes |
/**
 * Prints the text of every {@code div} found under the first
 * {@code section.main > section} of the page body to standard output,
 * skipping divs whose text is exactly "Results" or "Okay".
 *
 * @param data raw HTML of the page
 */
public static void parseRegistrationNumber(String data) {
    Element section = Jsoup.parse(data)
            .selectFirst("body")
            .selectFirst("section.main > section");

    for (Element div : section.select("div")) {
        String text = div.text();
        boolean skipped = text.equals("Results") || text.equals("Okay");
        if (!skipped) {
            System.out.println(text);
        }
    }
}
Example 6
Source File: HtmlUtil.java From V2EX with GNU General Public License v3.0 | 5 votes |
/**
 * Parses a topic page into a {@code Topic}, including its replies.
 *
 * <p>Metadata is read from the page's {@code meta[property=...]} tags and
 * from the first {@code #Main > .box} element; counters and user fields are
 * extracted from that box's HTML with the class's precompiled patterns.
 *
 * @param html topic page HTML
 * @return the populated topic
 */
public static Topic getTopicAndReplies(String html) {
    Topic topic = new Topic();
    Document page = Jsoup.parse(html);

    Element mainBox = page.selectFirst("#Main > .box");
    String mainBoxHtml = mainBox.toString();
    Element statsSpan = page.selectFirst("#Main > .box > .cell > span");
    Element contentElement = mainBox.selectFirst(".topic_content");
    Element subtleElement = mainBox.selectFirst(".subtle");

    // Replace the 'T' and 'Z' markers of the published time with spaces.
    String publishedTime = page.selectFirst("meta[property=article:published_time]")
            .attr("content")
            .replaceAll("[TZ]", " ");
    topic.setCreated(TimeUtil.strToTimestamp(publishedTime, null));

    String topicUrl = page.selectFirst("meta[property=og:url]").attr("content");
    topic.setId(matcherGroup1Int(Pattern.compile("(\\d{2,})"), topicUrl));

    topic.setTitle(mainBox.selectFirst(".header > h1").text());
    topic.setClicks(matcherGroup1Int(PATTERN_TOPIC_CLICK, mainBoxHtml));

    String headerSmallHtml = mainBox.selectFirst(".header > small").toString();
    topic.setAgo(matcherGroup1(Pattern.compile("· ([^·]+) ·"), headerSmallHtml));
    topic.setFavors(matcherGroup1Int(PATTERN_TOPIC_FAVORS, mainBoxHtml));

    String contentHtml = contentElement == null ? "<br>" : contentElement.toString();
    String subtleHtml = subtleElement == null ? " " : subtleElement.toString();
    topic.setContent_rendered("\n" + contentHtml + subtleHtml + "\n\t---");

    String username = matcherGroup1(PATTERN_TOPIC_USERNAME, mainBoxHtml);
    String avatar = matcherGroup1(PATTERN_TOPIC_USER_AVATAR, mainBoxHtml);
    topic.setMember(new Member(username, avatar));

    String nodeTag = page.selectFirst("meta[property=article:tag]").attr("content");
    String nodeSection = page.selectFirst("meta[property=article:section]").attr("content");
    topic.setNode(new Node(nodeTag, nodeSection));

    if (statsSpan != null) {
        String statsHtml = statsSpan.toString();
        // "直到" means "until"; the captured group is the last-touched time.
        String lastTouched = matcherGroup1(Pattern.compile("直到 ([^+]+)"), statsHtml);
        topic.setLast_touched(lastTouched.isEmpty() ? 0 : TimeUtil.strToTimestamp(lastTouched, null));
        topic.setReplies(matcherGroup1Int(PATTERN_TOPIC_REPLY_COUNT, statsHtml));
    }

    topic.setReplyList(getReplies(page, topic.getMember().getUsername()));
    return topic;
}
Example 7
Source File: ParseV9PronVideo.java From v9porn with MIT License | 5 votes |
/** * 解析作者更多视频 * * @param html html * @return list */ public static BaseResult<List<V9PornItem>> parseAuthorVideos(String html) { int totalPage = 1; Document doc = Jsoup.parse(html); Element body = doc.getElementById("wrapper"); Element container = body.selectFirst("div.container"); List<V9PornItem> v9PornItemList = parserByDivContainer(container); //总页数 Element pagingnav = doc.getElementById("paging"); if (pagingnav != null) { Elements a = pagingnav.select("a"); if (a.size() >= 2) { String ppp = a.get(a.size() - 2).text(); if (TextUtils.isDigitsOnly(ppp)) { totalPage = Integer.parseInt(ppp); //Logger.d("总页数:" + totalPage); } } } BaseResult<List<V9PornItem>> baseResult = new BaseResult<>(); baseResult.setTotalPage(totalPage); baseResult.setData(v9PornItemList); return baseResult; }
Example 8
Source File: ParsePxgav.java From v9porn with MIT License | 5 votes |
public static BaseResult<PxgavResultWithBlockId> moreVideoList(String html) { BaseResult<PxgavResultWithBlockId> baseResult = new BaseResult<>(); baseResult.setTotalPage(1); Document doc = Jsoup.parse(html); Elements items = doc.select("article"); List<PxgavModel> pxgavModelList = new ArrayList<>(); for (Element element : items) { PxgavModel pxgavModel = new PxgavModel(); Element a = element.selectFirst("a"); String title = a.attr("title"); pxgavModel.setTitle(title); String contentUrl = a.attr("href"); pxgavModel.setContentUrl(contentUrl); String imgUrl = a.attr("style"); String bigImg = StringUtils.subString(imgUrl, imgUrl.indexOf("url(") + 4, imgUrl.lastIndexOf("-")); Logger.t(TAG).d(bigImg); if (TextUtils.isEmpty(bigImg)) { pxgavModel.setImgUrl(imgUrl); } else { pxgavModel.setImgUrl(bigImg + ".jpg"); } int beginIndex = bigImg.lastIndexOf("/"); int endIndex = bigImg.lastIndexOf("-"); String pId = StringUtils.subString(imgUrl, beginIndex + 1, endIndex); //Logger.t(TAG).d(pId); pxgavModel.setpId(pId); pxgavModelList.add(pxgavModel); } PxgavResultWithBlockId pxgavResultWithBlockId = new PxgavResultWithBlockId(); pxgavResultWithBlockId.setPxgavModelList(pxgavModelList); baseResult.setData(pxgavResultWithBlockId); return baseResult; }
Example 9
Source File: HtmlUtil.java From V2EX with GNU General Public License v3.0 | 5 votes |
/**
 * Builds the reply list from every {@code #Main > .box > .cell[id]} element.
 *
 * <p>Images inside a reply are resized to full width before the content HTML
 * is captured. Floors are numbered from 0 in document order.
 *
 * @param document parsed topic page
 * @param poster   username of the topic author; when non-null each reply is
 *                 flagged as coming from the poster or not
 * @return replies in document order
 * @throws V2exException if a reply cell has no {@code .reply_content} element
 */
private static List<Reply> getReplies(Document document, String poster) {
    Elements cells = document.select("#Main > .box > .cell[id]");
    List<Reply> replies = new ArrayList<>(cells.size());

    int floor = 0;
    for (Element cell : cells) {
        Reply reply = new Reply();

        Element content = cell.selectFirst(".reply_content");
        if (content == null) {
            throw new V2exException("This post seems to have been blocked\nEmpty reply content");
        }
        for (Element img : content.select("img")) {
            img.attr("width", "100%");
            img.attr("height", "auto");
        }
        reply.setContent(content.html());

        // Serialized after the image attributes were rewritten above.
        String cellHtml = cell.toString();
        int id = matcherGroup1Int(PATTERN_REPLY_ID, cellHtml);
        String username = matcherGroup1(PATTERN_REPLY_USERNAME, cellHtml);
        String avatarNormal = matcherGroup1(PATTERN_REPLY_AVATAR, cellHtml);

        reply.setId(id);
        reply.setMember(new Member(username, avatarNormal));
        if (poster != null) {
            reply.setPoster(username.equals(poster));
        }
        reply.setAgo(matcherGroup1(PATTERN_REPLY_AGO, cellHtml));
        reply.setVia(matcherGroup1(PATTERN_REPLY_VIA, cellHtml));
        reply.setLike(matcherGroup1Int(PATTERN_REPLY_LIKE, cellHtml));
        reply.setFloor(floor);

        replies.add(reply);
        floor++;
    }
    return replies;
}
Example 10
Source File: ParsePxgav.java From v9porn with MIT License | 5 votes |
public static BaseResult<PxgavResultWithBlockId> moreVideoList(String html) { BaseResult<PxgavResultWithBlockId> baseResult = new BaseResult<>(); baseResult.setTotalPage(1); Document doc = Jsoup.parse(html); Elements items = doc.select("article"); List<PxgavModel> pxgavModelList = new ArrayList<>(); for (Element element : items) { PxgavModel pxgavModel = new PxgavModel(); Element a = element.selectFirst("a"); String title = a.attr("title"); pxgavModel.setTitle(title); String contentUrl = a.attr("href"); pxgavModel.setContentUrl(contentUrl); String imgUrl = a.attr("style"); String bigImg = StringUtils.subString(imgUrl, imgUrl.indexOf("url(") + 4, imgUrl.lastIndexOf("-")); Logger.t(TAG).d(bigImg); if (TextUtils.isEmpty(bigImg)) { pxgavModel.setImgUrl(imgUrl); } else { pxgavModel.setImgUrl(bigImg + ".jpg"); } int beginIndex = bigImg.lastIndexOf("/"); int endIndex = bigImg.lastIndexOf("-"); String pId = StringUtils.subString(imgUrl, beginIndex + 1, endIndex); //Logger.t(TAG).d(pId); pxgavModel.setpId(pId); pxgavModelList.add(pxgavModel); } PxgavResultWithBlockId pxgavResultWithBlockId = new PxgavResultWithBlockId(); pxgavResultWithBlockId.setPxgavModelList(pxgavModelList); baseResult.setData(pxgavResultWithBlockId); return baseResult; }
Example 11
Source File: HtmlHelper.java From FairEmail with GNU General Public License v3.0 | 5 votes |
/**
 * Tells whether any of the given nodes is a non-blank text node, or a
 * non-block element that has text or contains an {@code a} or {@code img}.
 *
 * @param nodes nodes to inspect
 * @return {@code true} as soon as one such node is found, else {@code false}
 */
private static boolean hasVisibleContent(List<Node> nodes) {
    for (Node node : nodes) {
        if (node instanceof TextNode) {
            if (!((TextNode) node).isBlank()) {
                return true;
            }
            continue;
        }
        if (!(node instanceof Element)) {
            continue;
        }

        Element element = (Element) node;
        if (element.isBlock()) {
            continue;
        }
        boolean visible = element.hasText()
                || element.selectFirst("a") != null
                || element.selectFirst("img") != null;
        if (visible) {
            return true;
        }
    }
    return false;
}
Example 12
Source File: Parse99Mm.java From v9porn with MIT License | 4 votes |
/**
 * Parses a 99mm list page into {@code Mm99} items.
 *
 * <p>Items are read from the {@code li} children of {@code #piclist}. On the
 * first page the total page count is taken from the first element with class
 * {@code all} when its text (with "..." removed) is digits only; otherwise
 * the total stays at 1.
 *
 * @param html page HTML
 * @param page page number being parsed
 * @return result holding the item list and total page count
 */
public static BaseResult<List<Mm99>> parse99MmList(String html, int page) {
    BaseResult<List<Mm99>> baseResult = new BaseResult<>();
    baseResult.setTotalPage(1);
    Logger.t(TAG).d(html);

    Document doc = Jsoup.parse(html);
    Elements listItems = doc.getElementById("piclist").select("li");
    List<Mm99> results = new ArrayList<>();

    for (Element listItem : listItems) {
        Mm99 item = new Mm99();
        Element link = listItem.selectFirst("dt").selectFirst("a");

        String contentUrl = "http://www.99mm.me" + link.attr("href");
        item.setContentUrl(contentUrl);

        // The id is the file name between the last '/' and the last '.'.
        String idStr = StringUtils.subString(
                contentUrl, contentUrl.lastIndexOf("/") + 1, contentUrl.lastIndexOf("."));
        if (TextUtils.isEmpty(idStr) || !TextUtils.isDigitsOnly(idStr)) {
            Logger.t(TAG).d(idStr);
        } else {
            item.setId(Integer.parseInt(idStr));
        }

        Element img = link.selectFirst("img");
        item.setTitle(img.attr("alt"));

        // Fall back to data-img when src is not a parseable URL.
        String src = img.attr("src");
        String imgUrl = HttpUrl.parse(src) == null ? img.attr("data-img") : src;
        Logger.t(TAG).d("图片链接::" + imgUrl);
        item.setImgUrl(imgUrl);

        item.setImgWidth(Integer.parseInt(img.attr("width")));
        results.add(item);
    }

    if (page == 1) {
        Element pageElement = doc.getElementsByClass("all").first();
        if (pageElement != null) {
            String pageStr = pageElement.text().replace("...", "").trim();
            if (TextUtils.isEmpty(pageStr) || !TextUtils.isDigitsOnly(pageStr)) {
                Logger.t(TAG).d(pageStr);
            } else {
                baseResult.setTotalPage(Integer.parseInt(pageStr));
            }
        }
    }

    baseResult.setData(results);
    return baseResult;
}
Example 13
Source File: BookSourceAnalysisServiceImpl.java From DouBiNovel with Apache License 2.0 | 4 votes |
/**
 * Searches a book source by name and collects the matching books.
 *
 * <p>The search URL is the source's base URL plus search URL, formatted with
 * {@code name}. Each element matched by the source's result selector becomes
 * a {@code BookInfo}; individual fields are filled only when the source
 * configures a selector for them and that selector matches. Items whose book
 * URL or image URL attribute is blank are skipped. Any exception is logged
 * and reported through the result instead of being thrown.
 *
 * @param name       book name to search for
 * @param bookSource source configuration (URLs and selectors)
 * @return completed future wrapping the result; on success it carries the
 *         book list under the key {@code "list"}
 */
@Override
public Future<MvcResult> searchByName(String name, BookSource bookSource) {
    String url = String.format(bookSource.getBaseUrl() + bookSource.getSearchUrl(), name);
    MvcResult result = MvcResult.create();
    try {
        String responseStr = netWorkService.get(url, headers);
        if (!StringUtils.isNotEmpty(responseStr)) {
            result.setSuccess(false);
            result.setMessage("查询结果为空");
        } else {
            List<BookInfo> list = new ArrayList<>();
            Document html = Jsoup.parse(responseStr);
            for (Element resultItem : html.select(bookSource.getSearchResultSelector())) {
                BookInfo info = new BookInfo();

                Element nameElem = selectIfConfigured(resultItem, bookSource.getResultItemBookNameSelector());
                if (nameElem != null) {
                    info.setName(nameElem.text());
                }

                Element bookUrlElem = selectIfConfigured(resultItem, bookSource.getResultItemBookUrlSelector());
                if (bookUrlElem != null) {
                    info.setBookUrl(bookUrlElem.attr("href"));
                    if (StringUtils.isBlank(info.getBookUrl())) {
                        continue;
                    }
                    // Prefix URLs that do not already contain the base URL.
                    if (!info.getBookUrl().contains(bookSource.getBaseUrl())) {
                        info.setBookUrl(bookSource.getBaseUrl() + info.getBookUrl());
                    }
                }

                Element bookImgElem = selectIfConfigured(resultItem, bookSource.getResultItemBookImageUrlSelector());
                if (bookImgElem != null) {
                    info.setBookImg(bookImgElem.attr("src"));
                    if (StringUtils.isBlank(info.getBookImg())) {
                        continue;
                    }
                    if (!info.getBookImg().contains(bookSource.getBaseUrl())) {
                        info.setBookImg(bookSource.getBaseUrl() + info.getBookImg());
                    }
                }
                info.setBookImgError(bookSource.getImageError());

                Element bookDesElem = selectIfConfigured(resultItem, bookSource.getResultItemBookDesSelector());
                if (bookDesElem != null) {
                    info.setNovelDes(bookDesElem.text());
                }

                Element authorElem = selectIfConfigured(resultItem, bookSource.getResultItemBookAuthorSelector());
                if (authorElem != null) {
                    info.setAuthor(authorElem.text());
                }

                Element novelTypeElem = selectIfConfigured(resultItem, bookSource.getResultItemBookTypeSelector());
                if (novelTypeElem != null) {
                    info.setNovelType(novelTypeElem.text());
                }

                Element updateElem = selectIfConfigured(resultItem, bookSource.getResultItemBookLastUpdateSelector());
                if (updateElem != null) {
                    info.setLastUpdate(updateElem.text());
                }

                Element lastNewElem = selectIfConfigured(resultItem, bookSource.getResultItemBookLastNewSelector());
                if (lastNewElem != null) {
                    info.setLastNew(lastNewElem.text());
                }

                info.setBookSourceLink(info.getBookUrl());
                info.setBookSourceName(bookSource.getName());
                list.add(info);
            }
            result.addVal("list", list);
        }
    } catch (Exception e) {
        logger.error("查找书籍出错", e);
        result.setSuccess(false);
        result.setMessage(e.getMessage());
    }
    return new AsyncResult<MvcResult>(result);
}

/**
 * Returns the first element under {@code item} matching {@code selector},
 * or {@code null} when the selector is blank or nothing matches.
 */
private Element selectIfConfigured(Element item, String selector) {
    if (!StringUtils.isNotBlank(selector)) {
        return null;
    }
    return item.selectFirst(selector);
}
Example 14
Source File: ParsePxgav.java From v9porn with MIT License | 4 votes |
public static BaseResult<PxgavResultWithBlockId> videoList(String html, boolean isLoadMoreData) { BaseResult<PxgavResultWithBlockId> baseResult = new BaseResult<>(); PxgavResultWithBlockId pxgavResultWithBlockId = new PxgavResultWithBlockId(); baseResult.setTotalPage(1); Logger.t(TAG).d(html); Document doc = Jsoup.parse(html); Elements items = doc.getElementsByClass("penci-block_content").first().select("article"); List<PxgavModel> pxgavModelList = new ArrayList<>(); for (Element element : items) { PxgavModel pxgavModel = new PxgavModel(); Element a = element.selectFirst("a"); String title = a.attr("title"); pxgavModel.setTitle(title); String contentUrl = a.attr("href"); pxgavModel.setContentUrl(contentUrl); String imgUrl = a.attr("style"); String bigImg = StringUtils.subString(imgUrl, imgUrl.indexOf("url(") + 4, imgUrl.lastIndexOf("-")); Logger.t(TAG).d(bigImg); if (TextUtils.isEmpty(bigImg)) { pxgavModel.setImgUrl(imgUrl); } else { pxgavModel.setImgUrl(bigImg + ".jpg"); } int beginIndex = bigImg.lastIndexOf("/"); int endIndex = bigImg.lastIndexOf("-"); String pId = StringUtils.subString(imgUrl, beginIndex + 1, endIndex); //Logger.t(TAG).d(pId); pxgavModel.setpId(pId); pxgavModelList.add(pxgavModel); } pxgavResultWithBlockId.setPxgavModelList(pxgavModelList); if (isLoadMoreData) { baseResult.setData(pxgavResultWithBlockId); return baseResult; } //解析加载更多需要的数据 Elements elements = doc.getElementsByClass("wpb_wrapper"); String[] data = elements.last().getElementsByTag("script").html().split(";"); String label = ".id = \""; for (String dat : data) { if (dat.contains(label)) { int startIndex = dat.indexOf(label); Logger.t(TAG).d(dat); try { String blockId = dat.substring(startIndex + label.length()).replace("\"", ""); pxgavResultWithBlockId.setBlockId(blockId); Logger.t(TAG).d("blockId数据:" + blockId); } catch (Exception e) { Logger.t(TAG).e("无法获取blockId"); } break; } } baseResult.setData(pxgavResultWithBlockId); return baseResult; }
Example 15
Source File: ParseMeiZiTu.java From v9porn with MIT License | 4 votes |
public static BaseResult<List<MeiZiTu>> parseMeiZiTuList(String html, int page) { BaseResult<List<MeiZiTu>> baseResult = new BaseResult<>(); baseResult.setTotalPage(1); Document doc = Jsoup.parse(html); Element ulPins = doc.getElementById("pins"); Elements lis = ulPins.select("li"); List<MeiZiTu> meiZiTuList = new ArrayList<>(); for (Element li : lis) { Element contentElement = li.select("a").first(); if (contentElement == null) { continue; } MeiZiTu meiZiTu = new MeiZiTu(); String contentUrl = contentElement.attr("href"); //meiZiTu.setContentUrl(contentUrl); int index = contentUrl.lastIndexOf("/"); if (index >= 0 && index + 1 < contentUrl.length()) { String idStr = contentUrl.substring(index + 1, contentUrl.length()); Logger.t(TAG).d(idStr); if (!TextUtils.isEmpty(idStr) && TextUtils.isDigitsOnly(idStr)) { meiZiTu.setId(Integer.parseInt(idStr)); } } Element imageElement = li.selectFirst("img"); String name = imageElement.attr("alt"); meiZiTu.setName(name); String thumbUrl = imageElement.attr("data-original"); meiZiTu.setThumbUrl(thumbUrl); Logger.t(TAG).d(thumbUrl); int height = Integer.parseInt(imageElement.attr("height")); meiZiTu.setHeight(height); int width = Integer.parseInt(imageElement.attr("width")); meiZiTu.setWidth(width); String date = li.getElementsByClass("time").first().text(); meiZiTu.setDate(date); // String viewCount = li.getElementsByClass("view").first().text(); // meiZiTu.setViewCount(viewCount); meiZiTuList.add(meiZiTu); } Logger.t(TAG).d("size::" + meiZiTuList.size()); if (page == 1) { Elements pageElements = doc.getElementsByClass("page-numbers"); if (pageElements != null && pageElements.size() > 3) { String pageStr = pageElements.get(pageElements.size() - 2).text(); Logger.t(TAG).d("totalPage::" + pageStr); if (!TextUtils.isEmpty(pageStr) && TextUtils.isDigitsOnly(pageStr)) { baseResult.setTotalPage(Integer.parseInt(pageStr)); } } } baseResult.setData(meiZiTuList); return baseResult; }
Example 16
Source File: Parse99Mm.java From v9porn with MIT License | 4 votes |
/**
 * Parses a 99mm list page into {@code Mm99} items.
 *
 * <p>Items are read from the {@code li} children of {@code #piclist}. On the
 * first page the total page count is taken from the first element with class
 * {@code all} when its text (with "..." removed) is digits only; otherwise
 * the total stays at 1.
 *
 * @param html page HTML
 * @param page page number being parsed
 * @return result holding the item list and total page count
 */
public static BaseResult<List<Mm99>> parse99MmList(String html, int page) {
    BaseResult<List<Mm99>> baseResult = new BaseResult<>();
    baseResult.setTotalPage(1);
    Logger.t(TAG).d(html);

    Document doc = Jsoup.parse(html);
    Elements listItems = doc.getElementById("piclist").select("li");
    List<Mm99> results = new ArrayList<>();

    for (Element listItem : listItems) {
        Mm99 item = new Mm99();
        Element link = listItem.selectFirst("dt").selectFirst("a");

        String contentUrl = "http://www.99mm.me" + link.attr("href");
        item.setContentUrl(contentUrl);

        // The id is the file name between the last '/' and the last '.'.
        String idStr = StringUtils.subString(
                contentUrl, contentUrl.lastIndexOf("/") + 1, contentUrl.lastIndexOf("."));
        if (TextUtils.isEmpty(idStr) || !TextUtils.isDigitsOnly(idStr)) {
            Logger.t(TAG).d(idStr);
        } else {
            item.setId(Integer.parseInt(idStr));
        }

        Element img = link.selectFirst("img");
        item.setTitle(img.attr("alt"));

        // Fall back to data-img when src is not a parseable URL.
        String src = img.attr("src");
        String imgUrl = HttpUrl.parse(src) == null ? img.attr("data-img") : src;
        Logger.t(TAG).d("图片链接::" + imgUrl);
        item.setImgUrl(imgUrl);

        item.setImgWidth(Integer.parseInt(img.attr("width")));
        results.add(item);
    }

    if (page == 1) {
        Element pageElement = doc.getElementsByClass("all").first();
        if (pageElement != null) {
            String pageStr = pageElement.text().replace("...", "").trim();
            if (TextUtils.isEmpty(pageStr) || !TextUtils.isDigitsOnly(pageStr)) {
                Logger.t(TAG).d(pageStr);
            } else {
                baseResult.setTotalPage(Integer.parseInt(pageStr));
            }
        }
    }

    baseResult.setData(results);
    return baseResult;
}
Example 17
Source File: ParsePxgav.java From v9porn with MIT License | 4 votes |
public static BaseResult<PxgavResultWithBlockId> videoList(String html, boolean isLoadMoreData) { BaseResult<PxgavResultWithBlockId> baseResult = new BaseResult<>(); PxgavResultWithBlockId pxgavResultWithBlockId = new PxgavResultWithBlockId(); baseResult.setTotalPage(1); Logger.t(TAG).d(html); Document doc = Jsoup.parse(html); Elements items = doc.getElementsByClass("penci-block_content").first().select("article"); List<PxgavModel> pxgavModelList = new ArrayList<>(); for (Element element : items) { PxgavModel pxgavModel = new PxgavModel(); Element a = element.selectFirst("a"); String title = a.attr("title"); pxgavModel.setTitle(title); String contentUrl = a.attr("href"); pxgavModel.setContentUrl(contentUrl); String imgUrl = a.attr("style"); String bigImg = StringUtils.subString(imgUrl, imgUrl.indexOf("url(") + 4, imgUrl.lastIndexOf("-")); Logger.t(TAG).d(bigImg); if (TextUtils.isEmpty(bigImg)) { pxgavModel.setImgUrl(imgUrl); } else { pxgavModel.setImgUrl(bigImg + ".jpg"); } int beginIndex = bigImg.lastIndexOf("/"); int endIndex = bigImg.lastIndexOf("-"); String pId = StringUtils.subString(imgUrl, beginIndex + 1, endIndex); //Logger.t(TAG).d(pId); pxgavModel.setpId(pId); pxgavModelList.add(pxgavModel); } pxgavResultWithBlockId.setPxgavModelList(pxgavModelList); if (isLoadMoreData) { baseResult.setData(pxgavResultWithBlockId); return baseResult; } //解析加载更多需要的数据 Elements elements = doc.getElementsByClass("wpb_wrapper"); String[] data = elements.last().getElementsByTag("script").html().split(";"); String label = ".id = \""; for (String dat : data) { if (dat.contains(label)) { int startIndex = dat.indexOf(label); Logger.t(TAG).d(dat); try { String blockId = dat.substring(startIndex + label.length()).replace("\"", ""); pxgavResultWithBlockId.setBlockId(blockId); Logger.t(TAG).d("blockId数据:" + blockId); } catch (Exception e) { Logger.t(TAG).e("无法获取blockId"); } break; } } baseResult.setData(pxgavResultWithBlockId); return baseResult; }
Example 18
Source File: ParsePxgav.java From v9porn with MIT License | 4 votes |
/** * @param html 原网页 * @return json=== */ public static BaseResult<PxgavVideoParserJsonResult> parserVideoUrl(String html) { BaseResult<PxgavVideoParserJsonResult> baseResult = new BaseResult<>(); Document document = Jsoup.parse(html); Element videoWrapper = document.getElementsByClass("penci-entry-content entry-content").first(); String videoHtml = videoWrapper.html(); Logger.t(TAG).d(videoHtml); int index = videoHtml.indexOf("setup") + 6; int endIndexV = videoHtml.indexOf(");"); String videoUrl = videoHtml.substring(index, endIndexV); Logger.t(TAG).d(videoUrl); PxgavVideoParserJsonResult pxgavVideoParserJsonResult = new Gson().fromJson(videoUrl, PxgavVideoParserJsonResult.class); Elements items = document.getElementsByClass("penci-block_content").first().select("article"); List<PxgavModel> pxgavModelList = new ArrayList<>(); for (Element element : items) { PxgavModel pxgavModel = new PxgavModel(); Element a = element.selectFirst("a"); String title = a.attr("title"); pxgavModel.setTitle(title); String contentUrl = a.attr("href"); pxgavModel.setContentUrl(contentUrl); String imgUrl = a.attr("style"); String bigImg = StringUtils.subString(imgUrl, imgUrl.indexOf("url(") + 4, imgUrl.lastIndexOf("-")); Logger.t(TAG).d(bigImg); if (TextUtils.isEmpty(bigImg)) { pxgavModel.setImgUrl(imgUrl); } else { pxgavModel.setImgUrl(bigImg + ".jpg"); } int beginIndex = bigImg.lastIndexOf("/"); int endIndex = bigImg.lastIndexOf("-"); String pId = StringUtils.subString(imgUrl, beginIndex + 1, endIndex); //Logger.t(TAG).d(pId); pxgavModel.setpId(pId); pxgavModelList.add(pxgavModel); } pxgavVideoParserJsonResult.setPxgavModelList(pxgavModelList); baseResult.setData(pxgavVideoParserJsonResult); return baseResult; }
Example 19
Source File: ParseMeiZiTu.java From v9porn with MIT License | 4 votes |
public static BaseResult<List<MeiZiTu>> parseMeiZiTuList(String html, int page) { BaseResult<List<MeiZiTu>> baseResult = new BaseResult<>(); baseResult.setTotalPage(1); Document doc = Jsoup.parse(html); Element ulPins = doc.getElementById("pins"); Elements lis = ulPins.select("li"); List<MeiZiTu> meiZiTuList = new ArrayList<>(); for (Element li : lis) { Element contentElement = li.select("a").first(); if (contentElement == null) { continue; } MeiZiTu meiZiTu = new MeiZiTu(); String contentUrl = contentElement.attr("href"); //meiZiTu.setContentUrl(contentUrl); int index = contentUrl.lastIndexOf("/"); if (index >= 0 && index + 1 < contentUrl.length()) { String idStr = contentUrl.substring(index + 1, contentUrl.length()); Logger.t(TAG).d(idStr); if (!TextUtils.isEmpty(idStr) && TextUtils.isDigitsOnly(idStr)) { meiZiTu.setId(Integer.parseInt(idStr)); } } Element imageElement = li.selectFirst("img"); String name = imageElement.attr("alt"); meiZiTu.setName(name); String thumbUrl = imageElement.attr("data-original"); meiZiTu.setThumbUrl(thumbUrl); Logger.t(TAG).d(thumbUrl); int height = Integer.parseInt(imageElement.attr("height")); meiZiTu.setHeight(height); int width = Integer.parseInt(imageElement.attr("width")); meiZiTu.setWidth(width); String date = li.getElementsByClass("time").first().text(); meiZiTu.setDate(date); // String viewCount = li.getElementsByClass("view").first().text(); // meiZiTu.setViewCount(viewCount); meiZiTuList.add(meiZiTu); } Logger.t(TAG).d("size::" + meiZiTuList.size()); if (page == 1) { Elements pageElements = doc.getElementsByClass("page-numbers"); if (pageElements != null && pageElements.size() > 3) { String pageStr = pageElements.get(pageElements.size() - 2).text(); Logger.t(TAG).d("totalPage::" + pageStr); if (!TextUtils.isEmpty(pageStr) && TextUtils.isDigitsOnly(pageStr)) { baseResult.setTotalPage(Integer.parseInt(pageStr)); } } } baseResult.setData(meiZiTuList); return baseResult; }
Example 20
Source File: ParseV9PronVideo.java From v9porn with MIT License | 3 votes |
/**
 * Parses the home page.
 *
 * <p>Delegates to {@code parserByDivContainer} with the first
 * {@code div.container} found inside the {@code #wrapper} element.
 *
 * @param html home page HTML
 * @return the video list
 */
public static List<V9PornItem> parseIndex(String html) {
    Element wrapper = Jsoup.parse(html).getElementById("wrapper");
    return parserByDivContainer(wrapper.selectFirst("div.container"));
}