Java Code Examples for org.jsoup.nodes.Element#absUrl()
The following examples show how to use
org.jsoup.nodes.Element#absUrl().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PUBGNewsFetch.java From Companion-For-PUBG-Android with MIT License | 6 votes |
/**
 * Fetches the page at {@code PUBG_NEWS_LINK} and publishes one {@link NewsItem}
 * per element matched by {@code NEWS_DOCUMENT_SELECTOR}.
 *
 * <p>A row without a link or image element is still published; the missing URL
 * is reported as the empty string, the same value {@code absUrl} yields when an
 * attribute cannot be resolved.
 *
 * @param voids unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(final Void... voids) {
    try {
        final Document document = Jsoup.connect(PUBG_NEWS_LINK).get();
        for (final Element row : document.select(NEWS_DOCUMENT_SELECTOR)) {
            final String title = row.select(NEWS_TITLE_SELECTOR).text();
            final String type = row.select(NEWS_TYPE_SELECTOR).text();
            final String date = row.select(NEWS_DATE_SELECTOR).text();
            final String description = row.select(NEWS_DESCRIPTION_SELECTOR).text();

            // first() returns null when the selector matched nothing; dereferencing it
            // would throw a NullPointerException that the IOException handler below
            // does not catch.
            final Element link = row.select(NEWS_LINK_SELECTOR).first();
            final String linkSrc = link != null ? link.absUrl(NEWS_LINKSRC_SELECTOR) : "";

            final Element img = row.select(NEWS_IMG_SELECTOR).first();
            final String imgSrc = img != null ? img.absUrl(NEWS_IMGSRC_SELECTOR) : "";

            publishProgress(new NewsItem(title, type, date, description, linkSrc, imgSrc));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
Example 2
Source File: Whitelist.java From jsoup-learning with MIT License | 6 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString() + ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
Example 3
Source File: AbstractDownloadableLinkRuleImplementation.java From Asqatasun with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the parent selection, then sorts the selected elements by their absolute
 * {@code HREF_ATTR} URI.
 *
 * <p>For an element whose URI passes {@code isLinkWithProperExtension}: if the URI
 * has a non-blank fragment the element is removed from the selection, otherwise it
 * is added to {@code linkWithSimpleExtension}. Elements whose processing throws are
 * left untouched.
 *
 * @param sspHandler handler forwarded to the parent implementation
 */
@Override
protected void select(SSPHandler sspHandler) {
    super.select(sspHandler);
    for (Iterator<Element> it = getElements().get().iterator(); it.hasNext();) {
        Element candidate = it.next();
        try {
            URI target = new URI(candidate.absUrl(HREF_ATTR));
            if (!isLinkWithProperExtension(target)) {
                continue;
            }
            if (StringUtils.isNotBlank(target.getFragment())) {
                it.remove();
            } else {
                linkWithSimpleExtension.add(candidate);
            }
        } catch (Exception ignored) {
            // Best effort: an element that cannot be processed is skipped.
        }
    }
}
Example 4
Source File: HtmlParse.java From ChipHellClient with Apache License 2.0 | 6 votes |
/**
 * Parses an album page.
 *
 * <p>Collects the absolute {@code orig} URL of every element with class
 * {@code postalbum_i}, and stores the number found in the element with id
 * {@code curpic}, minus one, as the current position.
 *
 * @param responseBody HTML of the album page
 * @return the populated album wrapper
 */
public static AlbumWrap parseAubum(String responseBody) {
    AlbumWrap result = new AlbumWrap();

    Document page = Jsoup.parse(responseBody);
    page.setBaseUri(Constants.BASE_URL);

    List<String> urls = new ArrayList<String>();
    for (Element item : page.getElementsByClass("postalbum_i")) {
        urls.add(item.absUrl("orig"));
    }
    result.setUrls(urls);

    String currentText = page.getElementById("curpic").text();
    result.setCurPosition(Integer.parseInt(currentText) - 1);
    return result;
}
Example 5
Source File: LoadMessagesTask.java From SteamGifts with MIT License | 6 votes |
private List<IEndlessAdaptable> loadMessages(Document document) { List<IEndlessAdaptable> list = new ArrayList<>(); Elements children = document.select(".comments__entity"); for (Element element : children) { Element link = element.select(".comments__entity__name a").first(); if (link != null) { MessageHeader message = new MessageHeader(link.text(), link.absUrl("href")); Element commentElement = element.nextElementSibling(); if (commentElement != null) Utils.loadComments(commentElement, message, Comment.Type.COMMENT); // add the message & all associated comments. list.add(message); list.addAll(message.getComments()); } } return list; }
Example 6
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (lowerCase(value).startsWith(prot)) { return true; } } return false; }
Example 7
Source File: SishuokWhitelist.java From es with Apache License 2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // resolve relative urls to abs, and update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString() + ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
Example 8
Source File: HtmlParse.java From ChipHellClient with Apache License 2.0 | 5 votes |
/**
 * Parses the data needed to prepare a quoted reply.
 *
 * <p>Reads the {@code postform} element's action URL, several named input values and
 * the first {@code blockquote}. If anything fails while reading, the stack trace is
 * printed and the reply is returned without any of these values set.
 *
 * @param responseBody HTML of the reply page
 * @return the reply data; never {@code null}
 */
public static PrepareQuoteReply parsePrepareQuoteReply(String responseBody) {
    PrepareQuoteReply quoteReply = new PrepareQuoteReply();
    try {
        Document page = Jsoup.parse(responseBody);
        page.setBaseUri(Constants.BASE_URL);
        Element form = page.getElementById("postform");

        // Read every value before touching the result, so a failure part-way
        // through leaves quoteReply unmodified.
        String url = form.absUrl("action");
        String formhash = namedFieldValue(form, "formhash");
        String posttime = namedFieldValue(form, "posttime");
        String noticeauthor = namedFieldValue(form, "noticeauthor");
        String noticetrimstr = namedFieldValue(form, "noticetrimstr");
        String noticeauthormsg = namedFieldValue(form, "noticeauthormsg");
        String reppid = namedFieldValue(form, "reppid");
        String reppost = namedFieldValue(form, "reppost");
        String quoteBody = form.getElementsByTag("blockquote").first().toString();

        quoteReply.setNoticeauthor(noticeauthor);
        quoteReply.setNoticeauthormsg(noticeauthormsg);
        quoteReply.setNoticetrimstr(noticetrimstr);
        quoteReply.setPosttime(posttime);
        quoteReply.setQuoteBody(quoteBody);
        quoteReply.setReppid(reppid);
        quoteReply.setUrl(url);
        quoteReply.setFormhash(formhash);
        quoteReply.setReppost(reppost);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return quoteReply;
}

/** Returns the {@code value} attribute of the first element under {@code form} whose {@code name} equals {@code fieldName}. */
private static String namedFieldValue(Element form, String fieldName) {
    return form.getElementsByAttributeValue("name", fieldName).first().attr("value");
}
Example 9
Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0 | 5 votes |
void maybeSetBaseUri(Element base) { if (baseUriSetFromDoc) // only listen to the first <base href> in parse return; String href = base.absUrl("href"); if (href.length() != 0) { // ignore <base target> etc baseUri = href; baseUriSetFromDoc = true; doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants } }
Example 10
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (lowerCase(value).startsWith(prot)) { return true; } } return false; }
Example 11
Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0 | 5 votes |
void maybeSetBaseUri(Element base) { if (baseUriSetFromDoc) // only listen to the first <base href> in parse return; String href = base.absUrl("href"); if (href.length() != 0) { // ignore <base target> etc baseUri = href; baseUriSetFromDoc = true; doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants } }
Example 12
Source File: Free_yitianjianssCrawlerServiceImpl.java From ShadowSocks-Share with Apache License 2.0 | 5 votes |
/** * 网页内容解析 ss 信息 * * @param document */ @Override protected Set<ShadowSocksDetailsEntity> parse(Document document) { Elements ssList = document.select("div.image > img"); Set<ShadowSocksDetailsEntity> set = new HashSet<>(ssList.size()); for (int i = 0; i < ssList.size(); i++) { try { Element element = ssList.get(i); // 取 src 信息 String src = element.absUrl("src"); ShadowSocksDetailsEntity ss = parseURL(src); ss.setValid(false); ss.setValidTime(new Date()); ss.setTitle(document.title()); ss.setRemarks(TARGET_URL); ss.setGroup("ShadowSocks-Share"); // 测试网络 if (isReachable(ss)) ss.setValid(true); // 无论是否可用都入库 set.add(ss); log.debug("*************** 第 {} 条 ***************{}{}", i + 1, System.lineSeparator(), ss); // log.debug("{}", ss.getLink()); } catch (Exception e) { log.error(e.getMessage(), e); } } return set; }
Example 13
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
Example 14
Source File: StructuralAnnotations.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Creates the annotation for an anchor element.
 *
 * <p>The target is the element's absolute {@code href}, or its raw {@code href} when
 * no absolute form is available. With a non-empty target a {@link Link} is returned;
 * otherwise a plain {@link Anchor}.
 *
 * @param jCas    the JCas the annotation is created in
 * @param element the anchor element
 * @return a {@link Link} with its target set, or an {@link Anchor}
 */
private Structure createAnchor(final JCas jCas, final Element element) {
    String target = element.absUrl("href");
    if (Strings.isNullOrEmpty(target)) {
        target = element.attr("href");
    }

    if (Strings.isNullOrEmpty(target)) {
        return new Anchor(jCas);
    }

    final Link link = new Link(jCas);
    link.setTarget(target);
    return link;
}
Example 15
Source File: HtmlConverter.java From docx4j-template with Apache License 2.0 | 5 votes |
/** * 将页面转为{@link org.jsoup.nodes.Document}对象,xhtml 格式 * * @param url * @return * @throws Exception */ protected Document url2xhtml(String url) throws Exception { Document doc = Jsoup.connect(url).get(); //获得 if (logger.isDebugEnabled()) { logger.debug("baseUri: {}", doc.baseUri()); } for (Element script : doc.getElementsByTag("script")) { //除去所有 script script.remove(); } for (Element a : doc.getElementsByTag("a")) { //除去 a 的 onclick,href 属性 a.removeAttr("onclick"); a.removeAttr("href"); } Elements links = doc.getElementsByTag("link"); //将link中的地址替换为绝对地址 for (Element element : links) { String href = element.absUrl("href"); if (logger.isDebugEnabled()) { logger.debug("href: {} -> {}", element.attr("href"), href); } element.attr("href", href); } doc.outputSettings() .syntax(Document.OutputSettings.Syntax.xml) .escapeMode(Entities.EscapeMode.xhtml); //转为 xhtml 格式 if (logger.isDebugEnabled()) { String[] split = doc.html().split("\n"); for (int c = 0; c < split.length; c++) { logger.debug("line {}:\t{}", c + 1, split[c]); } } return doc; }
Example 16
Source File: VidbleRipper.java From ripme with MIT License | 5 votes |
/**
 * Collects image URLs from the {@code #ContentPlaceHolder1_divContent} container.
 *
 * <p>Each image's absolute {@code src} has every underscore-plus-letters run matched
 * by {@code _[a-zA-Z]{3,5}} removed; URLs that end up empty are dropped.
 *
 * @param doc page to read
 * @return the cleaned, non-empty image URLs in document order
 */
private static List<String> getURLsFromPageStatic(Document doc) {
    List<String> urls = new ArrayList<>();
    for (Element image : doc.select("#ContentPlaceHolder1_divContent").select("img")) {
        String cleaned = image.absUrl("src").replaceAll("_[a-zA-Z]{3,5}", "");
        if (cleaned.isEmpty()) {
            continue;
        }
        urls.add(cleaned);
    }
    return urls;
}
Example 17
Source File: HtmlParser.java From gecco with MIT License | 4 votes |
/**
 * Returns the absolute URL held in the given attribute of an image element.
 *
 * @param img  the element to read; may be {@code null}
 * @param attr name of the attribute holding the URL
 * @return the absolute URL, or {@code null} when {@code img} is {@code null}
 */
public String $image(Element img, String attr) {
    return img == null ? null : img.absUrl(attr);
}
Example 18
Source File: CurseCrawler.java From TinkerTime with GNU General Public License v3.0 | 4 votes |
/**
 * Returns the absolute {@code src} of the first
 * {@code img.primary-project-attachment} element on the page at {@code getApiUrl()}.
 *
 * @return the image URL
 * @throws IOException if the page has no such image, or its {@code src} does not
 *                     form a valid URL
 */
@Override
public URL getImageUrl() throws IOException {
    Document mainPage = getPage(getApiUrl());
    Element image = mainPage.select("img.primary-project-attachment").first();
    if (image == null) {
        // first() returns null when nothing matched; report that through the declared
        // exception instead of failing with a NullPointerException.
        throw new IOException("No img.primary-project-attachment element found on the project page");
    }
    return new URL(image.absUrl("src"));
}
Example 19
Source File: HtmlParse.java From ChipHellClient with Apache License 2.0 | 4 votes |
/** * 解析板块列表 * * @param content * @return */ public static List<PlateGroup> parsePlateGroupList(String content) { List<PlateGroup> groups = new ArrayList<PlateGroup>(); Document document = Jsoup.parse(content); document.setBaseUri(Constants.BASE_URL); Elements elementsGroup = document.getElementsByClass("bm"); for (Element bm : elementsGroup) { PlateGroup plateGroup = new PlateGroup(); Element bm_h = bm.getElementsByClass("bm_h").first(); String title = bm_h.text(); plateGroup.setTitle(title); List<Plate> plates = new ArrayList<Plate>(); Elements plateElements = bm.getElementsByClass("bm_c"); for (Element bm_c : plateElements) { Plate plate = new Plate(); //链接,第一个是版块链接,如果有第二个则是删除收藏连接 Elements as = bm_c.getElementsByTag("a"); Element a1 = as.first(); String plateTitle = a1.text(); String url = a1.absUrl("href"); Elements count = bm_c.getElementsByClass("xg1"); String xg1 = null; if (count.size() != 0) { xg1 = count.first().text(); } else { xg1 = "(0)"; } //判断是否收藏 String favoriteId = null; if (as.size() > 1) { String urlDelete = as.get(1).absUrl("href"); favoriteId = new UrlParamsMap(urlDelete).get("favid"); } plate.setTitle(plateTitle); plate.setUrl(url); plate.setXg1(xg1); plate.setFavoriteId(favoriteId); plates.add(plate); } plateGroup.setPlates(plates); groups.add(plateGroup); } return groups; }
Example 20
Source File: HtmlParser.java From gecco with MIT License | 4 votes |
/**
 * Returns the absolute URL held in the given attribute of a link element.
 *
 * @param href the element to read; may be {@code null}
 * @param attr name of the attribute holding the URL
 * @return the absolute URL, or {@code null} when {@code href} is {@code null}
 */
public String $href(Element href, String attr) {
    return href != null ? href.absUrl(attr) : null;
}