Java Code Examples for org.jsoup.select.Elements#get()
The following examples show how to use
org.jsoup.select.Elements#get().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ResourceQuote.java From templatespider with Apache License 2.0 | 6 votes |
/** * Tag标签的引用资源替换,替换为绝对路径 * @param doc Document,整个页面 * @param tagName tag的名字,如 img、 script * @param tagProperty 上面的tag中资源引用的标签,如 src * @return 替换好的Document */ public Document tagReplace(Document doc, String tagName, String tagProperty){ Elements imgElements = doc.getElementsByTag(tagName); for (int i = 0; i < imgElements.size(); i++) { Element e = imgElements.get(i); String url = e.attr(tagProperty); if(url.length() > 3 && url.indexOf(baseUri) == -1){ String absUrl = hierarchyReplace(this.baseUri, url); if((!url.equals(absUrl)) && url.indexOf("://") == -1){ //如果url未替换过,且不是绝对路径,那么进行替换操作 e.attr(tagProperty, absUrl); } } } return doc; }
Example 2
Source File: SearchThread.java From tv-search with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void run() { // TODO Auto-generated method stub try { if(keyWord == null) { return; } String temp = Const.REQUESTURL+keyWord; Document doc = Jsoup.connect(Const.REQUESTURL+keyWord).get(); Elements test = doc.select("div.main_content"); org.jsoup.nodes.Element element = test.get(0); Elements test3 = element.getAllElements(); data = element.toString(); }catch(Exception e){ e.printStackTrace(); }finally{ Const.data = data; } }
Example 3
Source File: TestPutHTMLElement.java From localization_nifi with Apache License 2.0 | 6 votes |
@Test public void testAddNewElementToRoot() throws Exception { final String MOD_VALUE = "<p>modified value</p>"; testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "body"); testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT); testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE); testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); testRunner.run(); testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1); testRunner.assertTransferCount(PutHTMLElement.REL_INVALID_HTML, 0); testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1); testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0); List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS); assertTrue(ffs.size() == 1); String data = new String(testRunner.getContentAsByteArray(ffs.get(0))); //Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want. Document doc = Jsoup.parse(data); Elements eles = doc.select("body > p"); Element ele = eles.get(0); assertTrue(StringUtils.equals(MOD_VALUE.replace("<p>", "").replace("</p>", ""), ele.html())); }
Example 4
Source File: SearchUtils.java From emotional_analysis with Apache License 2.0 | 6 votes |
/**
 * Gets the name of a song.
 *
 * @param songId id appended to the song page URL
 * @return the first child node of the first {@code f-ff2} element, rendered as a string,
 *         or a fixed "not found" message when the page has no such element
 * @throws Exception if the request or the parsing fails
 */
public static String getSongNameById(long songId) throws Exception {
    Response response = Jsoup.connect("http://music.163.com/m/song?id=" + songId)
            .header("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
            .header("Cache-Control", "no-cache")
            .timeout(2000000000)
            .execute();

    Elements titleElements = response.parse().getElementsByClass("f-ff2");
    if (titleElements.isEmpty()) {
        return "ES中歌曲在网易云音乐中找不到";
    }
    return titleElements.get(0).childNode(0).toString();
}
Example 5
Source File: IPSpider.java From HttpProxy with GNU General Public License v3.0 | 6 votes |
/**
 * Scrapes one page of a proxy listing and appends every proxy found to {@code ipList}.
 *
 * @param api base URL of the listing; {@code index} is appended to it
 * @param index page number to fetch
 * @return {@code ipList}, including any entries added by this call
 */
private List<IPBean> crawl(String api, int index) {
    String html = HttpUtils.getResponseContent(api + index);
    System.out.println(html);

    Document document = Jsoup.parse(html);
    Element table = document.selectFirst("table");
    if (table == null) {
        // The response contains no table at all: nothing to add.
        return ipList;
    }

    Elements rows = table.select("tr");
    // Row 0 is skipped, as before (presumably the header row).
    for (int i = 1; i < rows.size(); i++) {
        Elements cells = rows.get(i).children();
        if (cells.size() < 6) {
            // The columns read below go up to index 5; skip rows that are too short.
            continue;
        }
        String ip = cells.get(1).text();
        int port = Integer.parseInt(cells.get(2).text().trim());
        String typeStr = cells.get(5).text().trim();
        // Anything that is not "HTTP" (case-insensitive) is treated as HTTPS.
        int type = "HTTP".equalsIgnoreCase(typeStr) ? IPBean.TYPE_HTTP : IPBean.TYPE_HTTPS;
        ipList.add(new IPBean(ip, port, type));
    }
    return ipList;
}
Example 6
Source File: ZIMuKuCommon.java From SubTitleSearcher with Apache License 2.0 | 5 votes |
/** * 获取下载网址列表 * @return */ public static JSONArray getDetailList(String url) { String result = httpGet(baseUrl+url); //System.out.println(result); Document doc = Jsoup.parse(result); Elements matchList = doc.select("#subtb tbody tr"); if(matchList.size() == 0)return new JSONArray(); //System.out.println(matchList.html()); JSONArray resList = new JSONArray(); for(int i = 0 ; i < matchList.size(); i++) { Element row = matchList.get(i); JSONObject resRow = new JSONObject(); resRow.put("url", row.selectFirst("a").attr("href")); resRow.put("title", row.selectFirst("a").attr("title")); resRow.put("ext", row.selectFirst(".label-info").text()); Elements authorInfos = row.select(".gray"); StringBuffer authorInfo = new StringBuffer(); authorInfos.forEach(element ->{ authorInfo.append(element.text() + ","); }); if(authorInfo.length() > 0) { resRow.put("authorInfo", authorInfo.toString().substring(0, authorInfo.length()-1)); }else { resRow.put("authorInfo", ""); } resRow.put("lang", row.selectFirst("img").attr("alt")); resRow.put("rate", row.selectFirst(".rating-star").attr("title").replace("字幕质量:", "")); resRow.put("downCount", row.select("td").get(3).text()); resList.add(resRow); } return resList; }
Example 7
Source File: Handian.java From ankihelper with GNU General Public License v3.0 | 5 votes |
public List<Definition> wordLookup(String key) { try { // Document doc = Jsoup.connect(wordUrl + key) // .userAgent(DEFAULT_UA) // .timeout(5000) // .get(); // String html = doc.toString(); Request request = new Request.Builder().url(wordUrl + key) .header("User-Agent", Constant.UA) .build(); String rawhtml = MyApplication.getOkHttpClient().newCall(request).execute().body().string(); Document doc = Jsoup.parse(rawhtml); Elements entrys = doc.select("div.cdnr, div.tagContent"); ArrayList<Definition> defList = new ArrayList<>(); if (entrys.size() > 0) { Element ele = entrys.get(0); String word = key; String meaning = ele.toString(); meaning = meaning.replaceAll("<img src=\"/", "<img src=\"http://www.zdic.net/"); meaning = meaning.replaceAll("&","&"); HashMap<String, String> defMap = new HashMap<>(); String definition = meaning; defMap.put(EXP_ELE[0], word); defMap.put(EXP_ELE[1], definition); defList.add(new Definition(defMap, definition)); } return defList; } catch (IOException ioe) { Log.d("time out", Log.getStackTraceString(ioe)); //Toast.makeText(MyApplication.getContext(), Log.getStackTraceString(ioe), Toast.LENGTH_SHORT).show(); return new ArrayList<Definition>(); } }
Example 8
Source File: JsoupUtil.java From materialup with Apache License 2.0 | 5 votes |
/**
 * Extracts the upvoters listed in a response.
 *
 * @param mu response whose {@code status} and {@code content} are read
 * @return one {@link User} per {@code .post__upvoter} element that yields a non-empty
 *         link path, in reverse document order; empty when the status is not {@code OK}
 */
public static List<User> getUpvoters(MuResponse mu) {
    List<User> users = new ArrayList<>();
    if (!OK.equalsIgnoreCase(mu.status)) {
        return users;
    }

    final Element document = Jsoup.parse(mu.content);
    final Elements upvoters = document.select(".post__upvoters .post__upvoter");
    // Walk the matches last-to-first; an empty selection simply skips the loop.
    for (int i = upvoters.size() - 1; i >= 0; i--) {
        Element upvoter = upvoters.get(i);
        // first() is null when the upvoter has no <a>; attr(...) receives it as-is.
        String path = attr(upvoter.select("a").first(), "href");
        if (TextUtils.isEmpty(path)) {
            continue;
        }
        User user = new User();
        user.setAvatarUrl(attr(upvoter.select("img").first(), "src"));
        user.setPath(path);
        users.add(user);
    }
    return users;
}
Example 9
Source File: YoupornRipper.java From ripme with MIT License | 5 votes |
/**
 * Collects the video URL from a page.
 *
 * @param doc the page to inspect
 * @return a list holding the {@code src} attribute of the first {@code video} element,
 *         or an empty list when the page has no {@code video} element
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> results = new ArrayList<>();
    // first() returns null instead of throwing when nothing matches.
    Element video = doc.select("video").first();
    if (video != null) {
        results.add(video.attr("src"));
    }
    return results;
}
Example 10
Source File: JsoupUtils.java From EhViewer with Apache License 2.0 | 5 votes |
/**
 * Returns the first element with the given tag name found via
 * {@code element.getElementsByTag(tagName)}.
 *
 * @param element element to search
 * @param tagName tag name to look for
 * @return the first match, or {@code null} when there is none
 */
@Nullable
public static Element getElementByTag(Element element, String tagName) {
    Elements matches = element.getElementsByTag(tagName);
    if (matches == null || matches.size() <= 0) {
        return null;
    }
    return matches.get(0);
}
Example 11
Source File: FileReader.java From calcite with Apache License 2.0 | 5 votes |
private Element getSelectedTable(Document doc, String selector) throws FileReaderException { // get selected elements Elements list = doc.select(selector); // get the element Element el; if (this.index == null) { if (list.size() != 1) { throw new FileReaderException("" + list.size() + " HTML element(s) selected"); } el = list.first(); } else { el = list.get(this.index); } // verify element is a table if (el.tag().getName().equals("table")) { return el; } else { throw new FileReaderException("selected (" + selector + ") element is a " + el.tag().getName() + ", not a table"); } }
Example 12
Source File: ZIMuKuCommon.java From SubTitleSearcher with Apache License 2.0 | 5 votes |
/** * 获取下载网址列表 * @return */ public static JSONArray getDetailList(String url) { String result = httpGet(baseUrl+url); //System.out.println(result); Document doc = Jsoup.parse(result); Elements matchList = doc.select("#subtb tbody tr"); if(matchList.size() == 0)return new JSONArray(); //System.out.println(matchList.html()); JSONArray resList = new JSONArray(); for(int i = 0 ; i < matchList.size(); i++) { Element row = matchList.get(i); JSONObject resRow = new JSONObject(); resRow.put("url", row.selectFirst("a").attr("href")); resRow.put("title", row.selectFirst("a").attr("title")); resRow.put("ext", row.selectFirst(".label-info").text()); Elements authorInfos = row.select(".gray"); StringBuffer authorInfo = new StringBuffer(); authorInfos.forEach(element ->{ authorInfo.append(element.text() + ","); }); if(authorInfo.length() > 0) { resRow.put("authorInfo", authorInfo.toString().substring(0, authorInfo.length()-1)); }else { resRow.put("authorInfo", ""); } resRow.put("lang", row.selectFirst("img").attr("alt")); resRow.put("rate", row.selectFirst(".rating-star").attr("title").replace("字幕质量:", "")); resRow.put("downCount", row.select("td").get(3).text()); resList.add(resRow); } return resList; }
Example 13
Source File: WordToHtmlRenditionProviderTest.java From spring-content with Apache License 2.0 | 5 votes |
/**
 * Converts the sample .docx resource to {@code text/html} and checks that the parsed
 * result contains exactly one {@code HTML} element.
 */
@Test
public void testConvert() throws Exception {
    InputStream source = this.getClass().getResourceAsStream("/sample-docx.docx");
    InputStream rendition = service.convert(source, "text/html");

    Document parsed = Jsoup.parse(rendition, "UTF8", "http://example.com");
    Elements roots = parsed.getElementsByTag("HTML");

    assertThat(roots.size(), is(1));
    Element root = roots.get(0);
    assertThat(root, is(not(nullValue())));
}
Example 14
Source File: TestPutHTMLElement.java From localization_nifi with Apache License 2.0 | 5 votes |
@Test public void testAppendPElementToDiv() throws Exception { final String MOD_VALUE = "<p>modified value</p>"; testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "#put"); testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.APPEND_ELEMENT); testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE); testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); testRunner.run(); testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1); testRunner.assertTransferCount(PutHTMLElement.REL_INVALID_HTML, 0); testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1); testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0); List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS); assertTrue(ffs.size() == 1); String data = new String(testRunner.getContentAsByteArray(ffs.get(0))); //Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want. Document doc = Jsoup.parse(data); Elements eles = doc.select("#put"); Element ele = eles.get(0); assertTrue(StringUtils.equals("<a href=\"httpd://localhost\"></a> \n" + "<p>modified value</p>", ele.html())); }
Example 15
Source File: MagicVillePricer.java From MtgDesktopCompanion with GNU General Public License v3.0 | 4 votes |
public List<MagicPrice> getLocalePrice(MagicEdition me, MagicCard card) throws IOException { List<MagicPrice> list = new ArrayList<>(); String res = httpclient.doPost(getString(WEBSITE)+"/fr/resultats.php?zbob=1", httpclient.buildMap().put("recherche_titre", card.getName()).build(), null); if(res.length()>100) { logger.error("too much result"); return list; } String key = "ref="; String code = res.substring(res.indexOf(key), res.indexOf("\";")); String url = getString(WEBSITE)+"/fr/register/show_card_sale?"+code; logger.info(getName() + " looking for prices " + url); Document doc =URLTools.extractHtml(url); Element table = null; try { table = doc.select("table[width=98%]").get(2); // select the first table. } catch (IndexOutOfBoundsException e) { logger.info(getName() + " no sellers"); return list; } Elements rows = table.select(MTGConstants.HTML_TAG_TR); for (int i = 3; i < rows.size(); i = i + 2) { Element ligne = rows.get(i); Elements cols = ligne.getElementsByTag(MTGConstants.HTML_TAG_TD); MagicPrice mp = new MagicPrice(); String price = cols.get(4).text(); price = price.substring(0, price.length() - 1); mp.setValue(Double.parseDouble(price)); mp.setCurrency("EUR"); mp.setSeller(cols.get(0).text()); mp.setSite(getName()); mp.setUrl(url); mp.setQuality(cols.get(2).text()); mp.setLanguage(cols.get(1).getElementsByTag("span").text()); mp.setCountry("France"); mp.setFoil(mp.getLanguage().toLowerCase().contains("foil")); list.add(mp); } logger.info(getName() + " found " + list.size() + " item(s) return " + getString(MAX) + " items"); if (list.size() > getInt(MAX) && getInt(MAX) > -1) return list.subList(0, getInt(MAX)); return list; }
Example 16
Source File: HiParser.java From hipda with GNU General Public License v2.0 | 4 votes |
private static SimpleListBean parseFavorites(Document doc) { if (doc == null) { return null; } SimpleListBean list = new SimpleListBean(); int last_page = 1; //if this is the last page, page number is in <strong> Elements pagesES = doc.select("div.pages a"); pagesES.addAll(doc.select("div.pages strong")); if (pagesES.size() > 0) { for (Node n : pagesES) { int tmp = Utils.getIntFromString(((Element) n).text()); if (tmp > last_page) { last_page = tmp; } } } list.setMaxPage(last_page); Elements trES = doc.select("table.datatable tbody tr"); for (int i = 0; i < trES.size(); ++i) { Element trE = trES.get(i); SimpleListItemBean item = new SimpleListItemBean(); Elements subjectES = trE.select("th"); if (subjectES.size() == 0) { continue; } item.setTitle(subjectES.first().text()); Elements subjectAES = subjectES.first().select("a"); if (subjectAES.size() == 0) { continue; } String href = subjectAES.first().attr("href"); item.setTid(Utils.getMiddleString(href, "tid=", "&")); Elements timeES = trE.select("td.lastpost"); if (timeES.size() > 0) { item.setTime(timeES.first().text().trim()); } Elements forumES = trE.select("td.forum"); if (forumES.size() > 0) { item.setForum(forumES.first().text().trim()); } list.add(item); } return list; }
Example 17
Source File: MagicBazarShopper.java From MtgDesktopCompanion with GNU General Public License v3.0 | 4 votes |
/**
 * Builds the order entries of one order from its detail page.
 *
 * @param doc parsed order page
 * @param id transaction id stored on every entry
 * @param date transaction date stored on every entry
 * @return one entry per {@code div.table div.tr} row (the first row excluded) that has
 *         a non-empty name; empty when the page has no such rows
 */
private List<OrderEntry> parse(Document doc, String id, Date date) {
    List<OrderEntry> entries = new ArrayList<>();
    Elements table = doc.select("div.table div.tr");
    if (table.isEmpty()) {
        // remove(0) below would throw on an empty selection.
        return entries;
    }
    // The first row is discarded (presumably the header row).
    table.remove(0);

    Currency euro = Currency.getInstance("EUR");
    for (Element e : table) {
        boolean iscard = e.hasClass("filterElement");
        String name = e.select("div.td.name").text();
        if (name.isEmpty()) {
            continue;
        }

        OrderEntry entrie = new OrderEntry();
        entrie.setIdTransation(id);
        entrie.setSource(getName());
        entrie.setCurrency(euro);
        entrie.setSeller(getName());
        entrie.setTypeTransaction(TYPE_TRANSACTION.BUY);
        entrie.setTransactionDate(date);
        entrie.setDescription(name);

        if (iscard) {
            entrie.setType(TYPE_ITEM.CARD);
            entrie.setDescription(e.select("div.td.name.name_mobile").text());
            entrie.setItemPrice(UITools.parseDouble(e.attr("attribute_price")));
            String set = e.select("div.td.ext img").attr("title");
            try {
                entrie.setEdition(MTGControler.getInstance().getEnabled(MTGCardsProvider.class).getSetByName(set));
            } catch (Exception ex) {
                // Best effort: the entry is kept without an edition. The exception is
                // passed to the logger so the cause is not lost.
                logger.error(set + " is not found", ex);
            }
        } else {
            // Strip the currency symbol with a literal (non-regex) replacement.
            String price = e.select("div.new_price").html().replace(" " + euro.getSymbol(), "").trim();
            entrie.setItemPrice(UITools.parseDouble(price));

            // Classify non-card items from keywords in the description.
            String description = entrie.getDescription();
            String lowerDescription = description.toLowerCase();
            if (description.contains("Set") || lowerDescription.contains("collection")) {
                entrie.setType(TYPE_ITEM.FULLSET);
            } else if (lowerDescription.contains("booster")) {
                entrie.setType(TYPE_ITEM.BOOSTER);
            } else if (lowerDescription.startsWith("boite de") || description.contains("Display")) {
                entrie.setType(TYPE_ITEM.BOX);
            } else {
                entrie.setType(TYPE_ITEM.LOTS);
            }
        }

        notify(entrie);
        entries.add(entrie);
    }
    return entries;
}
Example 18
Source File: JsoupText.java From MD with Apache License 2.0 | 4 votes |
public static void main(String[] str) throws IOException { Document doc = Jsoup.parse(new URL(urls), 5000); //获取页数 Elements es_page = doc.getElementsByClass("page").first().getElementsByTag("select").first().getElementsByTag("option"); for (int i = 0; i < es_page.size(); i++) { Element et = es_page.get(i); if (et != null) { System.out.println(et.attr("value")); } } // // // //Video // ArrayList<Video> list = new ArrayList<>(); // // Document doc = Jsoup.parse(new URL(url), 5000); // Elements es_item = doc.getElementsByClass("item"); // for (int i = 0; i < es_item.size(); i++) { // Element et = es_item.get(i).getElementsByTag("h3").first(); // if (et != null) { // String title = et.getElementsByTag("b").text(); // String img = es_item.get(i).select("img").first().attr("src"); // String url = es_item.get(i).getElementsByClass("read").first().attr("href"); // Document docs = Jsoup.parse(new URL(Ip.url + url), 5000); // String urls = docs.getElementsByTag("iframe").attr("src"); // list.add(new Video(title, img, urls)); // // } // // } // for (int i = 0; i < list.size(); i++) { // System.out.println(list.get(i).toString()); // } //GIF // ArrayList<Gif> list = new ArrayList<>(); // // Document doc = Jsoup.parse(new URL(url), 5000); // Elements es_item = doc.getElementsByClass("item"); // for (int i = 0; i < es_item.size(); i++) { // Element et = es_item.get(i).getElementsByTag("h3").first(); // if (et != null) { // String title = et.getElementsByTag("b").text(); // String img = es_item.get(i).select("img").first().attr("src"); // String url = es_item.get(i).getElementsByClass("read").first().attr("href"); // list.add(new Gif(title, url)); // // } // // } // for (int i = 0; i < list.size(); i++) { // System.out.println(list.get(i).toString()); // } }
Example 19
Source File: ParseProxy.java From v9porn with MIT License | 4 votes |
public static BaseResult<List<ProxyModel>> parseXiCiDaiLi(String html, int page) { BaseResult<List<ProxyModel>> baseResult = new BaseResult<>(); baseResult.setTotalPage(1); Document doc = Jsoup.parse(html); Element ipList = doc.getElementById("ip_list"); Elements trs = ipList.select("tr"); int trSize = trs.size(); List<ProxyModel> proxyModelList = new ArrayList<>(); for (int i = 0; i < trSize; i++) { //第一是标题,跳过 if (i == 0) { continue; } //tr里的td Elements tds = trs.get(i).select("td"); ProxyModel proxyModel = new ProxyModel(); for (int j = 0; j < tds.size(); j++) { Element td = tds.get(j); switch (j) { case 0: //国家 break; case 1: //ip String ip = td.text(); proxyModel.setProxyIp(ip); break; case 2: //端口 String port = td.text(); proxyModel.setProxyPort(port); break; case 3: //城市 break; case 4: //匿名度 String anonymous = td.text(); proxyModel.setAnonymous(anonymous); break; case 5: //类型 http https socket String type = td.text(); if ("http".equalsIgnoreCase(type)) { proxyModel.setType(ProxyModel.TYPE_HTTP); } else if ("https".equalsIgnoreCase(type)) { proxyModel.setType(ProxyModel.TYPE_HTTPS); } else { proxyModel.setType(ProxyModel.TYPE_SOCKS); } break; case 6: //速度 break; case 7: //连接时间 String responseTime = td.select("div").first().attr("title"); proxyModel.setResponseTime(responseTime); break; case 8: //存活时间 break; case 9: //验证时间 break; default: } } proxyModelList.add(proxyModel); } baseResult.setData(proxyModelList); if (page == 1) { Elements elements = doc.getElementsByClass("pagination").first().select("a"); if (elements.size() > 3) { String totalPageStr = elements.get(elements.size() - 2).text(); Logger.t(TAG).d(totalPageStr); if (TextUtils.isDigitsOnly(totalPageStr)) { baseResult.setTotalPage(Integer.parseInt(totalPageStr)); } } } return baseResult; }
Example 20
Source File: FreeSSRCrawlerServiceImpl.java From ShadowSocks-Share with Apache License 2.0 | 4 votes |
/** * 网页内容解析 ss 信息 */ @Override protected Set<ShadowSocksDetailsEntity> parse(Document document) { Elements ssList = document.select("div.text-center"); Set<ShadowSocksDetailsEntity> set = new HashSet<>(ssList.size()); for (int i = 0; i < ssList.size(); i++) { try { Element element = ssList.get(i); // 取 h4 信息,为 ss 信息 Elements ssHtml = element.select("h4"); if (ssHtml.size() >= 5) { // server String server = StringUtils.remove(ssHtml.get(0).text(), "服务器地址:"); Assert.hasLength(server, "server 不能为空"); int server_port = NumberUtils.toInt(StringUtils.remove(ssHtml.get(1).text(), "端口:")); // Assert.isNull(port, "port 不能为空"); String password = StringUtils.remove(ssHtml.get(2).text(), "密码:"); Assert.hasLength(password, "password 不能为空"); String method = StringUtils.remove(ssHtml.get(3).text(), "加密方式:"); Assert.hasLength(method, "method 不能为空"); // 账号状态 String status = ssHtml.get(4).text(); if (status.contains("正常")) { ShadowSocksDetailsEntity ss = new ShadowSocksDetailsEntity(server, server_port, password, method, SS_PROTOCOL, SS_OBFS); ss.setValid(false); ss.setValidTime(new Date()); ss.setTitle(document.title()); ss.setRemarks(TARGET_URL); ss.setGroup("ShadowSocks-Share"); // 测试网络 if (isReachable(ss)) ss.setValid(true); // 无论是否可用都入库 set.add(ss); log.debug("*************** 第 {} 条 ***************{}{}", i + 1, System.lineSeparator(), ss); // log.debug("{}", ss.getLink()); } } } catch (Exception e) { log.error(e.getMessage(), e); } } return set; }