Java Code Examples for org.jsoup.nodes.Document#body()
The following examples show how to use
org.jsoup.nodes.Document#body().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SelectorTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test @MultiLocaleTest public void containsData() { String html = "<p>function</p><script>FUNCTION</script><style>item</style><span><!-- comments --></span>"; Document doc = Jsoup.parse(html); Element body = doc.body(); Elements dataEls1 = body.select(":containsData(function)"); Elements dataEls2 = body.select("script:containsData(function)"); Elements dataEls3 = body.select("span:containsData(comments)"); Elements dataEls4 = body.select(":containsData(o)"); Elements dataEls5 = body.select("style:containsData(ITEM)"); assertEquals(2, dataEls1.size()); // body and script assertEquals(1, dataEls2.size()); assertEquals(dataEls1.last(), dataEls2.first()); assertEquals("<script>FUNCTION</script>", dataEls2.outerHtml()); assertEquals(1, dataEls3.size()); assertEquals("span", dataEls3.first().tagName()); assertEquals(3, dataEls4.size()); assertEquals("body", dataEls4.first().tagName()); assertEquals("script", dataEls4.get(1).tagName()); assertEquals("span", dataEls4.get(2).tagName()); assertEquals(1, dataEls5.size()); }
Example 2
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void createsDocumentStructure() { String html = "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>"; Document doc = Jsoup.parse(html); Element head = doc.head(); Element body = doc.body(); assertEquals(1, doc.children().size()); // root node: contains html node assertEquals(2, doc.child(0).children().size()); // html node: head and body assertEquals(3, head.children().size()); assertEquals(1, body.children().size()); assertEquals("keywords", head.getElementsByTag("meta").get(0).attr("name")); assertEquals(0, body.getElementsByTag("meta").size()); assertEquals("jsoup", doc.title()); assertEquals("Hello world", body.text()); assertEquals("Hello world", body.children().get(0).text()); }
Example 3
Source File: SelectorTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test @MultiLocaleTest public void containsData() { String html = "<p>function</p><script>FUNCTION</script><style>item</style><span><!-- comments --></span>"; Document doc = Jsoup.parse(html); Element body = doc.body(); Elements dataEls1 = body.select(":containsData(function)"); Elements dataEls2 = body.select("script:containsData(function)"); Elements dataEls3 = body.select("span:containsData(comments)"); Elements dataEls4 = body.select(":containsData(o)"); Elements dataEls5 = body.select("style:containsData(ITEM)"); assertEquals(2, dataEls1.size()); // body and script assertEquals(1, dataEls2.size()); assertEquals(dataEls1.last(), dataEls2.first()); assertEquals("<script>FUNCTION</script>", dataEls2.outerHtml()); assertEquals(1, dataEls3.size()); assertEquals("span", dataEls3.first().tagName()); assertEquals(3, dataEls4.size()); assertEquals("body", dataEls4.first().tagName()); assertEquals("script", dataEls4.get(1).tagName()); assertEquals("span", dataEls4.get(2).tagName()); assertEquals(1, dataEls5.size()); }
Example 4
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void createsDocumentStructure() { String html = "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>"; Document doc = Jsoup.parse(html); Element head = doc.head(); Element body = doc.body(); assertEquals(1, doc.children().size()); // root node: contains html node assertEquals(2, doc.child(0).children().size()); // html node: head and body assertEquals(3, head.children().size()); assertEquals(1, body.children().size()); assertEquals("keywords", head.getElementsByTag("meta").get(0).attr("name")); assertEquals(0, body.getElementsByTag("meta").size()); assertEquals("jsoup", doc.title()); assertEquals("Hello world", body.text()); assertEquals("Hello world", body.children().get(0).text()); }
Example 5
Source File: BootstrapHandlerTest.java From flow with Apache License 2.0 | 6 votes |
/**
 * Builds a bootstrap page for a second, manually initialised UI and checks that the
 * page body has two child nodes, the first element child being {@code noscript}.
 */
@Test
public void renderUI() throws IOException {
    TestUI secondUi = new TestUI();
    initUI(testUI);

    // Manual set-up of the second UI; the order of these calls is kept as-is.
    secondUi.getInternals().setSession(session);
    VaadinRequest initRequest = createVaadinRequest();
    secondUi.doInit(initRequest, 0);
    secondUi.getRouter().initializeUI(secondUi, request);
    String contextRoot = contextRootRelativePath(request);
    secondUi.getInternals().setContextRoot(contextRoot);

    BootstrapContext context = new BootstrapContext(
            initRequest, null, session, secondUi, this::contextRootRelativePath);
    Document bootstrapPage = pageBuilder.getBootstrapPage(context);

    Element pageBody = bootstrapPage.body();
    assertEquals(2, pageBody.childNodeSize());
    assertEquals("noscript", pageBody.child(0).tagName());
}
Example 6
Source File: LoginWebView.java From NClientV2 with Apache License 2.0 | 6 votes |
/**
 * Rewrites the fetched login page before displaying it: the form element is renamed to a
 * div, a script defining {@code intercept()} is inserted in front of it, and the submit
 * control is wired to call that function. The result is loaded as base64-encoded HTML.
 *
 * @param url  address the HTML was fetched from (not used in this method)
 * @param html markup of the fetched page
 */
@Override
public void fetchUrl(String url, String html) {
    Document page = Jsoup.parse(html);
    Element pageBody = page.body();
    Element loginForm = pageBody.getElementsByTag("form").first();
    Element lead = pageBody.getElementsByClass("lead").first();
    lead.text("Tested");

    // Script placed before the (renamed) form; intercept() forwards the field values
    // to the Interceptor object.
    String interceptScript = "<script>\n"
            + "document.getElementsByClassName('lead')[0].innerHTML='test';\n"
            + "alert('test');\n"
            + "function intercept(){\n"
            + " password=document.getElementById('id_password').value;\n"
            + " email=document.getElementById('id_username_or_email').value;\n"
            + " token=document.getElementsByName('csrfmiddlewaretoken')[0].value;\n"
            + " captcha=document.getElementById('g-recaptcha-response').value;\n"
            + " Interceptor.intercept(email,password,token,captcha);\n"
            + "}\n"
            + "</script>";

    loginForm.tagName("div");
    loginForm.before(interceptScript);
    Element submit = loginForm.getElementsByAttributeValue("type", "submit").first();
    submit.attr("onclick", "intercept()");

    removeFetcher(fetcher);

    byte[] pageBytes = page.outerHtml().getBytes();
    String encodedHtml = Base64.encodeToString(pageBytes, Base64.NO_PADDING);
    loadDataWithBaseURL(Utility.getBaseUrl(), encodedHtml, "text/html", "base64", null);
}
Example 7
Source File: Parser.java From astor with GNU General Public License v2.0 | 5 votes |
/** * Parse a fragment of HTML into the {@code body} of a Document. * * @param bodyHtml fragment of HTML * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. * * @return Document, with empty head, and HTML parsed into body */ public static Document parseBodyFragment(String bodyHtml, String baseUri) { Document doc = Document.createShell(baseUri); Element body = doc.body(); List<Node> nodeList = parseFragment(bodyHtml, body, baseUri); Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented for (int i = nodes.length - 1; i > 0; i--) { nodes[i].remove(); } for (Node node : nodes) { body.appendChild(node); } return doc; }
Example 8
Source File: TagServlet.java From firing-range with Apache License 2.0 | 5 votes |
/**
 * Parses the {@code q} parameter as an HTML body fragment and passes the resulting elements,
 * together with an optional allowed tag and attribute taken from the path info
 * ({@code /<tag>} or {@code /<tag>/<attribute>}), to {@code handleRequest}.
 *
 * <p>Responds with a 400 error when {@code q} is missing or yields no elements.
 *
 * @param request  incoming request; reads parameter {@code q} and the path info
 * @param response response used for errors and handed to {@code handleRequest}
 * @throws IOException declared for the response-writing helpers
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String q = request.getParameter("q");
    if (q == null) {
        Responses.sendError(response, "Missing q parameter", 400);
        return;
    }

    Document doc = Jsoup.parseBodyFragment(q);
    Element body = doc.body();
    Elements elements = body.getAllElements();
    // The body element itself is only kept when the input mentions "body".
    if (!q.contains("body")) {
        elements.remove(body);
    }
    if (elements.isEmpty()) {
        Responses.sendError(response, "Invalid input, no tags", 400);
        return;
    }

    String allowedTag = "";
    String allowedAttribute = "";
    String rawPathInfo = request.getPathInfo();
    if (rawPathInfo != null && !rawPathInfo.isEmpty()) {
        String pathInfo = rawPathInfo.substring(1);
        // Split once with limit -1 so trailing empty segments are kept: a path such as
        // "/tag/" then yields an empty attribute instead of an ArrayIndexOutOfBoundsException.
        String[] segments = pathInfo.split("/", -1);
        allowedTag = segments[0];
        if (segments.length > 1) {
            allowedAttribute = segments[1];
        }
    }

    handleRequest(elements, response, allowedTag, allowedAttribute);
}
Example 9
Source File: Parser.java From jsoup-learning with MIT License | 5 votes |
/** * Parse a fragment of HTML into the {@code body} of a Document. * * @param bodyHtml fragment of HTML * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. * * @return Document, with empty head, and HTML parsed into body */ public static Document parseBodyFragment(String bodyHtml, String baseUri) { Document doc = Document.createShell(baseUri); Element body = doc.body(); List<Node> nodeList = parseFragment(bodyHtml, body, baseUri); Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented for (Node node : nodes) { body.appendChild(node); } return doc; }
Example 10
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 5 votes |
@Test public void parsesComments() { String html = "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>"; Document doc = Jsoup.parse(html); Element body = doc.body(); Comment comment = (Comment) body.childNode(1); // comment should not be sub of img, as it's an empty tag assertEquals(" <table><tr><td></table> ", comment.getData()); Element p = body.child(1); TextNode text = (TextNode) p.childNode(0); assertEquals("Hello", text.getWholeText()); }
Example 11
Source File: Parser.java From astor with GNU General Public License v2.0 | 5 votes |
/** * Parse a fragment of HTML into the {@code body} of a Document. * * @param bodyHtml fragment of HTML * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. * * @return Document, with empty head, and HTML parsed into body */ public static Document parseBodyFragment(String bodyHtml, String baseUri) { Document doc = Document.createShell(baseUri); Element body = doc.body(); List<Node> nodeList = parseFragment(bodyHtml, body, baseUri); Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented for (int i = nodes.length - 1; i > 0; i--) { nodes[i].remove(); } for (Node node : nodes) { body.appendChild(node); } return doc; }
Example 12
Source File: ZeppelinRDisplay.java From zeppelin with Apache License 2.0 | 5 votes |
/**
 * Chooses how to display R output based on markers found in the parsed body HTML.
 *
 * <p>Order of checks: no {@code p} elements gives raw HTML; none of the known markers
 * gives text; {@code %table} gives a table; {@code %img} gives an image; otherwise HTML.
 *
 * @param html       markup to inspect
 * @param imageWidth passed through to {@code htmlDisplay}
 * @return the display selected by the checks above
 */
public static RDisplay render(String html, String imageWidth) {
    Document parsed = Jsoup.parse(html);
    parsed.outputSettings().prettyPrint(false);
    Element bodyEl = parsed.body();

    boolean hasParagraphs = !bodyEl.getElementsByTag("p").isEmpty();
    if (!hasParagraphs) {
        return new RDisplay(bodyEl.html(), Type.HTML, Code.SUCCESS);
    }

    String markup = bodyEl.html();
    boolean hasKnownMarker = markup.contains("<img")
            || markup.contains("<script")
            || markup.contains("%html ")
            || markup.contains("%table ")
            || markup.contains("%img ");
    if (!hasKnownMarker) {
        return textDisplay(bodyEl);
    }

    if (markup.contains("%table")) {
        return tableDisplay(bodyEl);
    } else if (markup.contains("%img")) {
        return imgDisplay(bodyEl);
    } else {
        return htmlDisplay(bodyEl, imageWidth);
    }
}
Example 13
Source File: RemoveEmptyText.java From baleen with Apache License 2.0 | 5 votes |
@Override public void manipulate(Document document) { Element body = document.body(); while (!removeEmpty(body)) { // Repeat as needed.... work done in the while } }
Example 14
Source File: JsoupUtils.java From springboot-admin with Apache License 2.0 | 5 votes |
/**
 * Returns the inner HTML of the body of the parsed input.
 *
 * @param html markup to parse
 * @return the body's inner HTML; the input itself when it is blank or when no
 *         document or body is available
 */
public static String getBodyHtml(String html) {
    if (!StringUtils.isNotBlank(html)) {
        return html;
    }
    Document parsed = Jsoup.parse(html);
    if (parsed == null) {
        return html;
    }
    Element bodyEl = parsed.body();
    if (bodyEl == null) {
        return html;
    }
    return bodyEl.html();
}
Example 15
Source File: BaseSoup.java From ShareBox with Apache License 2.0 | 5 votes |
/**
 * Stores the arguments, parses {@code mHtml}, records the head and body elements, and
 * delegates to {@code parse} to fill the value map.
 *
 * @param arg arguments saved in {@code mArguments}
 * @return the value map ({@code mValues}), created on first use
 */
public Map<String, Object> doParse(Object... arg) {
    mArguments = arg;
    if (null == mValues) {
        mValues = new HashMap<>();
    }

    Document parsed = Jsoup.parse(mHtml);
    mHeader = parsed.head();
    mBody = parsed.body();

    parse(parsed, mHeader, mBody, mValues);
    return mValues;
}
Example 16
Source File: IpProxy.java From emotional_analysis with Apache License 2.0 | 5 votes |
public static List<IpEntity> getProxyIp(String url) throws Exception{ ArrayList<IpEntity> ipList = new ArrayList<>(); Response execute = Jsoup.connect(url) .header("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36") .header("Cache-Control", "max-age=60").header("Accept", "*/*") .header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6").header("Connection", "keep-alive") .header("Referer", "http://music.163.com/song?id=186016") .header("Origin", "http://music.163.com").header("Host", "music.163.com") .header("Content-Type", "application/x-www-form-urlencoded") .header("Cookie", "UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D; P_INFO=m18703414250@163.com|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|18703414250@163.com; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; 
__utmb=94650624.20.10.1508942690; __utmc=94650624; __utmz=94650624.1508394258.18.4.utmcsr=xujin.org|utmccn=(referral)|utmcmd=referral|utmcct=/") .method(Method.GET).ignoreContentType(true) .timeout(2099999999).execute(); Document pageJson = execute.parse(); Element body = pageJson.body(); List<Node> childNodes = body.childNode(11).childNode(3).childNode(5).childNode(1).childNodes(); //把前10位的代理IP放到List中 for(int i = 2;i <= 30;i += 2){ IpEntity ipEntity = new IpEntity(); Node node = childNodes.get(i); List<Node> nodes = node.childNodes(); String ip = nodes.get(3).childNode(0).toString(); int port = Integer.parseInt(nodes.get(5).childNode(0).toString()); ipEntity.setIp(ip); ipEntity.setPort(port); ipList.add(ipEntity); } return ipList; }
Example 17
Source File: ThreadPageRequest.java From something.apk with MIT License | 4 votes |
/**
 * Converts a parsed thread page into a {@link ThreadPage}: reads paging, bookmark, title,
 * forum and thread ids from the document, parses the posts, and renders the header, post
 * and footer templates into one HTML string.
 *
 * @param document                 parsed thread page
 * @param showImages               passed through to {@code parsePosts}
 * @param showAvatars              passed through to {@code parsePosts}
 * @param hidePreviouslyReadImages passed through to {@code parsePosts}
 * @param jumpToPost               post id to jump to; values of 0 or less mean none
 * @param redirectedUrl            URL the request was redirected to, may be empty or null;
 *                                 its fragment selects the post to jump to
 * @return the rendered page together with its paging and state information
 */
public static ThreadPage processThreadPage(Document document, boolean showImages, boolean showAvatars, boolean hidePreviouslyReadImages, long jumpToPost, String redirectedUrl){
    ArrayList<HashMap<String, String>> posts = new ArrayList<HashMap<String, String>>();
    int currentPage, maxPage = 1, threadId, forumId, unread;
    String jumpToId = jumpToPost > 0 ? "#post"+jumpToPost : null;
    String ptiFragment = null;

    if(!TextUtils.isEmpty(redirectedUrl)){
        Uri url = Uri.parse(redirectedUrl);
        ptiFragment = url.getFragment();
        // Plain equality, not String.matches: matches() would treat the fragment as a
        // regex, throwing NullPointerException when the URL has no fragment and
        // PatternSyntaxException when the fragment contains regex metacharacters.
        if("lastpost".equals(ptiFragment)){
            ptiFragment = null;
            jumpToId = "#lastpost";
        }
    }

    // Paging information from the page selector.
    Element pages = document.getElementsByClass("pages").first();
    currentPage = FastUtils.safeParseInt(pages.getElementsByAttribute("selected").attr("value"), 1);
    Element lastPage = pages.getElementsByTag("option").last();
    if(lastPage != null){
        maxPage = FastUtils.safeParseInt(lastPage.attr("value"), 1);
    }

    boolean bookmarked = document.getElementsByClass("unbookmark").size() > 0;
    String threadTitle = TextUtils.htmlEncode(document.getElementsByClass("bclast").first().text());

    Element body = document.body();
    forumId = Integer.parseInt(body.attr("data-forum"));
    threadId = Integer.parseInt(body.attr("data-thread"));

    // Replying is allowed unless the forum is an archive or the thread bar shows the closed icon.
    Elements threadbars = document.getElementsByClass("threadbar");
    boolean canReply = !Constants.isArchiveForum(forumId)
            && threadbars.first().getElementsByAttributeValueContaining("src", "images/forum-closed.gif").size() == 0;

    unread = parsePosts(document, posts, showImages, showAvatars, hidePreviouslyReadImages, ptiFragment, canReply, currentPage == maxPage, forumId);

    StringBuilder builder = new StringBuilder(2048);
    int previouslyRead = posts.size()-unread;

    HashMap<String, String> headerArgs = new HashMap<String, String>();
    headerArgs.put("jumpToPostId", jumpToId);
    headerArgs.put("fontSize", SomePreferences.fontSize);
    headerArgs.put("theme", getTheme(forumId));
    // Only set when the page mixes read and unread posts.
    headerArgs.put("previouslyRead", previouslyRead > 0 && unread > 0
            ? previouslyRead+" Previous Post"+(previouslyRead > 1 ? "s":"")
            : null);

    MustCache.applyHeaderTemplate(builder, headerArgs);
    for(HashMap<String, String> post : posts){
        MustCache.applyPostTemplate(builder, post);
    }
    MustCache.applyFooterTemplate(builder, null);

    ThreadItem cachedThread = ThreadManager.getThread(threadId);
    if(cachedThread != null){
        cachedThread.updateUnreadCount(currentPage, maxPage, SomePreferences.threadPostPerPage);
    }

    return new ThreadPage(builder.toString(), currentPage, maxPage, threadId, forumId, threadTitle, -unread, bookmarked, canReply);
}
Example 18
Source File: AbstractHtmlConsumer.java From baleen with Apache License 2.0 | 4 votes |
/**
 * Writes the CAS content as an HTML file: builds an empty document with the annotation's
 * language, adds an optional stylesheet link, a charset declaration and metadata to the
 * head, sets the title when a {@code documentTitle} metadata entry exists, delegates the
 * body to {@code writeBody}, and saves the result to the file from {@code getFileName}.
 *
 * @param jCas the CAS to export
 * @throws AnalysisEngineProcessException if the file cannot be written
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
    final File f = getFileName(jCas);
    final DocumentAnnotation da = getDocumentAnnotation(jCas);

    final Document doc =
        Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>");
    doc.outputSettings(new Document.OutputSettings().prettyPrint(false));

    final Element head = doc.head();
    if (!Strings.isNullOrEmpty(css)) {
        final Element cssLink = head.appendElement("link");
        cssLink.attr("rel", "stylesheet");
        cssLink.attr("href", css);
    }

    final Element charset = head.appendElement("meta");
    charset.attr("charset", "utf-8");

    appendMeta(head, "document.type", da.getDocType());
    appendMeta(head, "document.sourceUri", da.getSourceUri());
    appendMeta(head, "externalId", da.getHash());
    appendMeta(head, "document.classification", da.getDocumentClassification());
    appendMeta(
        head,
        "document.caveats",
        String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats())));
    appendMeta(
        head,
        "document.releasability",
        String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability())));

    // Copy every Metadata annotation into the head; the last "documentTitle" entry
    // (case-insensitive key) becomes the page title.
    String title = null;
    for (final Metadata md : JCasUtil.select(jCas, Metadata.class)) {
        appendMeta(head, md.getKey(), md.getValue());
        if ("documentTitle".equalsIgnoreCase(md.getKey())) {
            title = md.getValue();
        }
    }
    if (!Strings.isNullOrEmpty(title)) {
        doc.title(title);
    }

    final Element body = doc.body();
    writeBody(jCas, body);

    try {
        // The head declares charset utf-8, so the bytes must be UTF-8 too; the platform
        // default charset would garble non-ASCII text wherever that default differs.
        FileUtils.writeStringToFile(f, doc.html(), Charset.forName("UTF-8"));
    } catch (final IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Example 19
Source File: Expression.java From firing-range with Apache License 2.0 | 4 votes |
/**
 * Parses the {@code q} parameter as an HTML body fragment and echoes back every element
 * that passes an attribute filter: elements carrying an attribute whose key starts with
 * "on", equals "href" or "src", or a "style" attribute whose value contains "expression",
 * are dropped.
 *
 * <p>Responds with a 400 error when {@code q} is missing or yields no elements.
 *
 * @param request  incoming request; reads parameter {@code q}
 * @param response response used for the error or the echoed markup
 * @throws IOException declared for the response-writing helpers
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    if (request.getParameter("q") == null) {
        Responses.sendError(response, "Missing q parameter", 400);
        return;
    }
    String query = request.getParameter("q");

    Element bodyEl = Jsoup.parseBodyFragment(query).body();
    Elements candidates = bodyEl.getAllElements();
    candidates.remove(bodyEl);
    if (candidates.isEmpty()) {
        Responses.sendError(response, "Invalid input, no tags", 400);
        return;
    }

    StringBuilder output = new StringBuilder();
    for (Element candidate : candidates) {
        boolean accepted = true;
        for (Attribute attr : candidate.attributes()) {
            String key = attr.getKey().toLowerCase();
            boolean blockedKey = key.startsWith("on") || key.equals("href") || key.equals("src");
            boolean styleExpression =
                    key.equals("style") && attr.getValue().toLowerCase().contains("expression");
            if (blockedKey || styleExpression) {
                accepted = false;
            }
        }
        if (accepted) {
            output.append(candidate.toString());
        }
    }

    Responses.sendXssed(response, output.toString());
}
Example 20
Source File: SMSender.java From rebuild with GNU General Public License v3.0 | 4 votes |
/**
 * Loads the mail template resource {@code locales/email_zh-CN.html} and parses it as UTF-8.
 *
 * @return the body element of the parsed template
 * @throws IOException if the template file cannot be read
 */
protected static Element getMailTemplate() throws IOException {
    File templateFile = SysConfiguration.getFileOfRes("locales/email_zh-CN.html");
    Document template = Jsoup.parse(templateFile, "utf-8");
    Element templateBody = template.body();
    return templateBody;
}