Java Code Examples for org.jsoup.nodes.Document#body()
The following examples show how to use org.jsoup.nodes.Document#body().
Example 1
Source File: From astor with GNU General Public License v2.0 | 6 votes |
@Test @MultiLocaleTest public void containsData() { String html = "<p>function</p><script>FUNCTION</script><style>item</style><span><!-- comments --></span>"; Document doc = Jsoup.parse(html); Element body = doc.body(); Elements dataEls1 =":containsData(function)"); Elements dataEls2 ="script:containsData(function)"); Elements dataEls3 ="span:containsData(comments)"); Elements dataEls4 =":containsData(o)"); Elements dataEls5 ="style:containsData(ITEM)"); assertEquals(2, dataEls1.size()); // body and script assertEquals(1, dataEls2.size()); assertEquals(dataEls1.last(), dataEls2.first()); assertEquals("<script>FUNCTION</script>", dataEls2.outerHtml()); assertEquals(1, dataEls3.size()); assertEquals("span", dataEls3.first().tagName()); assertEquals(3, dataEls4.size()); assertEquals("body", dataEls4.first().tagName()); assertEquals("script", dataEls4.get(1).tagName()); assertEquals("span", dataEls4.get(2).tagName()); assertEquals(1, dataEls5.size()); }
Example 2
Source File: From astor with GNU General Public License v2.0 | 6 votes |
@Test public void createsDocumentStructure() { String html = "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>"; Document doc = Jsoup.parse(html); Element head = doc.head(); Element body = doc.body(); assertEquals(1, doc.children().size()); // root node: contains html node assertEquals(2, doc.child(0).children().size()); // html node: head and body assertEquals(3, head.children().size()); assertEquals(1, body.children().size()); assertEquals("keywords", head.getElementsByTag("meta").get(0).attr("name")); assertEquals(0, body.getElementsByTag("meta").size()); assertEquals("jsoup", doc.title()); assertEquals("Hello world", body.text()); assertEquals("Hello world", body.children().get(0).text()); }
Example 3
Source File: From astor with GNU General Public License v2.0 | 6 votes |
Example 4
Source File: From astor with GNU General Public License v2.0 | 6 votes |
Example 5
Source File: From flow with Apache License 2.0 | 6 votes |
/**
 * Builds a bootstrap page for a second, freshly initialised {@code TestUI} and checks that the
 * page body has two child nodes, the first element being a {@code noscript} tag.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void renderUI() throws IOException {
    TestUI secondUi = new TestUI();
    initUI(testUI);

    // Wire the second UI to the shared session and run its initialisation.
    secondUi.getInternals().setSession(session);
    VaadinRequest initRequest = createVaadinRequest();
    secondUi.doInit(initRequest, 0);
    secondUi.getRouter().initializeUI(secondUi, request);
    secondUi.getInternals().setContextRoot(contextRootRelativePath(request));

    BootstrapContext context =
            new BootstrapContext(initRequest, null, session, secondUi, this::contextRootRelativePath);
    Document bootstrapPage = pageBuilder.getBootstrapPage(context);

    Element pageBody = bootstrapPage.body();
    assertEquals(2, pageBody.childNodeSize());
    assertEquals("noscript", pageBody.child(0).tagName());
}
Example 6
Source File: From NClientV2 with Apache License 2.0 | 6 votes |
/**
 * Rewrites the fetched page before displaying it: the first {@code form} becomes a {@code div},
 * a script defining {@code intercept()} is inserted before it, and the submit control's
 * {@code onclick} is pointed at that function. The result is loaded as base64-encoded HTML.
 *
 * @param url  the fetched URL (not read by this method)
 * @param html the fetched page markup
 */
@Override
public void fetchUrl(String url, String html) {
    // Script injected ahead of the former form; intercept() forwards the field values to Interceptor.
    final String interceptScript =
            "<script>\n"
            + "document.getElementsByClassName('lead')[0].innerHTML='test';\n"
            + "alert('test');\n"
            + "function intercept(){\n"
            + " password=document.getElementById('id_password').value;\n"
            + " email=document.getElementById('id_username_or_email').value;\n"
            + " token=document.getElementsByName('csrfmiddlewaretoken')[0].value;\n"
            + " captcha=document.getElementById('g-recaptcha-response').value;\n"
            + " Interceptor.intercept(email,password,token,captcha);\n"
            + "}\n"
            + "</script>";

    Document page = Jsoup.parse(html);
    Element pageBody = page.body();
    Element loginForm = pageBody.getElementsByTag("form").first();
    pageBody.getElementsByClass("lead").first().text("Tested");

    loginForm.tagName("div");
    loginForm.before(interceptScript);
    loginForm.getElementsByAttributeValue("type", "submit").first().attr("onclick", "intercept()");

    removeFetcher(fetcher);

    byte[] rawPage = page.outerHtml().getBytes();
    String encodedHtml = Base64.encodeToString(rawPage, Base64.NO_PADDING);
    loadDataWithBaseURL(Utility.getBaseUrl(), encodedHtml, "text/html", "base64", null);
}
Example 7
Source File: From astor with GNU General Public License v2.0 | 5 votes |
/** * Parse a fragment of HTML into the {@code body} of a Document. * * @param bodyHtml fragment of HTML * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. * * @return Document, with empty head, and HTML parsed into body */ public static Document parseBodyFragment(String bodyHtml, String baseUri) { Document doc = Document.createShell(baseUri); Element body = doc.body(); List<Node> nodeList = parseFragment(bodyHtml, body, baseUri); Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented for (int i = nodes.length - 1; i > 0; i--) { nodes[i].remove(); } for (Node node : nodes) { body.appendChild(node); } return doc; }
Example 8
Source File: From firing-range with Apache License 2.0 | 5 votes |
/**
 * Parses the {@code q} parameter as an HTML body fragment and hands its elements to
 * {@code handleRequest} together with an allowed tag and attribute taken from the path info.
 *
 * <p>The path info (minus its leading character) is read as {@code tag[/attribute[/...]]}:
 * the text before the first slash is the allowed tag and the text between the first and second
 * slash is the allowed attribute. Either may be empty. Paths such as {@code /tag/} previously
 * raised {@code ArrayIndexOutOfBoundsException} and now yield an empty attribute.
 *
 * @param request  incoming request; must carry a {@code q} parameter
 * @param response response used for error reporting and passed on to {@code handleRequest}
 * @throws IOException declared by the servlet signature
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String q = request.getParameter("q");
    if (q == null) {
        Responses.sendError(response, "Missing q parameter", 400);
        return;
    }

    Document doc = Jsoup.parseBodyFragment(q);
    Element body = doc.body();
    Elements elements = body.getAllElements();
    // The wrapping body element is dropped unless the input itself mentions "body".
    if (!q.contains("body")) {
        elements.remove(body);
    }
    if (elements.isEmpty()) {
        Responses.sendError(response, "Invalid input, no tags", 400);
        return;
    }

    String allowedTag = "";
    String allowedAttribute = "";
    String rawPathInfo = request.getPathInfo();
    if (rawPathInfo != null) {
        String pathInfo = rawPathInfo.substring(1);
        int firstSlash = pathInfo.indexOf('/');
        if (firstSlash >= 0) {
            allowedTag = pathInfo.substring(0, firstSlash);
            // Only the second path segment is the attribute; anything after a further slash is ignored.
            String remainder = pathInfo.substring(firstSlash + 1);
            int nextSlash = remainder.indexOf('/');
            allowedAttribute = nextSlash >= 0 ? remainder.substring(0, nextSlash) : remainder;
        } else {
            allowedTag = pathInfo;
        }
    }

    handleRequest(elements, response, allowedTag, allowedAttribute);
}
Example 9
Source File: From jsoup-learning with MIT License | 5 votes |
/** * Parse a fragment of HTML into the {@code body} of a Document. * * @param bodyHtml fragment of HTML * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. * * @return Document, with empty head, and HTML parsed into body */ public static Document parseBodyFragment(String bodyHtml, String baseUri) { Document doc = Document.createShell(baseUri); Element body = doc.body(); List<Node> nodeList = parseFragment(bodyHtml, body, baseUri); Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented for (Node node : nodes) { body.appendChild(node); } return doc; }
Example 10
Source File: From astor with GNU General Public License v2.0 | 5 votes |
@Test public void parsesComments() { String html = "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>"; Document doc = Jsoup.parse(html); Element body = doc.body(); Comment comment = (Comment) body.childNode(1); // comment should not be sub of img, as it's an empty tag assertEquals(" <table><tr><td></table> ", comment.getData()); Element p = body.child(1); TextNode text = (TextNode) p.childNode(0); assertEquals("Hello", text.getWholeText()); }
Example 11
Source File: From astor with GNU General Public License v2.0 | 5 votes |
Example 12
Source File: From zeppelin with Apache License 2.0 | 5 votes |
/**
 * Parses {@code html} with pretty printing disabled and picks a display for its body.
 *
 * <p>A body without {@code p} elements is returned as-is as an HTML display. Otherwise the body
 * markup is scanned: with none of {@code <img}, {@code <script}, {@code "%html "},
 * {@code "%table "} or {@code "%img "} present it becomes a text display; else {@code %table}
 * selects the table display, {@code %img} the image display, and anything else the HTML display.
 *
 * @param html       markup to render
 * @param imageWidth passed through to {@code htmlDisplay}
 * @return the display chosen by the rules above
 */
public static RDisplay render(String html, String imageWidth) {
    Document parsed = Jsoup.parse(html);
    parsed.outputSettings().prettyPrint(false);
    Element bodyEl = parsed.body();

    if (bodyEl.getElementsByTag("p").isEmpty()) {
        return new RDisplay(bodyEl.html(), Type.HTML, Code.SUCCESS);
    }

    String markup = bodyEl.html();
    boolean hasRichMarker = markup.contains("<img")
            || markup.contains("<script")
            || markup.contains("%html ")
            || markup.contains("%table ")
            || markup.contains("%img ");

    if (!hasRichMarker) {
        return textDisplay(bodyEl);
    } else if (markup.contains("%table")) {
        return tableDisplay(bodyEl);
    } else if (markup.contains("%img")) {
        return imgDisplay(bodyEl);
    } else {
        return htmlDisplay(bodyEl, imageWidth);
    }
}
Example 13
Source File: From baleen with Apache License 2.0 | 5 votes |
@Override public void manipulate(Document document) { Element body = document.body(); while (!removeEmpty(body)) { // Repeat as needed.... work done in the while } }
Example 14
Source File: From springboot-admin with Apache License 2.0 | 5 votes |
/**
 * Returns the inner HTML of the {@code body} obtained by parsing {@code html}.
 *
 * @param html markup to parse
 * @return the body's inner HTML; the input itself when it is blank or when parsing yields no
 *         document or no body
 */
public static String getBodyHtml(String html) {
    if (!StringUtils.isNotBlank(html)) {
        return html;
    }
    Document parsed = Jsoup.parse(html);
    if (parsed == null || parsed.body() == null) {
        return html;
    }
    return parsed.body().html();
}
Example 15
Source File: From ShareBox with Apache License 2.0 | 5 votes |
/**
 * Stores the arguments, parses {@code mHtml}, records its head and body, and delegates to
 * {@code parse} to fill the value map.
 *
 * @param arg arguments saved in {@code mArguments}
 * @return the value map {@code mValues}, created on first use
 */
public Map<String, Object> doParse(Object... arg) {
    mArguments = arg;
    if (mValues == null) {
        mValues = new HashMap<>();
    }

    Document parsed = Jsoup.parse(mHtml);
    mHeader = parsed.head();
    mBody = parsed.body();

    parse(parsed, mHeader, mBody, mValues);
    return mValues;
}
Example 16
Source File: From emotional_analysis with Apache License 2.0 | 5 votes |
public static List<IpEntity> getProxyIp(String url) throws Exception{ ArrayList<IpEntity> ipList = new ArrayList<>(); Response execute = Jsoup.connect(url) .header("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36") .header("Cache-Control", "max-age=60").header("Accept", "*/*") .header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6").header("Connection", "keep-alive") .header("Referer", "") .header("Origin", "").header("Host", "") .header("Content-Type", "application/x-www-form-urlencoded") .header("Cookie", "UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D;|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; __utmb=94650624.20.10.1508942690; __utmc=94650624;|utmccn=(referral)|utmcmd=referral|utmcct=/") 
.method(Method.GET).ignoreContentType(true) .timeout(2099999999).execute(); Document pageJson = execute.parse(); Element body = pageJson.body(); List<Node> childNodes = body.childNode(11).childNode(3).childNode(5).childNode(1).childNodes(); //把前10位的代理IP放到List中 for(int i = 2;i <= 30;i += 2){ IpEntity ipEntity = new IpEntity(); Node node = childNodes.get(i); List<Node> nodes = node.childNodes(); String ip = nodes.get(3).childNode(0).toString(); int port = Integer.parseInt(nodes.get(5).childNode(0).toString()); ipEntity.setIp(ip); ipEntity.setPort(port); ipList.add(ipEntity); } return ipList; }
Example 17
Source File: From something.apk with MIT License | 4 votes |
/**
 * Converts a parsed thread document into a {@code ThreadPage}: reads paging, bookmark, title,
 * forum/thread ids and reply state from the markup, parses the posts, and renders header, posts
 * and footer through the {@code MustCache} templates.
 *
 * <p>The redirect fragment is compared with {@code equals}. The previous
 * {@code "lastpost".matches(fragment)} used the fragment as a regular expression and threw
 * {@code NullPointerException} when the redirected URL carried no fragment.
 *
 * @param document                 parsed thread page
 * @param showImages               forwarded to {@code parsePosts}
 * @param showAvatars              forwarded to {@code parsePosts}
 * @param hidePreviouslyReadImages forwarded to {@code parsePosts}
 * @param jumpToPost               post id to jump to; values {@code <= 0} mean no explicit jump
 * @param redirectedUrl            URL the request was redirected to, may be empty or null
 * @return the rendered page together with its paging and thread metadata
 */
public static ThreadPage processThreadPage(Document document, boolean showImages, boolean showAvatars, boolean hidePreviouslyReadImages, long jumpToPost, String redirectedUrl){
    ArrayList<HashMap<String, String>> posts = new ArrayList<HashMap<String, String>>();
    int currentPage, maxPage = 1, threadId, forumId, unread;
    String jumpToId = jumpToPost > 0 ? "#post"+jumpToPost : null;
    String ptiFragment = null;
    if(!TextUtils.isEmpty(redirectedUrl)){
        Uri url = Uri.parse(redirectedUrl);
        ptiFragment = url.getFragment();
        // Null-safe literal comparison: a "lastpost" fragment becomes a jump to #lastpost.
        if("lastpost".equals(ptiFragment)){
            ptiFragment = null;
            jumpToId = "#lastpost";
        }
    }

    // Paging: the selected option is the current page, the last option is the final page.
    Element pages = document.getElementsByClass("pages").first();
    currentPage = FastUtils.safeParseInt(pages.getElementsByAttribute("selected").attr("value"), 1);
    Element lastPage = pages.getElementsByTag("option").last();
    if(lastPage != null){
        maxPage = FastUtils.safeParseInt(lastPage.attr("value"), 1);
    }

    boolean bookmarked = document.getElementsByClass("unbookmark").size() > 0;
    String threadTitle = TextUtils.htmlEncode(document.getElementsByClass("bclast").first().text());

    // Forum and thread ids are carried as data attributes on the body element.
    Element body = document.body();
    forumId = Integer.parseInt(body.attr("data-forum"));
    threadId = Integer.parseInt(body.attr("data-thread"));

    // Replying requires a non-archive forum and no forum-closed image in the first thread bar.
    Elements threadbars = document.getElementsByClass("threadbar");
    boolean canReply = !Constants.isArchiveForum(forumId)
            && threadbars.first().getElementsByAttributeValueContaining("src", "images/forum-closed.gif").size() == 0;

    unread = parsePosts(document, posts, showImages, showAvatars, hidePreviouslyReadImages, ptiFragment, canReply, currentPage == maxPage, forumId);

    StringBuilder builder = new StringBuilder(2048);
    int previouslyRead = posts.size()-unread;

    HashMap<String, String> headerArgs = new HashMap<String, String>();
    headerArgs.put("jumpToPostId", jumpToId);
    headerArgs.put("fontSize", SomePreferences.fontSize);
    headerArgs.put("theme", getTheme(forumId));
    // Only set when the page mixes previously read and unread posts.
    headerArgs.put("previouslyRead", previouslyRead > 0 && unread > 0
            ? previouslyRead+" Previous Post"+(previouslyRead > 1 ? "s":"")
            : null);

    MustCache.applyHeaderTemplate(builder, headerArgs);
    for(HashMap<String, String> post : posts){
        MustCache.applyPostTemplate(builder, post);
    }
    MustCache.applyFooterTemplate(builder, null);

    ThreadItem cachedThread = ThreadManager.getThread(threadId);
    if(cachedThread != null){
        cachedThread.updateUnreadCount(currentPage, maxPage, SomePreferences.threadPostPerPage);
    }
    return new ThreadPage(builder.toString(), currentPage, maxPage, threadId, forumId, threadTitle, -unread, bookmarked, canReply);
}
Example 18
Source File: From baleen with Apache License 2.0 | 4 votes |
@Override protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException { final File f = getFileName(jCas); final DocumentAnnotation da = getDocumentAnnotation(jCas); final Document doc = Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>"); doc.outputSettings(new Document.OutputSettings().prettyPrint(false)); final Element head = doc.head(); if (!Strings.isNullOrEmpty(css)) { final Element cssLink = head.appendElement("link"); cssLink.attr("rel", "stylesheet"); cssLink.attr("href", css); } final Element charset = head.appendElement("meta"); charset.attr("charset", "utf-8"); appendMeta(head, "document.type", da.getDocType()); appendMeta(head, "document.sourceUri", da.getSourceUri()); appendMeta(head, "externalId", da.getHash()); appendMeta(head, "document.classification", da.getDocumentClassification()); appendMeta( head, "document.caveats", String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats()))); appendMeta( head, "document.releasability", String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability()))); String title = null; for (final Metadata md :, Metadata.class)) { appendMeta(head, md.getKey(), md.getValue()); if ("documentTitle".equalsIgnoreCase(md.getKey())) { title = md.getValue(); } } if (!Strings.isNullOrEmpty(title)) { doc.title(title); } final Element body = doc.body(); writeBody(jCas, body); try { FileUtils.writeStringToFile(f, doc.html(), Charset.defaultCharset()); } catch (final IOException e) { throw new AnalysisEngineProcessException(e); } }
Example 19
Source File: From firing-range with Apache License 2.0 | 4 votes |
/**
 * Parses the {@code q} parameter as an HTML body fragment and echoes back those elements that
 * pass an attribute filter.
 *
 * <p>An element is dropped when any attribute name (lower-cased) starts with {@code on} or
 * equals {@code href} or {@code src}, or when a {@code style} attribute's lower-cased value
 * contains {@code expression}. The remaining elements are concatenated and sent through
 * {@code Responses.sendXssed}.
 *
 * @param request  incoming request; must carry a {@code q} parameter
 * @param response response the result or error is written to
 * @throws IOException declared by the servlet signature
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String q = request.getParameter("q");
    if (q == null) {
        Responses.sendError(response, "Missing q parameter", 400);
        return;
    }

    Element fragmentBody = Jsoup.parseBodyFragment(q).body();
    Elements candidates = fragmentBody.getAllElements();
    // The wrapping body element itself is never echoed.
    candidates.remove(fragmentBody);
    if (candidates.isEmpty()) {
        Responses.sendError(response, "Invalid input, no tags", 400);
        return;
    }

    StringBuilder output = new StringBuilder();
    for (Element candidate : candidates) {
        boolean accepted = true;
        for (Attribute attr : candidate.attributes()) {
            String key = attr.getKey().toLowerCase();
            boolean blockedName = key.startsWith("on") || "href".equals(key) || "src".equals(key);
            if (blockedName) {
                accepted = false;
            }
            if ("style".equals(key) && attr.getValue().toLowerCase().contains("expression")) {
                accepted = false;
            }
        }
        if (accepted) {
            output.append(candidate.toString());
        }
    }
    Responses.sendXssed(response, output.toString());
}
Example 20
Source File: From rebuild with GNU General Public License v3.0 | 4 votes |
/**
 * Loads the mail template resource {@code locales/email_zh-CN.html} and parses it as UTF-8.
 *
 * @return the {@code body} element of the parsed template
 * @throws IOException if the template file cannot be read
 */
protected static Element getMailTemplate() throws IOException {
    File templateFile = SysConfiguration.getFileOfRes("locales/email_zh-CN.html");
    return Jsoup.parse(templateFile, "utf-8").body();
}