Java Code Examples for org.jsoup.nodes.Document#toString()
The following examples show how to use
org.jsoup.nodes.Document#toString().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Downloader.java From MMDownloader with Apache License 2.0 | 5 votes |
/** * Jsoup을 이용한 HTML 코드 파싱. * * @param eachArchiveAddress 실제 만화가 담긴 아카이브 주소 * @return 성공하면 html 코드를 리턴 */ private String getHtmlPageJsoup(String eachArchiveAddress) throws Exception { print.info("고속 연결 시도중...\n"); // pageSource = Html코드를 포함한 페이지 소스코드가 담길 스트링, domain = http://wasabisyrup.com <-마지막 / 안붙음! String pageSource = null; // POST방식으로 아예 처음부터 비밀번호를 body에 담아 전달 Response response = Jsoup.connect(eachArchiveAddress) .userAgent(UserAgent.getUserAgent()) .header("charset", "utf-8") .header("Accept-Encoding", "gzip") //20171126 gzip 추가 .timeout(MAX_WAIT_TIME) // timeout .data("pass", PASSWORD) // 20180429 기준 마루마루에서 reCaptcha를 사용하기에 의미없음 .followRedirects(true) .execute(); Document preDoc = response.parse(); //받아온 HTML 코드를 저장 // <div class="gallery-template">이 만화 담긴 곳. if (preDoc.select("div.gallery-template").isEmpty()) { throw new RuntimeException("Jsoup Parsing Failed: No tag found"); } else { // 만약 Jsoup 파싱 시 내용 있으면 성공 pageSource = preDoc.toString(); } print.info("고속 연결 성공!\n"); return pageSource; //성공 시 html코드 리턴 }
Example 2
Source File: TemplateRender.java From jpress with GNU Lesser General Public License v3.0 | 5 votes |
public String buildNormalHtml(String content) { if (StrUtil.isBlank(content)) { return content; } Document doc = Jsoup.parse(content); doc.outputSettings().prettyPrint(false); doc.outputSettings().outline(false); Elements jsElements = doc.select("script"); replace(jsElements, "src"); Elements imgElements = doc.select("img"); replace(imgElements, "src"); Elements linkElements = doc.select("link"); replace(linkElements, "href"); //开启模板预览功能 if (templatePreviewEnable && TemplateManager.me().getPreviewTemplate() != null) { Elements aElements = doc.select("a"); replacePreviewHref(aElements); } return doc.toString(); }
Example 3
Source File: _WechatArticleImport.java From jpress with GNU Lesser General Public License v3.0 | 5 votes |
private String processContentImages(String content, List<String> imageUrls) { Document doc = Jsoup.parse(content); Elements imgElements = doc.select("img"); if (imgElements != null) { Iterator<Element> iterator = imgElements.iterator(); while (iterator.hasNext()) { Element element = iterator.next(); String imageUrl = element.hasAttr("src") ? element.attr("src") : element.attr("data-src"); //http://mmbiz.qpic.cn/mmbiz/4gZTdZfnQeDvQqCZFuVvYv8scGS7sEQTRETgISib1blz5iclAtnsccaJhaugmKc // hhm8mFOtjnicibibumazy8wPS6Xg/640?tp=webp&wxfrom=5&wx_lazy=1&wx_co=1 imageUrl = replaceLast(imageUrl, "/", "__"); imageUrl = imageUrl.startsWith("http://") ? imageUrl.replace("http://", "/attachment/") : imageUrl.replace("https://", "/attachment/s"); imageUrl = imageUrl.replace("?",".png?"); element.removeAttr("data-src"); element.attr("src",imageUrl); imageUrls.add(imageUrl); } } return doc.toString(); }
Example 4
Source File: LinkRewriterServiceImpl.java From publick-sling-blog with Apache License 2.0 | 5 votes |
/**
 * Rewrites the links of an HTML string: the {@code href} of every {@code a[href]} element and
 * the {@code content} of every {@code meta[content]} element are passed to
 * {@code updateAttribute} together with the request host. Per the original documentation the
 * rewriting follows the extensionless URLs settings; that logic is not visible in this method.
 *
 * @param html The HTML string.
 * @param requestHost The host name from the request.
 * @return The HTML string with rewritten URLs.
 */
public String rewriteAllLinks(final String html, final String requestHost) {
    final Document parsed = Jsoup.parse(html);

    // Both selections are taken before any attribute is modified.
    final Elements anchors = parsed.select("a[href]");
    final Elements metaTags = parsed.select("meta[content]");

    updateAttribute(anchors, "href", requestHost);
    updateAttribute(metaTags, "content", requestHost);

    return parsed.toString();
}
Example 5
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Parses the invalid-table fixture, renders it pretty-printed, and checks that the text
 * "Why am I here?" appears after the text "Comment" in the rendered output.
 */
@Test
public void testInvalidTableContents() throws IOException {
    File fixture = ParseTest.getFile("/htmltests/table-invalid-elements.html");
    Document doc = Jsoup.parse(fixture, "UTF-8");
    doc.outputSettings().prettyPrint(true);
    String rendered = doc.toString();

    int commentPos = rendered.indexOf("Comment");
    int searchTextPos = rendered.indexOf("Why am I here?");

    assertTrue("Comment not found", commentPos >= 0);
    assertTrue("Search text not found", searchTextPos >= 0);
    assertTrue("Search text did not come after comment", commentPos < searchTextPos);
}
Example 6
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Renders the invalid-table fixture with pretty-printing enabled and verifies that both
 * "Comment" and "Why am I here?" are present, with the latter positioned after the former.
 */
@Test
public void testInvalidTableContents() throws IOException {
    File input = ParseTest.getFile("/htmltests/table-invalid-elements.html");
    Document doc = Jsoup.parse(input, "UTF-8");
    doc.outputSettings().prettyPrint(true);

    String output = doc.toString();
    int commentIndex = output.indexOf("Comment");
    int questionIndex = output.indexOf("Why am I here?");

    assertTrue("Comment not found", commentIndex != -1);
    assertTrue("Search text not found", questionIndex != -1);
    assertTrue("Search text did not come after comment", questionIndex > commentIndex);
}
Example 7
Source File: DownloadPostFragment.java From Instagram-Profile-Downloader with MIT License | 4 votes |
@Override protected String doInBackground(String... f_url) { try { Document doc = Jsoup.connect(f_url[0]).get(); String html = doc.toString(); type = false; //for caption int indexcaption = html.indexOf("edge_media_to_caption"); indexcaption += 48; int startCaption = html.indexOf("\"", indexcaption); startCaption += 1; int endCaption = html.indexOf("\"", startCaption); String strCaption = null; strCaption = html.substring(startCaption, endCaption); //setting caption flag=0 for caption flag=1 for vid flag=2 for image publishProgress("0", strCaption); //for video int indexVid = html.indexOf("\"video_url\""); indexVid += 11; int startVid = html.indexOf("\"", indexVid); startVid += 1; int endVid = html.indexOf("\"", startVid); String urlVid = null; urlVid = html.substring(startVid, endVid); if (!urlVid.equalsIgnoreCase("en")) { // it is a vid show play btn type = true; } //for image url int index = html.indexOf("display_url"); index += 13; int start = html.indexOf("\"", index); start += 1; int end = html.indexOf("\"", start); // System.out.println("start:"+start+ "end:"+ end); String urlImage = html.substring(start, end); // Bitmap mIcon11 = null; // try { // InputStream in = new java.net.URL(urlImage).openStream(); // mIcon11 = BitmapFactory.decodeStream(in); // } catch (Exception e) { // Log.e("Error", e.getMessage()); // e.printStackTrace(); // } // return mIcon11; return urlImage; } catch (Exception e) { Log.e("Error: ", e.getMessage()); } return null; }
Example 8
Source File: DownloadIGTVFragment.java From Instagram-Profile-Downloader with MIT License | 4 votes |
@Override protected String doInBackground(String... f_url) { try { Document doc = Jsoup.connect(f_url[0]).get(); String html = doc.toString(); type = false; //for caption int indexcaption = html.indexOf("edge_media_to_caption"); indexcaption += 48; int startCaption = html.indexOf("\"", indexcaption); startCaption += 1; int endCaption = html.indexOf("\"", startCaption); String strCaption = null; strCaption = html.substring(startCaption, endCaption); //setting caption flag=0 for caption flag=1 for vid flag=2 for image publishProgress("0", strCaption); //for video int indexVid = html.indexOf("\"video_url\""); indexVid += 11; int startVid = html.indexOf("\"", indexVid); startVid += 1; int endVid = html.indexOf("\"", startVid); String urlVid = null; urlVid = html.substring(startVid, endVid); if (!urlVid.equalsIgnoreCase("en")) { // it is a vid show play btn type = true; } //for image url int index = html.indexOf("display_url"); index += 13; int start = html.indexOf("\"", index); start += 1; int end = html.indexOf("\"", start); // System.out.println("start:"+start+ "end:"+ end); String urlImage = html.substring(start, end); return urlImage; } catch (Exception e) { Log.e("Error: ", e.getMessage()); } return null; }
Example 9
Source File: HtmlUtil.java From V2EX with GNU General Public License v3.0 | 4 votes |
/**
 * Wraps an HTML fragment with theme-derived CSS and charset metadata.
 *
 * <p>Resolves the text, link, accent and secondary-text colors from the context's theme,
 * appends a {@code <style>} element and content-type {@code <meta>} elements to the document
 * head, forces every {@code img} to {@code width="100%"} / {@code height="auto"}, sets the
 * document charset to UTF-8 and returns the serialized document.
 *
 * @param html    HTML to style; {@code null} or empty yields an empty string
 * @param context context whose theme supplies the colors
 * @return the styled HTML document, or {@code ""} when there is nothing to style
 */
public static String applyHtmlStyle(String html, Context context){
    // Nothing to style: return before touching the theme.
    if (html == null || html.equals("")){
        return "";
    }

    // One TypedValue is reused for all lookups, as in the original code.
    TypedValue typedColor = new TypedValue();
    String textColorStr = resolveCssColor(context, typedColor, R.attr.attr_color_text);
    String linkColorStr = resolveCssColor(context, typedColor, R.attr.attr_color_text_link);
    String codeColorStr = resolveCssColor(context, typedColor, R.attr.attr_color_accent);
    String codeBackgroundStr = resolveCssColor(context, typedColor, R.attr.attr_color_text_secondary);

    Document document = Jsoup.parse(html);
    document.head()
            .append(
                    "<style type=\"text/css\">" +
                    "body{width:95%;}" +
                    "* {" +
                    " color:" + textColorStr + ";" +
                    "}" +
                    "a {" +
                    " color:" + linkColorStr + ";" +
                    "word-wrap:break-word;" +
                    "}" +
                    "code,pre {" +
                    " color: " + codeColorStr + ";" +
                    " background: " + codeBackgroundStr + ";" +
                    " padding: 3px;" +
                    " border-radius: 5px;" +
                    "word-wrap:normal;" +
                    "} img { border:1px solid grey;}" +
                    "</style>");
    // The second meta previously used the misspelled attribute "http-equlv".
    document.head()
            .append("<meta name=\"content-type\" content=\"text/html; charset=utf-8\">" +
                    "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">");

    for (Element img : document.select("img")){
        img.attr("width","100%");
        img.attr("height","auto");
    }
    document.charset(Charset.forName("utf-8"));
    return document.toString();
}

/**
 * Resolves a theme attribute into {@code scratch} and formats its data as a CSS
 * {@code #rrggbb} color. Each channel is zero-padded to two hex digits; unpadded output such
 * as "#abc" for 0x0A0B0C would be read by CSS as the shorthand for #aabbcc.
 */
private static String resolveCssColor(Context context, TypedValue scratch, int attrId) {
    context.getTheme().resolveAttribute(attrId, scratch, true);
    // Mask off the alpha byte; only red, green and blue are emitted.
    return String.format("#%06x", scratch.data & 0xFFFFFF);
}
Example 10
Source File: RenderHelpler.java From jboot with Apache License 2.0 | 4 votes |
/**
 * Parses the given HTML, hands its {@code script[src]}, {@code img[src]} and
 * {@code link[href]} elements to {@code replace} together with the domain, and returns the
 * re-serialized document.
 *
 * @param content HTML to process; returned unchanged when blank
 * @param domain  domain passed through to {@code replace}
 * @return the processed HTML
 */
public static String processCDN(String content, String domain) {
    if (StrUtil.isBlank(content)) {
        return content;
    }

    Document doc = Jsoup.parse(content);

    replace(doc.select("script[src]"), "src", domain);
    replace(doc.select("img[src]"), "src", domain);
    replace(doc.select("link[href]"), "href", domain);

    return doc.toString();
}
Example 11
Source File: HtmlBeautifier.java From cute-proxy with BSD 2-Clause "Simplified" License | 4 votes |
/**
 * Re-serializes the given HTML with an indent amount of 4.
 *
 * @param s       HTML text to format
 * @param charset not used by this implementation
 * @return the HTML as serialized by the parsed document
 */
@Override
public String beautify(String s, Charset charset) {
    final Document parsed = Jsoup.parse(s);
    parsed.outputSettings().indentAmount(4);
    return parsed.toString();
}