org.jsoup.nodes.Document.OutputSettings Java Examples
The following examples show how to use
org.jsoup.nodes.Document.OutputSettings.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DocumentTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testHtmlAndXmlSyntax() { String h = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'><>&"<foo />bar"; Document doc = Jsoup.parse(h); doc.outputSettings().syntax(Syntax.html); assertEquals("<!doctype html>\n" + "<html>\n" + " <head></head>\n" + " <body>\n" + " <img async checked src=\"&<>"\"><>&\"\n" + " <foo />bar\n" + " </body>\n" + "</html>", doc.html()); doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); assertEquals("<!DOCTYPE html>\n" + "<html>\n" + " <head></head>\n" + " <body>\n" + " <img async=\"\" checked=\"checked\" src=\"&<>"\" /><>&\"\n" + " <foo />bar\n" + " </body>\n" + "</html>", doc.html()); }
Example #2
Source File: DocumentTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testHtmlAndXmlSyntax() { String h = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'><>&"<foo />bar"; Document doc = Jsoup.parse(h); doc.outputSettings().syntax(Syntax.html); assertEquals("<!doctype html>\n" + "<html>\n" + " <head></head>\n" + " <body>\n" + " <img async checked src=\"&<>"\"><>&\"\n" + " <foo />bar\n" + " </body>\n" + "</html>", doc.html()); doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); assertEquals("<!DOCTYPE html>\n" + "<html>\n" + " <head></head>\n" + " <body>\n" + " <img async=\"\" checked=\"checked\" src=\"&<>"\" /><>&\"\n" + " <foo />bar\n" + " </body>\n" + "</html>", doc.html()); }
Example #3
Source File: XHTMLDocumentHandler.java From docx4j-template with Apache License 2.0 | 6 votes |
/** * Jsoup.parse(in, charsetName, baseUri) */ @Override public Document handle( InputStream input) throws IOException{ //获取Jsoup参数 String charsetName = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_CHARSETNAME, Docx4jConstants.DEFAULT_CHARSETNAME ); String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI,""); //使用Jsoup将html转换成Document对象 Document doc = Jsoup.parse(input, charsetName, baseUri); OutputSettings outputSettings = new OutputSettings(); outputSettings.prettyPrint(false); /* outputSettings.syntax(syntax) outputSettings.charset(charset) outputSettings*/ doc.outputSettings(outputSettings); //返回Document对象 return doc; }
Example #4
Source File: JsoupTest.java From crawler-jsoup-maven with Apache License 2.0 | 6 votes |
public static void main(String[] args) { String d = "<span><div>test</div></span>"; Document doc = Jsoup.parse(d); Element div = doc.select("div").first(); // <div></div> div.html("<p>lorem ipsum</p>"); // <div><p>lorem ipsum</p></div> div.prepend("<p>First</p>"); div.append("<p>Last</p>"); // now: <div><p>First</p><p>lorem ipsum</p><p>Last</p></div> div.appendElement(d); Element span = doc.select("span").first(); // <span>One</span> span.wrap("<li><a href='http://example.com/'></a></li>"); // now: <li><a href="http://example.com"><span>One</span></a></li> System.out.println(doc.html()); String s = Jsoup.clean(doc.html(), "", Whitelist.relaxed(), new OutputSettings().prettyPrint(false)); System.out.println(s); }
Example #5
Source File: DocumentTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void testHtmlAndXmlSyntax() { String h = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'><>&"<foo />bar"; Document doc = Jsoup.parse(h); doc.outputSettings().syntax(Syntax.html); assertEquals("<!doctype html>\n" + "<html>\n" + " <head></head>\n" + " <body>\n" + " <img async checked src=\"&<>"\"><>&\"\n" + " <foo />bar\n" + " </body>\n" + "</html>", doc.html()); doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); assertEquals("<!DOCTYPE html>\n" + "<html>\n" + " <head></head>\n" + " <body>\n" + " <img async=\"\" checked=\"checked\" src=\"&<>"\" /><>&\"\n" + " <foo />bar\n" + " </body>\n" + "</html>", doc.html()); }
Example #6
Source File: JsoupBasedFormatter.java From formatter-maven-plugin with Apache License 2.0 | 5 votes |
/**
 * Builds the jsoup output settings used by this formatter from the supplied options.
 *
 * <p>Option keys and the fallback used when a key is absent: {@code charset} (UTF-8),
 * {@code escapeMode} (xhtml), {@code indentAmount} (4), {@code outlineMode} (true),
 * {@code pretty} (true), {@code syntax} (html).
 *
 * @param options formatter options keyed by name
 * @param cfg configuration source forwarded to {@code initCfg}
 */
@Override
public void init(Map<String, String> options, ConfigurationSource cfg) {
    super.initCfg(cfg);

    formatter = new OutputSettings();

    // Each option is read and applied in turn so a bad value fails at the same point as before.
    final String charsetName = options.getOrDefault("charset", StandardCharsets.UTF_8.name());
    formatter.charset(Charset.forName(charsetName));

    final String escapeModeName = options.getOrDefault("escapeMode", EscapeMode.xhtml.name());
    formatter.escapeMode(EscapeMode.valueOf(escapeModeName));

    final String indentText = options.getOrDefault("indentAmount", "4");
    formatter.indentAmount(Integer.parseInt(indentText));

    final String outlineText = options.getOrDefault("outlineMode", Boolean.TRUE.toString());
    formatter.outline(Boolean.parseBoolean(outlineText));

    final String prettyText = options.getOrDefault("pretty", Boolean.TRUE.toString());
    formatter.prettyPrint(Boolean.parseBoolean(prettyText));

    final String syntaxName = options.getOrDefault("syntax", Syntax.html.name());
    formatter.syntax(Syntax.valueOf(syntaxName));
}
Example #7
Source File: DocumentTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that writing the document into an appendable reproduces the input markup exactly
 * once pretty-printing is disabled.
 */
@Test
public void testHtmlAppendable() {
    final String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";

    final Document document = Jsoup.parse(htmlContent);
    document.outputSettings(new OutputSettings().prettyPrint(false));

    final String rendered = document.html(new StringWriter()).toString();
    assertEquals(htmlContent, rendered);
}
Example #8
Source File: DocumentTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that writing the document into an appendable reproduces the input markup exactly
 * once pretty-printing is disabled.
 */
@Test
public void testHtmlAppendable() {
    final String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";

    final Document document = Jsoup.parse(htmlContent);
    document.outputSettings(new OutputSettings().prettyPrint(false));

    final String rendered = document.html(new StringWriter()).toString();
    assertEquals(htmlContent, rendered);
}
Example #9
Source File: DocumentTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that writing the document into an appendable reproduces the input markup exactly
 * once pretty-printing is disabled.
 */
@Test
public void testHtmlAppendable() {
    final String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";

    final Document document = Jsoup.parse(htmlContent);
    document.outputSettings(new OutputSettings().prettyPrint(false));

    final String rendered = document.html(new StringWriter()).toString();
    assertEquals(htmlContent, rendered);
}
Example #10
Source File: TextFilterManage.java From bbs with GNU Affero General Public License v3.0 | 5 votes |
/** * 富文本过滤标签 * @param request * @param html * @return */ public String filterTag(HttpServletRequest request,String html) { if(StringUtils.isBlank(html)) return ""; Whitelist whitelist = this.filterParameter(null); //return Jsoup.clean(html, Configuration.getUrl(request),whitelist); return Jsoup.clean(html, Configuration.getUrl(request),whitelist,new OutputSettings().prettyPrint(false)); //prettyPrint(是否重新格式化) }
Example #11
Source File: HTMLSanitizer.java From document-management-software with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Cleans the given markup against the simple-text whitelist (relative links not preserved,
 * no indentation, no pretty-printing) and then unescapes HTML entities in the result.
 *
 * <p>NOTE(review): because the cleaned text is unescaped afterwards, entity-encoded markup in
 * the input comes back as literal characters -- confirm callers treat the result as plain text.
 *
 * @param unsafeHtmlContent markup to sanitize
 * @return the cleaned and unescaped text
 */
public static String sanitizeSimpleText(String unsafeHtmlContent) {
    final OutputSettings flatOutput = new OutputSettings().indentAmount(0).prettyPrint(false);
    final Whitelist simpleTextOnly = Whitelist.simpleText().preserveRelativeLinks(false);

    final String cleaned = Jsoup.clean(unsafeHtmlContent, "", simpleTextOnly, flatOutput);
    return StringEscapeUtils.unescapeHtml(cleaned);
}
Example #12
Source File: ZeppelinRDisplay.java From zeppelin with Apache License 2.0 | 4 votes |
private static RDisplay textDisplay(Element body) { // remove HTML tag while preserving whitespaces and newlines String text = Jsoup.clean(body.html(), "", Whitelist.none(), new OutputSettings().prettyPrint(false)); return new RDisplay(text, Type.TEXT, Code.SUCCESS); }
Example #13
Source File: NewPipeService.java From SkyTube with GNU General Public License v3.0 | 4 votes |
/**
 * Cleans the content against the basic whitelist without pretty-printing the result.
 *
 * @param content markup to filter
 * @return the cleaned markup
 */
private String filterHtml(String content) {
    final Whitelist basicTags = Whitelist.basic();
    final OutputSettings unformatted = new OutputSettings().prettyPrint(false);
    return Jsoup.clean(content, "", basicTags, unformatted);
}
Example #14
Source File: HtmlAnyMatchTest.java From doov with Apache License 2.0 | 4 votes |
/**
 * Renders the document as a string, preceded by an HTML comment line containing the AST of the
 * context's root metadata.
 *
 * @param context supplies the root metadata for the leading comment
 * @param doc document to render; its output settings are replaced with pretty-printing on and
 *     an indent of 2
 * @return the comment line followed by the document's string form
 */
static String format(Context context, Document doc) {
    final String astComment = "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n";
    final OutputSettings indented = new OutputSettings().prettyPrint(true).indentAmount(2);
    return astComment + doc.outputSettings(indented).toString();
}
Example #15
Source File: HtmlSampleRulesTest.java From doov with Apache License 2.0 | 4 votes |
/**
 * Renders the document as a string, preceded by an HTML comment line containing the AST of the
 * context's root metadata.
 *
 * @param context supplies the root metadata for the leading comment
 * @param doc document to render; its output settings are replaced with pretty-printing on and
 *     an indent of 2
 * @return the comment line followed by the document's string form
 */
static String format(Context context, Document doc) {
    final String astComment = "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n";
    final OutputSettings indented = new OutputSettings().prettyPrint(true).indentAmount(2);
    return astComment + doc.outputSettings(indented).toString();
}
Example #16
Source File: TextFilterManage.java From bbs with GNU Affero General Public License v3.0 | 3 votes |
/** * 富文本过滤标签 * @param request * @param html 内容 * @param editorTag 评论编辑器标签 * @return */ public String filterTag(HttpServletRequest request,String html,EditorTag editorTag) { if(StringUtils.isBlank(html)) return ""; Whitelist whitelist = this.filterParameter(editorTag); //return Jsoup.clean(html, Configuration.getUrl(request),whitelist); return Jsoup.clean(html, Configuration.getUrl(request),whitelist,new OutputSettings().prettyPrint(false)); //prettyPrint(是否重新格式化) }
Example #17
Source File: DocumentDataPageModContentFactoryImpl.java From cia with Apache License 2.0 | 2 votes |
/**
 * Builds the document-data page: menu bar, header, and -- when content exists for the page id --
 * a form panel with the cleaned document text and an optional word cloud.
 *
 * @param parameters raw page parameters; the page id is derived from them
 * @param menuBar menu bar populated for the document
 * @param panel panel that receives the assembled content
 * @return the layout that was set as the panel's content
 */
@Secured({ "ROLE_ANONYMOUS", "ROLE_USER", "ROLE_ADMIN" })
@Override
public Layout createContent(final String parameters, final MenuBar menuBar, final Panel panel) {
    final VerticalLayout content = createPanelContent();
    final String pageId = getPageId(parameters);

    getDocumentMenuItemFactory().createDocumentMenuBar(menuBar, pageId);
    LabelFactory.createHeader2Label(content, DOCUMENT_DATA);

    // Load the stored content rows for this document id.
    final DataContainer<DocumentContentData, String> contentContainer =
            getApplicationManager().getDataContainer(DocumentContentData.class);
    final List<DocumentContentData> matches = contentContainer.getAllBy(DocumentContentData_.id, pageId);

    if (!matches.isEmpty()) {
        final Panel formPanel = new Panel();
        formPanel.setSizeFull();
        content.addComponent(formPanel);

        final FormLayout form = new FormLayout();
        formPanel.setContent(form);

        // Only the first row is shown, cleaned down to the simple-text whitelist.
        final String rawContent = matches.get(0).getContent();
        final String cleanContent =
                Jsoup.clean(rawContent, "", Whitelist.simpleText(), new OutputSettings().indentAmount(4));
        form.addComponent(new Label(cleanContent, ContentMode.PREFORMATTED));

        // Ask the word-count service for this document on behalf of the current session.
        final DocumentWordCountRequest wordCountRequest = new DocumentWordCountRequest();
        wordCountRequest.setDocumentId(pageId);
        wordCountRequest.setMaxResults(MAX_RESULTS);
        wordCountRequest.setSessionId(RequestContextHolder.currentRequestAttributes().getSessionId());

        final DocumentWordCountResponse wordCounts =
                (DocumentWordCountResponse) getApplicationManager().service(wordCountRequest);

        // The word cloud is added only when the response carries a word-count map.
        if (wordCounts.getWordCountMap() != null) {
            form.addComponent(new Label(createWordCloud(wordCounts.getWordCountMap()), ContentMode.HTML));
        }

        content.setExpandRatio(formPanel, ContentRatio.GRID);
    }

    panel.setContent(content);
    getPageActionEventHelper().createPageEvent(ViewAction.VISIT_DOCUMENT_VIEW, ApplicationEventGroup.USER, NAME,
            parameters, pageId);
    return content;
}