Java Code Examples for org.jsoup.nodes.Document#head()
The following examples show how to use
org.jsoup.nodes.Document#head().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: URLDownloadTests.java From java_in_examples with Apache License 2.0 | 8 votes |
/**
 * Downloads the page at {@code url} with jsoup and dumps what the parser
 * reports about it to standard output: document-level properties, the
 * output settings, every direct child of {@code <head>}, and finally the
 * direct children of {@code <body>} (via {@code printElements}).
 *
 * @param url address of the page to fetch and parse
 * @throws Exception if fetching or parsing the page fails
 */
private static void testHtmlParser(String url) throws Exception {
    Document page = Jsoup.connect(url)
            .userAgent(USER_AGENT)
            .cookie("auth", "token")
            .timeout(30000)
            .get();

    // Document-level properties.
    Charset pageCharset = page.charset();
    System.out.println("charset = " + pageCharset);
    System.out.println("location = " + page.location());
    System.out.println("nodeName = " + page.nodeName());

    // Output (serialisation) settings attached to the document.
    Document.OutputSettings settings = page.outputSettings();
    System.out.println("charset = " + settings.charset());
    System.out.println("indentAmount = " + settings.indentAmount());
    System.out.println("syntax = " + settings.syntax());
    System.out.println("escapeMode = " + settings.escapeMode());
    System.out.println("prettyPrint = " + settings.prettyPrint());
    System.out.println("outline = " + settings.outline());

    System.out.println("title = " + page.title());
    System.out.println("baseUri = " + page.baseUri());

    // One line per direct child of <head>: "<tag name> : <element markup>".
    Elements headChildren = page.head().children();
    for (Element headChild : headChildren) {
        System.out.print(headChild.tag().getName() + " : ");
        System.out.println(headChild);
    }

    printElements(page.body().children());
}
Example 2
Source File: BootstrapHandlerTest.java From flow with Apache License 2.0 | 6 votes |
/**
 * Checks that the bootstrap page head carries exactly three meta tags, in
 * order: Content-Type, X-UA-Compatible and the viewport.
 */
@Test
public void headHasMetaTags() throws Exception {
    initUI(testUI, createVaadinRequest());

    BootstrapContext bootstrapContext = new BootstrapContext(request, null,
            session, testUI, this::contextRootRelativePath);
    Document page = pageBuilder.getBootstrapPage(bootstrapContext);

    Elements metaTags = page.head().getElementsByTag("meta");
    Assert.assertEquals(3, metaTags.size());

    Element contentTypeMeta = metaTags.get(0);
    assertEquals("Content-Type", contentTypeMeta.attr("http-equiv"));
    assertEquals("text/html; charset=utf-8",
            contentTypeMeta.attr("content"));

    Element compatibilityMeta = metaTags.get(1);
    assertEquals("X-UA-Compatible", compatibilityMeta.attr("http-equiv"));
    assertEquals("IE=edge", compatibilityMeta.attr("content"));

    Element viewportMeta = metaTags.get(2);
    assertEquals(BootstrapHandler.VIEWPORT, viewportMeta.attr("name"));
    assertEquals(Viewport.DEFAULT,
            viewportMeta.attr(BootstrapHandler.CONTENT_ATTRIBUTE));
}
Example 3
Source File: BootstrapHandlerTest.java From flow with Apache License 2.0 | 6 votes |
/**
 * Builds the bootstrap page for a second, freshly initialised UI and asserts
 * that the head markup contains {@code "mode = " + productionMode}.
 *
 * @param productionMode flag applied to the mocks and expected in the head
 */
private void bootstrapPage_productionModeTest(boolean productionMode) {
    mocks.setProductionMode(productionMode);

    TestUI secondUi = new TestUI();
    initUI(testUI);
    secondUi.getInternals().setSession(session);

    VaadinRequest initRequest = createVaadinRequest();
    secondUi.doInit(initRequest, 0);
    secondUi.getRouter().initializeUI(secondUi, request);

    BootstrapContext pageContext = new BootstrapContext(initRequest, null,
            session, secondUi, this::contextRootRelativePath);
    secondUi.getInternals()
            .setContextRoot(contextRootRelativePath(request));

    Document bootstrapPage = pageBuilder.getBootstrapPage(pageContext);
    String headHtml = bootstrapPage.head().outerHtml();

    Assert.assertTrue(headHtml.contains("mode = " + productionMode));
}
Example 4
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void createsDocumentStructure() { String html = "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>"; Document doc = Jsoup.parse(html); Element head = doc.head(); Element body = doc.body(); assertEquals(1, doc.children().size()); // root node: contains html node assertEquals(2, doc.child(0).children().size()); // html node: head and body assertEquals(3, head.children().size()); assertEquals(1, body.children().size()); assertEquals("keywords", head.getElementsByTag("meta").get(0).attr("name")); assertEquals(0, body.getElementsByTag("meta").size()); assertEquals("jsoup", doc.title()); assertEquals("Hello world", body.text()); assertEquals("Hello world", body.children().get(0).text()); }
Example 5
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void createsDocumentStructure() { String html = "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>"; Document doc = Jsoup.parse(html); Element head = doc.head(); Element body = doc.body(); assertEquals(1, doc.children().size()); // root node: contains html node assertEquals(2, doc.child(0).children().size()); // html node: head and body assertEquals(3, head.children().size()); assertEquals(1, body.children().size()); assertEquals("keywords", head.getElementsByTag("meta").get(0).attr("name")); assertEquals(0, body.getElementsByTag("meta").size()); assertEquals("jsoup", doc.title()); assertEquals("Hello world", body.text()); assertEquals("Hello world", body.children().get(0).text()); }
Example 6
Source File: WxCrawlServiceImpl.java From wx-crawl with Apache License 2.0 | 5 votes |
/**
 * Resolves the article title from the given document, trying three sources
 * in order:
 * <ol>
 *   <li>the {@code ARTICLE_TITLE} attribute on the head element, if non-empty;</li>
 *   <li>the text of the first element matching the {@code TITLE} selector;</li>
 *   <li>the document's own title.</li>
 * </ol>
 *
 * @param sourceDoc parsed article document
 * @return the first title found by the lookup order above
 */
private String getArticleTitle(Document sourceDoc) {
    // 1. Title stored as an attribute on <head>.
    if (sourceDoc.head() != null) {
        String headTitle = sourceDoc.head()
                .attr(WxCrawlerConstant.BackupArticle.ARTICLE_TITLE);
        if (StringUtils.isNotEmpty(headTitle)) {
            return headTitle;
        }
    }

    // 2. Title element located through the configured selector.
    if (sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE)
            .first() != null) {
        return sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE)
                .first()
                .text();
    }

    // 3. Fall back to the document title.
    return sourceDoc.title();
}
Example 7
Source File: ArticleConvert.java From wx-crawl with Apache License 2.0 | 5 votes |
/**
 * Populates {@code articleTransferVO} from attributes stored on the head
 * element of {@code doc} and returns the same instance.
 *
 * @param articleTransferVO transfer object to fill in; it is mutated in place
 * @param doc               document whose head element carries the article attributes
 * @return the {@code articleTransferVO} argument, after population
 */
public static ArticleTransferVO convert2ArticleTransferVO(
        ArticleTransferVO articleTransferVO, Document doc) {
    Element head = doc.head();

    articleTransferVO.setPublishDate(
            head.attr(WxCrawlerConstant.BackupArticle.PUBLISH_DATE));
    articleTransferVO.setAuthor(
            head.attr(WxCrawlerConstant.BackupArticle.AUTHOR));
    articleTransferVO.setAccountId(
            head.attr(WxCrawlerConstant.BackupArticle.ACCOUNT_ID));
    articleTransferVO.setAccountName(
            head.attr(WxCrawlerConstant.BackupArticle.ACCOUNT_NAME));
    articleTransferVO.setDigest(
            head.attr(WxCrawlerConstant.BackupArticle.DIGEST));
    articleTransferVO.setOssCover(
            head.attr(WxCrawlerConstant.BackupArticle.COVER));
    articleTransferVO.setArticleType(
            head.attr(WxCrawlerConstant.BackupArticle.ARTICLE_TYPE));
    articleTransferVO.setTitle(
            head.attr(WxCrawlerConstant.BackupArticle.ARTICLE_TITLE));

    return articleTransferVO;
}
Example 8
Source File: BaseSoup.java From ShareBox with Apache License 2.0 | 5 votes |
/**
 * Parses {@code mHtml} with jsoup, records the resulting head and body
 * elements, and hands them to {@code parse(...)} together with the value map.
 * The value map is created on first use and reused on later calls.
 *
 * @param arg caller-supplied arguments, stored in {@code mArguments}
 * @return the value map ({@code mValues}) after {@code parse(...)} has run
 */
public Map<String, Object> doParse(Object... arg) {
    mArguments = arg;

    // Create the result map only once.
    if (null == mValues) {
        mValues = new HashMap<>();
    }

    Document parsedHtml = Jsoup.parse(mHtml);
    mHeader = parsedHtml.head();
    mBody = parsedHtml.body();

    parse(parsedHtml, mHeader, mBody, mValues);
    return mValues;
}
Example 9
Source File: BootstrapHandlerTest.java From flow with Apache License 2.0 | 5 votes |
/**
 * Configures a session-expired URL that itself contains the literal
 * {@code {{CONFIG_JSON}}} and asserts that this URL appears unchanged in the
 * head markup of the generated bootstrap page.
 */
@Test
public void bootstrapPage_configJsonPatternIsReplacedBeforeInitialUidl() {
    TestUI secondUi = new TestUI();
    initUI(testUI);

    // System messages with the session-expired notification switched on.
    SystemMessages systemMessages = Mockito.mock(SystemMessages.class);
    service.setSystemMessagesProvider(info -> systemMessages);
    Mockito.when(systemMessages.isSessionExpiredNotificationEnabled())
            .thenReturn(true);
    Mockito.when(session.getSession())
            .thenReturn(Mockito.mock(WrappedSession.class));

    String expiredUrl = "http://{{CONFIG_JSON}}/file";
    Mockito.when(systemMessages.getSessionExpiredURL())
            .thenReturn(expiredUrl);

    secondUi.getInternals().setSession(session);
    VaadinRequest initRequest = createVaadinRequest();
    secondUi.doInit(initRequest, 0);
    secondUi.getRouter().initializeUI(secondUi, request);

    BootstrapContext pageContext = new BootstrapContext(initRequest, null,
            session, secondUi, this::contextRootRelativePath);
    secondUi.getInternals()
            .setContextRoot(contextRootRelativePath(request));

    Document bootstrapPage = pageBuilder.getBootstrapPage(pageContext);
    String headHtml = bootstrapPage.head().outerHtml();

    Assert.assertTrue(headHtml.contains(expiredUrl));
}
Example 10
Source File: BootstrapHandlerTest.java From flow with Apache License 2.0 | 5 votes |
/**
 * Initialises the UI with {@code MetaAnnotations} and checks that the
 * bootstrap page head holds five meta tags: the three also asserted in
 * {@code headHasMetaTags} followed by the two apple-mobile-web-app entries.
 */
@Test
public void addMultiMetaTagViaMetaAnnotation_MetaSizeCorrect_ContentCorrect()
        throws InvalidRouteConfigurationException {
    initUI(testUI, createVaadinRequest(),
            Collections.singleton(MetaAnnotations.class));

    BootstrapContext bootstrapContext = new BootstrapContext(request, null,
            session, testUI, this::contextRootRelativePath);
    Document page = pageBuilder.getBootstrapPage(bootstrapContext);

    Elements metaTags = page.head().getElementsByTag("meta");
    Assert.assertEquals(5, metaTags.size());

    Element contentTypeMeta = metaTags.get(0);
    assertEquals("Content-Type", contentTypeMeta.attr("http-equiv"));
    assertEquals("text/html; charset=utf-8",
            contentTypeMeta.attr("content"));

    Element compatibilityMeta = metaTags.get(1);
    assertEquals("X-UA-Compatible", compatibilityMeta.attr("http-equiv"));
    assertEquals("IE=edge", compatibilityMeta.attr("content"));

    Element viewportMeta = metaTags.get(2);
    assertEquals(BootstrapHandler.VIEWPORT, viewportMeta.attr("name"));
    assertEquals(Viewport.DEFAULT,
            viewportMeta.attr(BootstrapHandler.CONTENT_ATTRIBUTE));

    Element statusBarMeta = metaTags.get(3);
    assertEquals("apple-mobile-web-app-status-bar-style",
            statusBarMeta.attr("name"));
    assertEquals("black",
            statusBarMeta.attr(BootstrapHandler.CONTENT_ATTRIBUTE));

    Element webAppCapableMeta = metaTags.get(4);
    assertEquals("apple-mobile-web-app-capable",
            webAppCapableMeta.attr("name"));
    assertEquals("yes",
            webAppCapableMeta.attr(BootstrapHandler.CONTENT_ATTRIBUTE));
}
Example 11
Source File: BootstrapHandlerTest.java From flow with Apache License 2.0 | 5 votes |
/**
 * Checks that the bootstrap page head contains exactly one element whose
 * {@code name} is the viewport name, and that its content attribute equals
 * {@code Viewport.DEFAULT}.
 */
@Test
public void defaultViewport() {
    initUI(testUI);

    Document bootstrapPage = pageBuilder.getBootstrapPage(context);
    Elements viewportMetas = bootstrapPage.head()
            .getElementsByAttributeValue("name", BootstrapHandler.VIEWPORT);

    Assert.assertEquals(1, viewportMetas.size());
    Assert.assertEquals(Viewport.DEFAULT,
            viewportMetas.get(0).attr(BootstrapHandler.CONTENT_ATTRIBUTE));
}
Example 12
Source File: BootstrapHandlerTest.java From flow with Apache License 2.0 | 5 votes |
/**
 * Initialises the UI with {@code RouteWithViewport} and checks that the
 * single viewport element in the head carries the annotation's value
 * instead of the default.
 */
@Test
public void viewportAnnotationOverridesDefault() throws Exception {
    initUI(testUI, createVaadinRequest(),
            Collections.singleton(RouteWithViewport.class));

    Document bootstrapPage = pageBuilder.getBootstrapPage(context);
    Elements viewportMetas = bootstrapPage.head()
            .getElementsByAttributeValue("name", BootstrapHandler.VIEWPORT);

    Assert.assertEquals(1, viewportMetas.size());
    Assert.assertEquals("viewport-annotation-value",
            viewportMetas.get(0).attr(BootstrapHandler.CONTENT_ATTRIBUTE));
}
Example 13
Source File: AbstractHtmlConsumer.java From baleen with Apache License 2.0 | 4 votes |
/**
 * Renders the current CAS as a standalone HTML file.
 *
 * <p>The head receives an optional stylesheet link (when {@code css} is set),
 * a {@code <meta charset="utf-8">} tag, fixed document-annotation metadata,
 * and one meta entry per {@code Metadata} annotation found in the CAS. If a
 * metadata key equals {@code documentTitle} (ignoring case) its value becomes
 * the page title; when several match, the last one wins. The body is filled
 * in by {@code writeBody}.
 *
 * <p>The file is written as UTF-8 so that the bytes on disk agree with the
 * charset declared in the head, regardless of the platform default encoding.
 *
 * @param jCas the CAS to export
 * @throws AnalysisEngineProcessException if the output file cannot be written
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
    final File f = getFileName(jCas);
    final DocumentAnnotation da = getDocumentAnnotation(jCas);

    final Document doc =
            Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>");
    doc.outputSettings(new Document.OutputSettings().prettyPrint(false));

    final Element head = doc.head();

    if (!Strings.isNullOrEmpty(css)) {
        final Element cssLink = head.appendElement("link");
        cssLink.attr("rel", "stylesheet");
        cssLink.attr("href", css);
    }

    final Element charsetMeta = head.appendElement("meta");
    charsetMeta.attr("charset", "utf-8");

    // Fixed metadata taken from the document annotation.
    appendMeta(head, "document.type", da.getDocType());
    appendMeta(head, "document.sourceUri", da.getSourceUri());
    appendMeta(head, "externalId", da.getHash());
    appendMeta(head, "document.classification", da.getDocumentClassification());
    appendMeta(
            head,
            "document.caveats",
            String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats())));
    appendMeta(
            head,
            "document.releasability",
            String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability())));

    // Free-form metadata; also pick up the document title if one is present.
    String title = null;
    for (final Metadata md : JCasUtil.select(jCas, Metadata.class)) {
        appendMeta(head, md.getKey(), md.getValue());
        if ("documentTitle".equalsIgnoreCase(md.getKey())) {
            title = md.getValue();
        }
    }

    if (!Strings.isNullOrEmpty(title)) {
        doc.title(title);
    }

    final Element body = doc.body();
    writeBody(jCas, body);

    try {
        // Must match the <meta charset="utf-8"> emitted above; the platform
        // default charset would corrupt non-ASCII text on non-UTF-8 systems.
        FileUtils.writeStringToFile(f, doc.html(), java.nio.charset.StandardCharsets.UTF_8);
    } catch (final IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Example 14
Source File: BootstrapHandler.java From flow with Apache License 2.0 | 4 votes |
private static void setupPwa(Document document, PwaRegistry registry) { if (registry == null) { return; } PwaConfiguration config = registry.getPwaConfiguration(); if (config.isEnabled()) { // Add header injections Element head = document.head(); // Describe PWA capability for iOS devices head.appendElement(META_TAG) .attr("name", "apple-mobile-web-app-capable") .attr(CONTENT_ATTRIBUTE, "yes"); // Theme color head.appendElement(META_TAG).attr("name", "theme-color") .attr(CONTENT_ATTRIBUTE, config.getThemeColor()); head.appendElement(META_TAG) .attr("name", "apple-mobile-web-app-status-bar-style") .attr(CONTENT_ATTRIBUTE, config.getThemeColor()); // Add manifest head.appendElement("link").attr("rel", "manifest").attr("href", config.getManifestPath()); // Add icons for (PwaIcon icon : registry.getHeaderIcons()) { head.appendChild(icon.asElement()); } // Add service worker initialization head.appendElement(SCRIPT_TAG) .text("if ('serviceWorker' in navigator) {\n" + " window.addEventListener('load', function() {\n" + " navigator.serviceWorker.register('" + config.getServiceWorkerPath() + "');\n" + " });\n" + "}"); } }