org.jsoup.parser.Tag Java Examples
The following examples show how to use
org.jsoup.parser.Tag.
You can vote up the examples you like or vote down the ones you don't,
and follow the links above each example to reach the original project or source file. Related API usage is listed in the sidebar.
Example #1
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example #2
Source File: GuxiaobeiPageHandler.java From cetty with Apache License 2.0 | 6 votes |
/**
 * Builds a fresh {@code div} holding the article body.
 * Elements containing the open-message, related-posts or social blocks are dropped.
 * A {@code p} that contains an image is replaced by a figure built from that image,
 * after the image's {@code src} has been rewritten; everything else is appended as is.
 *
 * @param tempBody the candidate body elements
 * @return the newly created container element
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    for (final Element node : tempBody) {
        final boolean hasBoilerplate =
                node.select("div.open-message,div.jp-relatedposts,div.article-social").size() != 0;
        if (hasBoilerplate) {
            continue;
        }

        // Only paragraphs are inspected for an embedded image.
        final Element image = node.tagName().equals("p") ? node.select("img").first() : null;
        if (image == null) {
            container.appendChild(node);
            continue;
        }

        final String rawSrc = image.attr("src");
        final String resolvedSrc;
        if (rawSrc.contains("data:image")) {
            // Inline data URI: take the value of data-src instead.
            resolvedSrc = image.attr("data-src");
        } else if (rawSrc.contains("www.guxiaobei.com")) {
            resolvedSrc = rawSrc;
        } else {
            // No site host in the value: prefix it.
            resolvedSrc = "http://www.guxiaobei.com" + rawSrc;
        }
        image.attr("src", resolvedSrc);
        container.appendChild(buildFigure(image));
    }
    return container;
}
Example #3
Source File: Waimaob2cPageHandler.java From cetty with Apache License 2.0 | 6 votes |
/**
 * Builds a fresh {@code div} holding the article body.
 * Paragraphs carrying the sign-up promotion text or the {@code post-copyright} class are
 * skipped. Any remaining element that contains an image is replaced by a figure built
 * from its first image; all others are appended unchanged.
 *
 * @param tempBody the candidate body elements
 * @return the newly created container element
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    for (Element candidate : tempBody) {
        if (candidate.tagName().equals("p")) {
            boolean isSignupPromo = candidate.select("p").text()
                    .contains("即刻注册SHOPIFY账户, 跟着我们精心准备的SHOPIFY教程开始外贸独立站之旅!");
            boolean isCopyright = candidate.classNames().contains("post-copyright");
            if (isSignupPromo || isCopyright) {
                continue;
            }
        }

        Element image = candidate.select("img").first();
        container.appendChild(image != null ? buildFigure(image) : candidate);
    }
    return container;
}
Example #4
Source File: CifnewsPageHandler.java From cetty with Apache License 2.0 | 6 votes |
/**
 * Builds a fresh {@code div} holding the article body.
 * The summary text is passed to {@code buildBlockquote} first. Then each direct child of
 * {@code div.article-inner} is processed: children containing {@code div.fetch-present}
 * are skipped, children with a {@code p>img} become a figure (with a figcaption when the
 * image's next sibling element is a {@code p}), and everything else is appended unchanged.
 *
 * @param tempBody the candidate body elements
 * @return the newly created container element
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    buildBlockquote(tempBody.select("div.fetch-read>div.summary").text(), container);

    for (Element child : tempBody.select("div.article-inner>*")) {
        if (child.select("div.fetch-present").size() != 0) {
            continue;
        }

        Element image = child.select("p>img").first();
        if (image == null) {
            container.appendChild(child);
            continue;
        }

        // The figure is built before the sibling lookup, as in the original statement order.
        Element figure = buildFigure(image);
        Element following = image.nextElementSibling();
        if (following != null && following.tagName().equals("p")) {
            figure.appendChild(buildFigcaption(following.text()));
        }
        container.appendChild(figure);
    }
    return container;
}
Example #5
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example #6
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example #7
Source File: SimpleTextElementBuilderTest.java From Asqatasun with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Test of buildTextFromElement method, of class SimpleTextElementBuilder,
 * using a div that holds text nodes around a two-level chain of nested divs.
 */
public void testBuildTextFromElementWithChildren() {
    LOGGER.debug("buildTextFromElementWithChildren");

    // Outer div: leading text, then the nested divs, then trailing text.
    Element root = new Element(Tag.valueOf("div"), "");
    root.appendText("   text1  ");

    Element firstLevel = new Element(Tag.valueOf("div"), "");
    firstLevel.text("   child  element text   ");

    Element secondLevel = new Element(Tag.valueOf("div"), "");
    secondLevel.text("   child element text second level  ");

    firstLevel.appendChild(secondLevel);
    root.appendChild(firstLevel);
    root.appendText("   text2  ");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    String actual = builder.buildTextFromElement(root);
    assertEquals("text1 child element text child element text second level text2", actual);
}
Example #8
Source File: Cleaner.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Creates a copy of the source element's tag and base URI whose attributes are limited to
 * those the whitelist reports as safe, plus the whitelist's enforced attributes.
 *
 * @param sourceEl the element to copy from
 * @return the new element together with the number of attributes that were not copied
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discardedCount = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discardedCount++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the safe ones have been copied.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, discardedCount);
}
Example #9
Source File: Cleaner.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Creates a copy of the source element's tag and base URI whose attributes are limited to
 * those the whitelist reports as safe, plus the whitelist's enforced attributes.
 *
 * @param sourceEl the element to copy from
 * @return the new element together with the number of attributes that were not copied
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discardedCount = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discardedCount++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the safe ones have been copied.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, discardedCount);
}
Example #10
Source File: StructuralAnnotationsTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Maps {@code article} elements with different {@code class} values and checks that the
 * first collected annotation is an instance of the type expected for that class.
 */
@Test
public void testArticle() throws UIMAException {
    final JCas cas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations mapper = new StructuralAnnotations();

    // class attribute value -> expected annotation type
    final Map<String, Class<?>> expectedByClass = new HashMap<>();
    expectedByClass.put("Sheet", Sheet.class);
    expectedByClass.put("Slide", Slide.class);
    expectedByClass.put("Page", Page.class);
    expectedByClass.put("Another", Page.class);

    for (final Map.Entry<String, Class<?>> entry : expectedByClass.entrySet()) {
        final Class<?> expectedType = entry.getValue();

        final Element article = new Element(Tag.valueOf("article"), "");
        article.attr("class", entry.getKey());

        final AnnotationCollector collector = new AnnotationCollector();
        mapper.map(cas, article, collector);

        if (expectedType == null) {
            assertNull(collector.getAnnotations());
        } else {
            assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
        }
    }
}
Example #11
Source File: SemanticHtmlTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Maps one element per semantic tag name and checks that the first collected annotation
 * is an instance of the type expected for that tag.
 */
@Test
public void testMain() throws UIMAException {
    JCas cas = JCasSingleton.getJCasInstance();
    SemanticHtml mapper = new SemanticHtml();

    // tag name -> expected annotation type
    Map<String, Class<?>> expectedByTag = new HashMap<>();
    expectedByTag.put("time", Temporal.class);
    expectedByTag.put("meter", Quantity.class);
    expectedByTag.put("dfn", Buzzword.class);
    expectedByTag.put("address", Location.class);
    expectedByTag.put("abbr", Buzzword.class);
    expectedByTag.put("cite", DocumentReference.class);

    for (Map.Entry<String, Class<?>> entry : expectedByTag.entrySet()) {
        Class<?> expectedType = entry.getValue();

        Element tagged = new Element(Tag.valueOf(entry.getKey()), "");
        AnnotationCollector collector = new AnnotationCollector();
        mapper.map(cas, tagged, collector);

        if (expectedType == null) {
            assertNull(collector.getAnnotations());
        } else {
            assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
        }
    }
}
Example #12
Source File: StructuralAnnotationsTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Maps two anchors, one with an absolute and one with a rooted {@code href}, into the same
 * collector and checks that each yields a {@code Link} whose target equals the raw
 * {@code href} value.
 */
@Test
public void testLink() throws UIMAException {
    final JCas cas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations mapper = new StructuralAnnotations();

    final Element absoluteAnchor = new Element(Tag.valueOf("a"), "");
    absoluteAnchor.attr("href", "http://example.com");

    final Element rootedAnchor = new Element(Tag.valueOf("a"), "");
    rootedAnchor.attr("href", "/example.com");

    final AnnotationCollector collector = new AnnotationCollector();
    mapper.map(cas, absoluteAnchor, collector);
    mapper.map(cas, rootedAnchor, collector);

    // Annotations are checked in the order the anchors were mapped.
    Annotation first = collector.getAnnotations().get(0);
    assertTrue(first instanceof Link);
    assertEquals("http://example.com", ((Link) first).getTarget());

    Annotation second = collector.getAnnotations().get(1);
    assertTrue(second instanceof Link);
    assertEquals("/example.com", ((Link) second).getTarget());
}
Example #13
Source File: BootstrapHandler.java From flow with Apache License 2.0 | 5 votes |
/**
 * Creates the element used to include CSS.
 *
 * @param url the stylesheet URL, or {@code null}
 * @return a {@code link rel="stylesheet"} element pointing at {@code url}, or a
 *         {@code style} element carrying only the type attribute when {@code url} is null
 */
private Element createStylesheetElement(String url) {
    if (url == null) {
        return new Element(Tag.valueOf("style"), "").attr("type", CSS_TYPE_ATTRIBUTE_VALUE);
    }
    return new Element(Tag.valueOf("link"), "")
            .attr("rel", "stylesheet")
            .attr("type", CSS_TYPE_ATTRIBUTE_VALUE)
            .attr("href", url);
}
Example #14
Source File: ElementTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that {@code hasClass("ab")} is false when the class attribute is {@code "abc "},
 * i.e. has a trailing space and only shares a prefix with the queried name.
 */
@Test
public void whiteSpaceClassElement() {
    Attributes attributes = new Attributes();
    Element anchor = new Element(Tag.valueOf("a"), "", attributes);

    // The attribute is set after the element has been constructed with this Attributes object.
    attributes.put("class", "abc ");

    assertFalse(anchor.hasClass("ab"));
}
Example #15
Source File: CssTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks the {@code :root} selector: on the document it yields the single {@code html}
 * element, and on the {@code body} selection it yields the single {@code body} element.
 */
@Test
public void root() {
    // :root applied to the whole document
    Elements documentRoot = html.select(":root");
    assertEquals(1, documentRoot.size());
    assertNotNull(documentRoot.get(0));
    assertEquals(Tag.valueOf("html"), documentRoot.get(0).tag());

    // :root applied to the body selection
    Elements bodyRoot = html.select("body").select(":root");
    assertEquals(1, bodyRoot.size());
    assertNotNull(bodyRoot.get(0));
    assertEquals(Tag.valueOf("body"), bodyRoot.get(0).tag());
}
Example #16
Source File: ElementTest.java From astor with GNU General Public License v2.0 | 5 votes |
@Test public void insertChildrenAtPosition() { Document doc = Jsoup.parse("<div id=1>Text1 <p>One</p> Text2 <p>Two</p></div><div id=2>Text3 <p>Three</p></div>"); Element div1 = doc.select("div").get(0); Elements p1s = div1.select("p"); Element div2 = doc.select("div").get(1); assertEquals(2, div2.childNodeSize()); div2.insertChildren(-1, p1s); assertEquals(2, div1.childNodeSize()); // moved two out assertEquals(4, div2.childNodeSize()); assertEquals(3, p1s.get(1).siblingIndex()); // should be last List<Node> els = new ArrayList<Node>(); Element el1 = new Element(Tag.valueOf("span"), "").text("Span1"); Element el2 = new Element(Tag.valueOf("span"), "").text("Span2"); TextNode tn1 = new TextNode("Text4", ""); els.add(el1); els.add(el2); els.add(tn1); assertNull(el1.parent()); div2.insertChildren(-2, els); assertEquals(div2, el1.parent()); assertEquals(7, div2.childNodeSize()); assertEquals(3, el1.siblingIndex()); assertEquals(4, el2.siblingIndex()); assertEquals(5, tn1.siblingIndex()); }
Example #17
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that {@code after} inserts content directly behind the target node, both for an
 * {@code Element} argument and for an HTML string argument.
 */
@Test
public void after() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    // Insert an element node after <b>.
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");
    doc.select("b").first().after(emphasis);
    String afterElementInsert = doc.body().html();
    assertEquals("<p>One <b>two</b><em>four</em> three</p>", afterElementInsert);

    // Insert parsed HTML after <b>; it lands between <b> and the earlier <em>.
    doc.select("b").first().after("<i>five</i>");
    String afterHtmlInsert = doc.body().html();
    assertEquals("<p>One <b>two</b><i>five</i><em>four</em> three</p>", afterHtmlInsert);
}
Example #18
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that {@code after} inserts content directly behind the target node, both for an
 * {@code Element} argument and for an HTML string argument.
 */
@Test
public void after() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    // Insert an element node after <b>.
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");
    doc.select("b").first().after(emphasis);
    String afterElementInsert = doc.body().html();
    assertEquals("<p>One <b>two</b><em>four</em> three</p>", afterElementInsert);

    // Insert parsed HTML after <b>; it lands between <b> and the earlier <em>.
    doc.select("b").first().after("<i>five</i>");
    String afterHtmlInsert = doc.body().html();
    assertEquals("<p>One <b>two</b><i>five</i><em>four</em> three</p>", afterHtmlInsert);
}
Example #19
Source File: JsoupCssInliner.java From ogham with Apache License 2.0 | 5 votes |
/**
 * Replace link tags with style tags in order to keep the same inclusion
 * order.
 *
 * @param doc
 *            the html document
 * @param cssContents
 *            the list of external css files with their content
 */
private static void internStyles(Document doc, List<ExternalCss> cssContents) {
    for (Element link : doc.select(CSS_LINKS_SELECTOR)) {
        if (!isInlineModeAllowed(link, InlineModes.STYLE_ATTR)) {
            continue;
        }

        ExternalCss stylesheet = getCss(cssContents, link.attr(HREF_ATTR));
        if (stylesheet == null) {
            // No matching external css for this link: the link is left in place.
            continue;
        }

        Element styleTag = new Element(Tag.valueOf(STYLE_TAG), "");
        styleTag.appendChild(new DataNode(getCssContent(stylesheet)));
        link.replaceWith(styleTag);
    }
}
Example #20
Source File: Element.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Finds elements, including and recursively under this element, with the specified tag name.
 * @param tagName The tag name to search for (case insensitively).
 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);
    // Lower-case with a fixed locale: the no-arg overload uses the default locale, which
    // under e.g. Turkish maps 'I' to a dotless 'ı' and would break the case-insensitive
    // match for names such as "DIV".
    final String normalizedName = tagName.toLowerCase(java.util.Locale.ROOT).trim();

    return Collector.collect(new Evaluator.Tag(normalizedName), this);
}
Example #21
Source File: AppShellSettings.java From flow with Apache License 2.0 | 5 votes |
/**
 * Creates an element with the given tag, optional data content and attributes.
 *
 * @param tag     the tag name
 * @param content data to append as a {@code DataNode}; skipped when null or empty
 * @param attrs   alternating attribute names and values; a trailing name without a
 *                value is ignored
 * @return the new element
 */
private static Element createElement(String tag, String content, String... attrs) {
    final Element created = new Element(Tag.valueOf(tag), "");

    final boolean hasContent = content != null && !content.isEmpty();
    if (hasContent) {
        created.appendChild(new DataNode(content));
    }

    // Walk the value positions (1, 3, 5, ...); each value's name sits just before it.
    for (int valueIndex = 1; valueIndex < attrs.length; valueIndex += 2) {
        created.attr(attrs[valueIndex - 1], attrs[valueIndex]);
    }
    return created;
}
Example #22
Source File: GithubDownLoadTests.java From java_in_examples with Apache License 2.0 | 5 votes |
/**
 * Walks the given elements depth-first, printing the text of every element whose tag
 * {@code isHeader} accepts.
 *
 * @param elements the elements to visit
 * @return the result list created for this call
 */
private static List<LinkContainer> work(Elements elements) {
    // NOTE(review): nothing is ever added to this list and the recursive call's return value
    // is discarded, so the method always returns an empty list — confirm whether intended.
    final List<LinkContainer> collected = new ArrayList<>(elements.size());
    String category = null;
    for (Element current : elements) {
        if (isHeader(current.tag())) {
            category = current.text();
            System.out.println(category);
        }
        // Recurse into the children of every element, header or not.
        work(current.children());
    }
    return collected;
}
Example #23
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks sibling accessors on parentless nodes: index 0, empty sibling collections,
 * and {@code null} for previous/next siblings.
 */
@Test
public void orphanNodeReturnsNullForSiblingElements() {
    Node orphanNode = new Element(Tag.valueOf("p"), "");
    Element orphanElement = new Element(Tag.valueOf("p"), "");

    // Node-level accessors
    assertEquals(0, orphanNode.siblingIndex());
    assertEquals(0, orphanNode.siblingNodes().size());
    assertNull(orphanNode.previousSibling());
    assertNull(orphanNode.nextSibling());

    // Element-level accessors
    assertEquals(0, orphanElement.siblingElements().size());
    assertNull(orphanElement.previousElementSibling());
    assertNull(orphanElement.nextElementSibling());
}
Example #24
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks sibling accessors on parentless nodes: index 0, empty sibling collections,
 * and {@code null} for previous/next siblings.
 */
@Test
public void orphanNodeReturnsNullForSiblingElements() {
    Node orphanNode = new Element(Tag.valueOf("p"), "");
    Element orphanElement = new Element(Tag.valueOf("p"), "");

    // Node-level accessors
    assertEquals(0, orphanNode.siblingIndex());
    assertEquals(0, orphanNode.siblingNodes().size());
    assertNull(orphanNode.previousSibling());
    assertNull(orphanNode.nextSibling());

    // Element-level accessors
    assertEquals(0, orphanElement.siblingElements().size());
    assertNull(orphanElement.previousElementSibling());
    assertNull(orphanElement.nextElementSibling());
}
Example #25
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that {@code before} inserts content directly in front of the target node, both
 * for an {@code Element} argument and for an HTML string argument.
 */
@Test
public void before() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    // Insert an element node before <b>.
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");
    doc.select("b").first().before(emphasis);
    String afterElementInsert = doc.body().html();
    assertEquals("<p>One <em>four</em><b>two</b> three</p>", afterElementInsert);

    // Insert parsed HTML before <b>; it lands between the earlier <em> and <b>.
    doc.select("b").first().before("<i>five</i>");
    String afterHtmlInsert = doc.body().html();
    assertEquals("<p>One <em>four</em><i>five</i><b>two</b> three</p>", afterHtmlInsert);
}
Example #26
Source File: Element.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Collects this element and every descendant whose tag name matches the given name.
 * @param tagName The tag name to search for (case insensitively).
 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);
    // The name is passed through normalize before being handed to the evaluator.
    final String normalizedName = normalize(tagName);
    final Evaluator.Tag byTag = new Evaluator.Tag(normalizedName);
    return Collector.collect(byTag, this);
}
Example #27
Source File: Element.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Collects this element and every descendant whose tag name matches the given name.
 * @param tagName The tag name to search for (case insensitively).
 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);
    // The name is passed through normalize before being handed to the evaluator.
    final String normalizedName = normalize(tagName);
    final Evaluator.Tag byTag = new Evaluator.Tag(normalizedName);
    return Collector.collect(byTag, this);
}
Example #28
Source File: CssTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks the {@code :root} selector: on the document it yields the single {@code html}
 * element, and on the {@code body} selection it yields the single {@code body} element.
 */
@Test
public void root() {
    // :root applied to the whole document
    Elements documentRoot = html.select(":root");
    assertEquals(1, documentRoot.size());
    assertNotNull(documentRoot.get(0));
    assertEquals(Tag.valueOf("html"), documentRoot.get(0).tag());

    // :root applied to the body selection
    Elements bodyRoot = html.select("body").select(":root");
    assertEquals(1, bodyRoot.size());
    assertNotNull(bodyRoot.get(0));
    assertEquals(Tag.valueOf("body"), bodyRoot.get(0).tag());
}
Example #29
Source File: NodeTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that {@code before} inserts content directly in front of the target node, both
 * for an {@code Element} argument and for an HTML string argument.
 */
@Test
public void before() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    // Insert an element node before <b>.
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");
    doc.select("b").first().before(emphasis);
    String afterElementInsert = doc.body().html();
    assertEquals("<p>One <em>four</em><b>two</b> three</p>", afterElementInsert);

    // Insert parsed HTML before <b>; it lands between the earlier <em> and <b>.
    doc.select("b").first().before("<i>five</i>");
    String afterHtmlInsert = doc.body().html();
    assertEquals("<p>One <em>four</em><i>five</i><b>two</b> three</p>", afterHtmlInsert);
}
Example #30
Source File: BootstrapHandler.java From flow with Apache License 2.0 | 5 votes |
/**
 * Creates a script element with the given type and defer setting.
 *
 * @param sourceUrl the value for the {@code src} attribute; no {@code src} is set when null
 * @param defer     the value passed for the defer attribute
 * @param type      the value for the {@code type} attribute
 * @return the configured script element
 */
private static Element createJavaScriptElement(String sourceUrl, boolean defer, String type) {
    Element script = new Element(Tag.valueOf(SCRIPT_TAG), "");
    script = script.attr("type", type);
    script = script.attr(DEFER_ATTRIBUTE, defer);

    return sourceUrl == null ? script : script.attr("src", sourceUrl);
}