org.jsoup.nodes.Attribute Java Examples
The following examples show how to use
org.jsoup.nodes.Attribute.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AttributeParseTest.java From astor with GNU General Public License v2.0 | 7 votes |
@Test public void parsesBooleanAttributes() { String html = "<a normal=\"123\" boolean empty=\"\"></a>"; Element el = Jsoup.parse(html).select("a").first(); assertEquals("123", el.attr("normal")); assertEquals("", el.attr("boolean")); assertEquals("", el.attr("empty")); List<Attribute> attributes = el.attributes().asList(); assertEquals("There should be 3 attribute present", 3, attributes.size()); // Assuming the list order always follows the parsed html assertFalse("'normal' attribute should not be boolean", attributes.get(0) instanceof BooleanAttribute); assertTrue("'boolean' attribute should be boolean", attributes.get(1) instanceof BooleanAttribute); assertFalse("'empty' attribute should not be boolean", attributes.get(2) instanceof BooleanAttribute); assertEquals(html, el.outerHtml()); }
Example #2
Source File: HtmlDecoder.java From metafacture-core with Apache License 2.0 | 6 votes |
/**
 * Walks the element tree below {@code parent} depth-first and reports it to
 * {@code receiver}: each child element becomes an entity named after the node,
 * each of its attributes becomes a literal, and a childless element additionally
 * emits a {@code "value"} literal when it has non-empty text or data.
 *
 * @param parent   element whose children are emitted
 * @param receiver sink that receives the entity/literal events
 */
private void process(Element parent, StreamReceiver receiver) {
    for (Element child : parent.children()) {
        receiver.startEntity(child.nodeName());

        for (Attribute attr : child.attributes()) {
            receiver.literal(attr.getKey(), attr.getValue());
        }

        if (child.children().isEmpty()) {
            // Prefer the trimmed text; fall back to the element's data when the text is empty.
            String trimmed = child.text().trim();
            String value = trimmed.isEmpty() ? child.data() : trimmed;
            if (!value.isEmpty()) {
                receiver.literal("value", value);
            }
        }

        process(child, receiver);
        receiver.endEntity();
    }
}
Example #3
Source File: Cleaner.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Builds a copy of {@code sourceEl} that keeps only the attributes accepted by the
 * whitelist, then adds the whitelist's enforced attributes for the tag.
 *
 * @param sourceEl element to copy
 * @return the new element together with the number of attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after filtering.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, dropped);
}
Example #4
Source File: AttributeParseTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void parsesBooleanAttributes() { String html = "<a normal=\"123\" boolean empty=\"\"></a>"; Element el = Jsoup.parse(html).select("a").first(); assertEquals("123", el.attr("normal")); assertEquals("", el.attr("boolean")); assertEquals("", el.attr("empty")); List<Attribute> attributes = el.attributes().asList(); assertEquals("There should be 3 attribute present", 3, attributes.size()); // Assuming the list order always follows the parsed html assertFalse("'normal' attribute should not be boolean", attributes.get(0) instanceof BooleanAttribute); assertTrue("'boolean' attribute should be boolean", attributes.get(1) instanceof BooleanAttribute); assertFalse("'empty' attribute should not be boolean", attributes.get(2) instanceof BooleanAttribute); assertEquals(html, el.outerHtml()); }
Example #5
Source File: W3CDom.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Finds any namespaces defined in this element. Returns any tag prefix. */ private String updateNamespaces(org.jsoup.nodes.Element el) { // scan the element for namespace declarations // like: xmlns="blah" or xmlns:prefix="blah" Attributes attributes = el.attributes(); for (Attribute attr : attributes) { String key = attr.getKey(); String prefix; if (key.equals(xmlnsKey)) { prefix = ""; } else if (key.startsWith(xmlnsPrefix)) { prefix = key.substring(xmlnsPrefix.length()); } else { continue; } namespaces.put(prefix, attr.getValue()); } // get the element prefix if any int pos = el.tagName().indexOf(":"); return pos > 0 ? el.tagName().substring(0, pos) : ""; }
Example #6
Source File: Cleaner.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Builds a copy of {@code sourceEl} that keeps only the attributes accepted by the
 * whitelist, then adds the whitelist's enforced attributes for the tag.
 *
 * @param sourceEl element to copy
 * @return the new element together with the number of attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after filtering.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, dropped);
}
Example #7
Source File: AttributeParseTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void parsesBooleanAttributes() { String html = "<a normal=\"123\" boolean empty=\"\"></a>"; Element el = Jsoup.parse(html).select("a").first(); assertEquals("123", el.attr("normal")); assertEquals("", el.attr("boolean")); assertEquals("", el.attr("empty")); List<Attribute> attributes = el.attributes().asList(); assertEquals("There should be 3 attribute present", 3, attributes.size()); // Assuming the list order always follows the parsed html assertFalse("'normal' attribute should not be boolean", attributes.get(0) instanceof BooleanAttribute); assertTrue("'boolean' attribute should be boolean", attributes.get(1) instanceof BooleanAttribute); assertFalse("'empty' attribute should not be boolean", attributes.get(2) instanceof BooleanAttribute); assertEquals(html, el.outerHtml()); }
Example #8
Source File: W3CDom.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Finds any namespaces defined in this element. Returns any tag prefix. */ private String updateNamespaces(org.jsoup.nodes.Element el) { // scan the element for namespace declarations // like: xmlns="blah" or xmlns:prefix="blah" Attributes attributes = el.attributes(); for (Attribute attr : attributes) { String key = attr.getKey(); String prefix; if (key.equals(xmlnsKey)) { prefix = ""; } else if (key.startsWith(xmlnsPrefix)) { prefix = key.substring(xmlnsPrefix.length()); } else { continue; } namespaces.put(prefix, attr.getValue()); } // get the element prefix if any int pos = el.tagName().indexOf(":"); return pos > 0 ? el.tagName().substring(0, pos) : ""; }
Example #9
Source File: W3CDom.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Finds any namespaces defined in this element. Returns any tag prefix. */ private String updateNamespaces(org.jsoup.nodes.Element el) { // scan the element for namespace declarations // like: xmlns="blah" or xmlns:prefix="blah" Attributes attributes = el.attributes(); for (Attribute attr : attributes) { String key = attr.getKey(); String prefix; if (key.equals(xmlnsKey)) { prefix = ""; } else if (key.startsWith(xmlnsPrefix)) { prefix = key.substring(xmlnsPrefix.length()); } else { continue; } namespaces.put(prefix, attr.getValue()); } // get the element prefix if any int pos = el.tagName().indexOf(":"); return pos > 0 ? el.tagName().substring(0, pos) : ""; }
Example #10
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); if (attributes.containsKey(tag)) { if (attributes.get(tag).contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
Example #11
Source File: WebComponentBootstrapHandler.java From flow with Apache License 2.0 | 6 votes |
/**
 * Creates a javascript which copies attributes from the {@code element} to
 * the created DOM element identified by {@code elementRef}. If {@code
 * element} contains a {@code src} attribute, its path is prefixed with
 * {@code basePath}.
 * <p>
 * Attribute names and values are escaped before being written into the
 * single-quoted JavaScript string literals, so that quotes, backslashes or
 * line breaks inside an attribute cannot terminate the literal early.
 *
 * @param writer
 *            response writer
 * @param elementRef
 *            variable name of the element in javascript
 * @param element
 *            jsoup element from which to copy the attributes
 * @param basePath
 *            base path of {@code src} attributes (service url's path)
 * @throws IOException
 *             if {@code writer} is unable to write
 */
private void transferAttribute(Writer writer, String elementRef, Element element, String basePath) throws IOException {
    for (Attribute attribute : element.attributes()) {
        writer.append(elementRef).append(".setAttribute('")
                .append(escapeForSingleQuotedJs(attribute.getKey())).append("',");
        if (attribute.getValue() == null) {
            writer.append("''");
        } else {
            String path = attribute.getValue();
            if ("src".equals(attribute.getKey())) {
                path = modifyPath(basePath, path);
            }
            writer.append("'").append(escapeForSingleQuotedJs(path)).append("'");
        }
        writer.append(");");
    }
}

/**
 * Escapes {@code text} so it can be embedded between single quotes in JavaScript source.
 * Backslash, single quote, CR, LF and the U+2028/U+2029 separators are escaped; every
 * other character is copied unchanged.
 */
private String escapeForSingleQuotedJs(String text) {
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '\\':
                escaped.append("\\\\");
                break;
            case '\'':
                escaped.append("\\'");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\u2028':
                escaped.append("\\u2028");
                break;
            case '\u2029':
                escaped.append("\\u2029");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
Example #12
Source File: CaptchaElementSelector.java From Asqatasun with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Tells whether the given element itself mentions the captcha keyword.
 * <p>
 * Elements whose node name matches {@code HTML_ELEMENT} or {@code BODY_ELEMENT}
 * (ignoring case) are never reported. Otherwise the element matches when its own
 * text, or the value of any of its attributes, contains {@code CAPTCHA_KEY}
 * (ignoring case). This method inspects only {@code element}; it does not visit
 * the element's parents.
 *
 * @param element element to inspect
 * @return whether the element's own text or one of its attribute values contains
 *         the captcha keyword
 */
private boolean parseAttributeToExtractCaptcha(Element element) {
    final String nodeName = element.nodeName();
    if (nodeName.equalsIgnoreCase(HTML_ELEMENT) || nodeName.equalsIgnoreCase(BODY_ELEMENT)) {
        return false;
    }

    if (StringUtils.containsIgnoreCase(element.ownText(), CAPTCHA_KEY)) {
        return true;
    }

    for (Attribute candidate : element.attributes()) {
        if (StringUtils.containsIgnoreCase(candidate.getValue(), CAPTCHA_KEY)) {
            return true;
        }
    }
    return false;
}
Example #13
Source File: HTMLJsoupCleanerImpl.java From Asqatasun with GNU Affero General Public License v3.0 | 6 votes |
/** * Remove the comments of the page * * @param node */ private void removeMalformedAttributes(Node node) { // as we are removing child nodes while iterating, we cannot use a normal foreach over children, // or will get a concurrent list modification error. int i = 0; while (i < node.childNodes().size()) { Node child = node.childNode(i); for (Attribute attr : child.attributes()) { if (attr.getKey().startsWith("\"") && attr.getKey().endsWith("\"")) { child.removeAttr(attr.getKey()); } } removeMalformedAttributes(child); i++; } }
Example #14
Source File: JsoupUtils.java From jpress with GNU Lesser General Public License v3.0 | 6 votes |
@Override protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { //不允许 javascript 开头的 src 和 href if ("src".equalsIgnoreCase(attr.getKey()) || "href".equalsIgnoreCase(attr.getKey())) { String value = attr.getValue(); if (StrUtil.isNotBlank(value) && value.toLowerCase().startsWith("javascript")) { return false; } } //允许 base64 的图片内容 if ("img".equals(tagName) && "src".equals(attr.getKey()) && attr.getValue().startsWith("data:;base64")){ return true; } return super.isSafeAttribute(tagName, el, attr); }
Example #15
Source File: Whitelist.java From jsoup-learning with MIT License | 6 votes |
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); if (attributes.containsKey(tag)) { if (attributes.get(tag).contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
Example #16
Source File: Whitelist.java From jsoup-learning with MIT License | 6 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString() + ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
Example #17
Source File: MacroExtensions.java From Android-WYSIWYG-Editor with Apache License 2.0 | 6 votes |
/**
 * Turns an HTML element into a macro node and inserts its rendered view into the editor.
 * The lower-cased tag name becomes the node's content entry and, when the element has
 * attributes, they are copied into the node's macro settings. The view comes from the
 * editor listener; if the listener returns {@code null} an empty macro view is used.
 *
 * @param element HTML element describing the macro
 * @return always {@code null}
 */
@Override
public Node buildNodeFromHTML(Element element) {
    final String macroTag = element.tagName().toLowerCase();
    final Node macroNode = getNodeInstance(EditorType.macro);
    macroNode.content.add(macroTag);

    final List<Attribute> attributes = element.attributes().asList();
    if (!attributes.isEmpty()) {
        macroNode.macroSettings = new HashMap<>();
        for (Attribute attribute : attributes) {
            macroNode.macroSettings.put(attribute.getKey(), attribute.getValue());
        }
    }

    // Capture the insertion position before asking the listener to render.
    final int insertAt = editorCore.getChildCount();
    View macroView = editorCore.getEditorListener()
            .onRenderMacro(macroTag, macroNode.macroSettings, editorCore.getChildCount());
    if (macroView == null) {
        macroView = getEmptyMacro(macroNode.content.get(0), macroNode.macroSettings);
    }

    insertMacro(macroTag, macroView, macroNode.macroSettings, insertAt);
    return null;
}
Example #18
Source File: HtmlNavigator.java From jstarcraft-core with Apache License 2.0 | 5 votes |
/**
 * Returns an iterator over the attributes of the context element whose key equals
 * {@code localName}. The namespace arguments are not used by this implementation.
 *
 * @param contextNode element whose attributes are searched; cast to {@code Element}
 * @param localName attribute key to match
 * @param namespacePrefix ignored
 * @param namespaceURI ignored
 * @return iterator over the matching attributes (empty when none match)
 */
@Override
public Iterator<Attribute> getAttributeAxisIterator(Object contextNode, String localName, String namespacePrefix, String namespaceURI) {
    final Element contextElement = (Element) contextNode;
    final LinkedList<Attribute> matches = new LinkedList<>();
    for (Attribute candidate : contextElement.attributes()) {
        if (!localName.equals(candidate.getKey())) {
            continue;
        }
        matches.add(candidate);
    }
    return matches.iterator();
}
Example #19
Source File: W3CDom.java From astor with GNU General Public License v2.0 | 5 votes |
private void copyAttributes(org.jsoup.nodes.Node source, Element el) { for (Attribute attribute : source.attributes()) { // valid xml attribute names are: ^[a-zA-Z_:][-a-zA-Z0-9_:.] String key = attribute.getKey().replaceAll("[^-a-zA-Z0-9_:.]", ""); if (key.matches("[a-zA-Z_:][-a-zA-Z0-9_:.]*")) el.setAttribute(key, attribute.getValue()); } }
Example #20
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (lowerCase(value).startsWith(prot)) { return true; } } return false; }
Example #21
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 5 votes |
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); Set<AttributeKey> okSet = attributes.get(tag); if (okSet != null && okSet.contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } // might be an enforced attribute? Map<AttributeKey, AttributeValue> enforcedSet = enforcedAttributes.get(tag); if (enforcedSet != null) { Attributes expect = getEnforcedAttributes(tagName); String attrKey = attr.getKey(); if (expect.hasKeyIgnoreCase(attrKey)) { return expect.getIgnoreCase(attrKey).equals(attr.getValue()); } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
Example #22
Source File: SishuokWhitelist.java From es with Apache License 2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // resolve relative urls to abs, and update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString() + ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
Example #23
Source File: SishuokWhitelist.java From es with Apache License 2.0 | 5 votes |
boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); if (attributes.containsKey(tag)) { for (AttributeKey attributeKey : attributes.get(tag)) { if (attr.getKey().startsWith(attributeKey.toString())) { return false; } } return true; } else { // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); } }
Example #24
Source File: W3CDom.java From astor with GNU General Public License v2.0 | 5 votes |
private void copyAttributes(org.jsoup.nodes.Node source, Element el) { for (Attribute attribute : source.attributes()) { // valid xml attribute names are: ^[a-zA-Z_:][-a-zA-Z0-9_:.] String key = attribute.getKey().replaceAll("[^-a-zA-Z0-9_:.]", ""); if (key.matches("[a-zA-Z_:][-a-zA-Z0-9_:.]*")) el.setAttribute(key, attribute.getValue()); } }
Example #25
Source File: HtmlNavigator.java From jstarcraft-core with Apache License 2.0 | 5 votes |
/**
 * Reports whether the given object is an {@code Attribute}.
 *
 * @param object object to test; {@code null} yields {@code false}
 * @return true if {@code object} is an instance of {@code Attribute}
 */
@Override
public boolean isAttribute(Object object) {
    return object instanceof Attribute;
}
Example #26
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (lowerCase(value).startsWith(prot)) { return true; } } return false; }
Example #27
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 5 votes |
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); Set<AttributeKey> okSet = attributes.get(tag); if (okSet != null && okSet.contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } // might be an enforced attribute? Map<AttributeKey, AttributeValue> enforcedSet = enforcedAttributes.get(tag); if (enforcedSet != null) { Attributes expect = getEnforcedAttributes(tagName); String attrKey = attr.getKey(); if (expect.hasKeyIgnoreCase(attrKey)) { return expect.getIgnoreCase(attrKey).equals(attr.getValue()); } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
Example #28
Source File: AttributesAdaptor.java From xsoup with MIT License | 5 votes |
/**
 * Wraps a jsoup attribute collection, creating one {@code AttributeAdaptor} per
 * attribute and storing them in {@code attrList} in iteration order.
 *
 * @param attributes jsoup attributes to adapt
 * @param element element that owns the attributes; passed to every adaptor
 */
public AttributesAdaptor(Attributes attributes, Element element) {
    this.attributes = attributes;
    this.element = element;
    this.attrList = new ArrayList<Attr>();

    for (Attribute source : attributes) {
        Attr adapted = new AttributeAdaptor(source, element);
        this.attrList.add(adapted);
    }
}
Example #29
Source File: Whitelist.java From astor with GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
Example #30
Source File: ElementAdaptor.java From xsoup with MIT License | 5 votes |
/**
 * Returns the attribute node with the given name, or {@code null} when the
 * wrapped element has no such attribute.
 *
 * @param name name of the attribute to look up
 * @return an {@code Attr} view of the attribute, or {@code null} if it is absent
 */
@Override
public Attr getAttributeNode(String name) {
    // jsoup's attr() returns an empty string (never null) for a missing attribute, so a
    // null check on its result cannot detect absence; ask hasAttr() instead.
    if (!element.hasAttr(name)) {
        return null;
    }
    return NodeAdaptors.getAttr(new Attribute(name, element.attr(name)), element);
}