org.w3c.dom.DocumentFragment Java Examples
The following examples show how to use
org.w3c.dom.DocumentFragment.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: W3CDomHandler.java From JDKSourceCode1.8 with MIT License | 6 votes |
public Element getElement(DOMResult r) { // JAXP spec is ambiguous about what really happens in this case, // so work defensively Node n = r.getNode(); if( n instanceof Document ) { return ((Document)n).getDocumentElement(); } if( n instanceof Element ) return (Element)n; if( n instanceof DocumentFragment ) return (Element)n.getChildNodes().item(0); // if the result object contains something strange, // it is not a user problem, but it is a JAXB provider's problem. // That's why we throw a runtime exception. throw new IllegalStateException(n.toString()); }
Example #2
Source File: XsltFOFunctions.java From docx4j-export-FO with Apache License 2.0 | 6 votes |
/** * Use RunFontSelector to determine the correct font for the list item label. * * @param context * @param foListItemLabelBody * @param pPr * @param rPr * @param text */ protected static void setFont(RunFontSelector runFontSelector, Element foListItemLabelBody, PPr pPr, RPr rPr, String text) { DocumentFragment result = (DocumentFragment)runFontSelector.fontSelector(pPr, rPr, text); log.debug(XmlUtils.w3CDomNodeToString(result)); // eg <fo:inline xmlns:fo="http://www.w3.org/1999/XSL/Format" font-family="Times New Roman">1)</fo:inline> // Now get the attribute value if (result!=null && result.getFirstChild()!=null) { Attr attr = ((Element)result.getFirstChild()).getAttributeNode("font-family"); if (attr!=null) { foListItemLabelBody.setAttribute("font-family", attr.getValue()); } } }
Example #3
Source File: MessageParser.java From translationstudio8 with GNU General Public License v2.0 | 6 votes |
/**
 * Strips the tags from HTML-formatted text.
 *
 * @param html
 *            the HTML-formatted string
 * @return the text with HTML tags removed; the empty string {@code ""} when
 *         {@code html} is null or cannot be parsed
 */
private String htmlToText(String html) {
    if (html == null) {
        return "";
    }
    DOMFragmentParser parser = new DOMFragmentParser();
    CoreDocumentImpl codeDoc = new CoreDocumentImpl();
    // The input is already decoded text, so hand the parser a character stream.
    // Encoding it with the platform charset and then declaring the bytes to be
    // in textCharset corrupts non-ASCII characters whenever the two differ.
    InputSource inSource = new InputSource(new java.io.StringReader(html));
    DocumentFragment doc = codeDoc.createDocumentFragment();

    try {
        parser.parse(inSource, doc);
    } catch (Exception e) {
        // Best effort: unparseable input yields no text.
        return "";
    }

    textBuffer = new StringBuffer();
    processNode(doc);
    return textBuffer.toString();
}
Example #4
Source File: Html5ElementStack.java From caja with Apache License 2.0 | 6 votes |
/**
 * Merges adjacent text nodes while appending to a fragment.
 *
 * <p>With no pending node, {@code current} is simply handed back. When both
 * nodes are text nodes, a new text node holding their concatenated content is
 * returned and nothing is appended. Otherwise {@code pending} is appended to
 * {@code parent} and {@code current} is returned.
 */
private Node appendNormalized(
    Node pending, Node current, DocumentFragment parent) {
  if (pending == null) {
    return current;
  }

  boolean bothText = pending.getNodeType() == Node.TEXT_NODE
      && current.getNodeType() == Node.TEXT_NODE;
  if (!bothText) {
    parent.appendChild(pending);
    return current;
  }

  Text first = (Text) pending;
  Text second = (Text) current;
  String mergedContent = first.getTextContent() + second.getTextContent();
  Text combined = doc.createTextNode(mergedContent);

  if (needsDebugData) {
    // Carry the source span and raw text of both halves over to the merged node.
    Nodes.setFilePositionFor(
        combined,
        FilePosition.span(
            Nodes.getFilePositionFor(first),
            Nodes.getFilePositionFor(second)));
    String mergedRaw = Nodes.getRawText(first) + Nodes.getRawText(second);
    Nodes.setRawText(combined, mergedRaw);
  }
  return combined;
}
Example #5
Source File: StaxSerializer.java From cxf with Apache License 2.0 | 6 votes |
/**
 * Copies the reader's content to the writer and returns what ended up under
 * {@code element}.
 *
 * <p>When exactly one node results, that node is returned directly; otherwise
 * all resulting nodes are moved into a new fragment created from
 * {@code contextDocument} (an empty fragment when there are none).
 *
 * @param reader source of the XML events to copy
 * @param wrapped whether the interesting nodes are the children of
 *        {@code element}'s first child rather than of {@code element} itself
 * @param contextDocument document used to create the returned fragment
 * @param writer destination the events are copied to
 * @param element element inspected for children after the copy
 * @return the single resulting node, or a fragment holding all of them
 * @throws XMLStreamException if copying the stream fails
 */
private Node appendNewChild(XMLStreamReader reader, boolean wrapped, Document contextDocument,
                            XMLStreamWriter writer, Element element) throws XMLStreamException {
    StaxUtils.copy(reader, writer);

    Node child = element.getFirstChild();
    // Guard against an empty element: without the null check, unwrapping
    // would dereference a missing wrapper node.
    if (wrapped && child != null) {
        child = child.getFirstChild();
    }
    if (child != null && child.getNextSibling() == null) {
        return child;
    }

    // Only build a fragment when there is not exactly one node to return.
    DocumentFragment result = contextDocument.createDocumentFragment();
    while (child != null) {
        // Remember the sibling first: appendChild detaches child from its parent.
        Node nextChild = child.getNextSibling();
        result.appendChild(child);
        child = nextChild;
    }
    return result;
}
Example #6
Source File: Decrypter.java From lams with GNU General Public License v2.0 | 6 votes |
/** * Parse the specified input stream in a DOM DocumentFragment, owned by the specified Document. * * @param input the InputStream to parse * @param owningDocument the Document which will own the returned DocumentFragment * @return a DocumentFragment * @throws DecryptionException thrown if there is an error parsing the input stream */ private DocumentFragment parseInputStream(InputStream input, Document owningDocument) throws DecryptionException { // Since Xerces currently seems not to handle parsing into a DocumentFragment // without a bit hackery, use this to simulate, so we can keep the API // the way it hopefully will look in the future. Obviously this only works for // input streams containing valid XML instances, not fragments. Document newDocument = null; try { newDocument = parserPool.parse(input); } catch (XMLParserException e) { log.error("Error parsing decrypted input stream", e); throw new DecryptionException("Error parsing input stream", e); } Element element = newDocument.getDocumentElement(); owningDocument.adoptNode(element); DocumentFragment container = owningDocument.createDocumentFragment(); container.appendChild(element); return container; }
Example #7
Source File: NodesTest.java From caja with Apache License 2.0 | 6 votes |
/**
 * For each sample doctype declaration, builds a document from it, appends a
 * small HTML fragment and checks that the rendered output starts with the
 * declaration.
 */
public final void testDocumentType() throws ParseException {
  String[] docTypes = {
      "<!DOCTYPE html PUBLIC "
      + "\"-//W3C//DTD HTML 4.01 Transitional//EN\" "
      + "\"http://www.w3.org/TR/html4/loose.dtd\">",
      "<!DOCTYPE html PUBLIC "
      + "\"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
      + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">",
      "<!DOCTYPE html>"
  };

  for (int i = 0; i < docTypes.length; i++) {
    String declaration = docTypes[i];
    Document owner = DomParser.makeDocument(
        DoctypeMaker.parse(declaration), null);
    DocumentFragment fragment = html(fromString("<html><b>my text</b></html>"));
    owner.appendChild(owner.adoptNode(fragment));
    MoreAsserts.assertStartsWith(
        declaration,
        Nodes.render(owner.getDoctype(), fragment, null));
  }
}
Example #8
Source File: HTMLLanguageParser.java From nutch-htmlunit with Apache License 2.0 | 6 votes |
/**
 * Looks for the document's language, trying in order: the parse metadata,
 * a {@code LanguageParser} run over the DOM fragment, and finally the
 * content-language entry of the content metadata.
 *
 * @return the first non-null language found, or the content-language value
 *         (whatever it is) when the first two sources yield nothing
 */
private String detectLanguage(Parse page, DocumentFragment doc) {
  String fromParseMeta = getLanguageFromMetadata(page.getData().getParseMeta());
  if (fromParseMeta != null) {
    return fromParseMeta;
  }

  String fromMarkup = new LanguageParser(doc).getLanguage();
  if (fromMarkup != null) {
    return fromMarkup;
  }

  return page.getData().getContentMeta().get(Response.CONTENT_LANGUAGE);
}
Example #9
Source File: JSParseFilter.java From anthelion with Apache License 2.0 | 6 votes |
public ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc) { Parse parse = parseResult.get(content.getUrl()); String url = content.getBaseUrl(); ArrayList outlinks = new ArrayList(); walk(doc, parse, metaTags, url, outlinks); if (outlinks.size() > 0) { Outlink[] old = parse.getData().getOutlinks(); String title = parse.getData().getTitle(); List list = Arrays.asList(old); outlinks.addAll(list); ParseStatus status = parse.getData().getStatus(); String text = parse.getText(); Outlink[] newlinks = (Outlink[])outlinks.toArray(new Outlink[outlinks.size()]); ParseData parseData = new ParseData(status, title, newlinks, parse.getData().getContentMeta(), parse.getData().getParseMeta()); // replace original parse obj with new one parseResult.put(content.getUrl(), new ParseText(text), parseData); } return parseResult; }
Example #10
Source File: MessageParser.java From tmxeditor8 with GNU General Public License v2.0 | 6 votes |
/**
 * Strips the tags from HTML-formatted text.
 *
 * @param html
 *            the HTML-formatted string
 * @return the text with HTML tags removed; the empty string {@code ""} when
 *         {@code html} is null or cannot be parsed
 */
private String htmlToText(String html) {
    if (html == null) {
        return "";
    }
    DOMFragmentParser parser = new DOMFragmentParser();
    CoreDocumentImpl codeDoc = new CoreDocumentImpl();
    // The input is already decoded text, so hand the parser a character stream.
    // Encoding it with the platform charset and then declaring the bytes to be
    // in textCharset corrupts non-ASCII characters whenever the two differ.
    InputSource inSource = new InputSource(new java.io.StringReader(html));
    DocumentFragment doc = codeDoc.createDocumentFragment();

    try {
        parser.parse(inSource, doc);
    } catch (Exception e) {
        // Best effort: unparseable input yields no text.
        return "";
    }

    textBuffer = new StringBuffer();
    processNode(doc);
    return textBuffer.toString();
}
Example #11
Source File: MD5SignatureParseFilter.java From storm-crawler with Apache License 2.0 | 6 votes |
/**
 * Stores an MD5 hex digest under {@code key_name} in the metadata of the
 * parse data for {@code URL}.
 *
 * <p>If {@code copyKeyName} is set, any value already stored under
 * {@code key_name} is first copied to {@code copyKeyName}. The digest input
 * is the parsed text (when {@code useText} is set and the text is not blank)
 * or the raw content; when neither yields bytes, the URL itself is digested.
 */
@Override
public void filter(String URL, byte[] content, DocumentFragment doc,
        ParseResult parse) {
    ParseData parseData = parse.get(URL);
    Metadata metadata = parseData.getMetadata();

    if (copyKeyName != null) {
        String previous = metadata.getFirstValue(key_name);
        if (previous != null) {
            metadata.setValue(copyKeyName, previous);
        }
    }

    byte[] digestInput;
    if (!useText) {
        digestInput = content;
    } else {
        String text = parseData.getText();
        digestInput = StringUtils.isNotBlank(text)
                ? text.getBytes(StandardCharsets.UTF_8)
                : null;
    }

    // Fall back to the URL when there is nothing else to digest.
    if (digestInput == null) {
        digestInput = URL.getBytes(StandardCharsets.UTF_8);
    }

    metadata.setValue(key_name, DigestUtils.md5Hex(digestInput));
}
Example #12
Source File: IMFTrackFileCPLBuilder.java From photon with Apache License 2.0 | 5 votes |
/**
 * Adds one {@code EssenceDescriptorBaseType} per essence descriptor read from
 * the track file to the CPL root's essence descriptor list.
 *
 * <p>Each entry receives a freshly generated UUID, which is also appended to
 * {@code uuidList}, and the first child of the descriptor's DOM fragment.
 *
 * @param uuidList list that receives the generated UUIDs, in order
 * @param imfErrorLogger logger passed to the reader and used to record a
 *        parser configuration failure
 * @throws IOException declared by the signature; presumably raised by the
 *         track file reader calls (not visible here)
 */
private void buildEssenceDescriptorList(List<String> uuidList, IMFErrorLogger imfErrorLogger) throws IOException {

    try {
        List<EssenceDescriptorBaseType> essenceDescriptorList = this.cplRoot.getEssenceDescriptorList().getEssenceDescriptor();
        List<InterchangeObject.InterchangeObjectBO> essenceDescriptors = this.imfTrackFileReader.getEssenceDescriptors(imfErrorLogger);

        // The factory and builder do not depend on the descriptor, so they are
        // created once instead of once per loop iteration.
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();

        for (InterchangeObject.InterchangeObjectBO essenceDescriptor : essenceDescriptors) {
            KLVPacket.Header essenceDescriptorHeader = essenceDescriptor.getHeader();
            List<KLVPacket.Header> subDescriptorHeaders = this.imfTrackFileReader.getSubDescriptorKLVHeader(essenceDescriptor, imfErrorLogger);

            /* Create a dom: every descriptor still gets its own document. */
            Document document = docBuilder.newDocument();

            EssenceDescriptorBaseType essenceDescriptorBaseType = new EssenceDescriptorBaseType();
            String uuid = IMFUUIDGenerator.getInstance().getUrnUUID();
            essenceDescriptorBaseType.setId(uuid);
            uuidList.add(uuid);

            DocumentFragment documentFragment = this.getEssenceDescriptorAsDocumentFragment(document, essenceDescriptorHeader, subDescriptorHeaders, imfErrorLogger);
            Node node = documentFragment.getFirstChild();

            essenceDescriptorBaseType.getAny().add(node);
            essenceDescriptorList.add(essenceDescriptorBaseType);
        }
    } catch (ParserConfigurationException e) {
        imfErrorLogger.addError(IMFErrorLogger.IMFErrors.ErrorCodes.IMF_CPL_ERROR, IMFErrorLogger.IMFErrors
                .ErrorLevels.FATAL, e.getMessage());
        throw new IMFException(e);
    }
}
Example #13
Source File: UnImplNode.java From jdk1.8-source-analysis with Apache License 2.0 | 5 votes |
/**
 * Not supported by this node; see org.w3c.dom.Document.
 *
 * <p>Reports {@code ER_FUNCTION_NOT_SUPPORTED} through {@code error(...)}.
 *
 * @return null, if {@code error(...)} returns normally
 */
public DocumentFragment createDocumentFragment()
{
  error(XMLErrorResources.ER_FUNCTION_NOT_SUPPORTED);

  return null;
}
Example #14
Source File: DomParserTest.java From caja with Apache License 2.0 | 5 votes |
/**
 * Parses a script element carrying an {@code xmlns:os} declaration and
 * checks the exact text produced when the fragment is rendered.
 */
public final void testIssue1211XmlnsOnScript() throws Exception {
  final String markup = ""
      + "<script type=\"text/os-data\"\n"
      + "    xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
      + "  <os:ViewerRequest key=\"viewer\"/>\n"
      + "</script>";
  final String expected = ""
      + "<script type=\"text/os-data\""
      + " xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
      + "  <os:ViewerRequest key=\"viewer\"/>\n"
      + "</script>";

  DocumentFragment parsed = htmlFragment(fromString(markup));

  assertEquals(expected, Nodes.render(parsed));
}
Example #15
Source File: DocumentFragmentTest.java From anno4j with Apache License 2.0 | 5 votes |
/**
 * Stores a namespaced XML fragment on an entity and checks that the string
 * value of the first matching statement's object equals the original XML.
 */
public void testAddNamespaceElement() throws Exception {
    final String xml = "<a:Box xmlns:a=\"http://example.org/a#\" required=\"true\"><a:widget size=\"10\"> </a:widget><a:grommit id=\"23\"> text </a:grommit></a:Box>";
    Document parsed = parse(xml);

    ObjectFactory factory = con.getObjectFactory();
    Entity entity = con.addDesignation(factory.createObject(), Entity.class);

    // Move the document element into a fragment and hand that to the entity.
    DocumentFragment fragment = parsed.createDocumentFragment();
    fragment.appendChild(parsed.getDocumentElement());
    entity.setXML(fragment);

    RepositoryResult<Statement> statements = con.getStatements(entity.getResource(), pred, null);
    String label = statements.next().getObject().stringValue();
    statements.close();

    assertEquals(xml, label);
}
Example #16
Source File: JmsSubscription.java From cxf with Apache License 2.0 | 5 votes |
/**
 * Handles an incoming JMS message: unmarshals its text into a {@code Notify},
 * drops every notification message rejected by {@code doFilter}, stamps the
 * remaining ones with this subscription's topic and reference, and forwards
 * the notify if anything is left.
 *
 * <p>Any exception is logged at WARNING level and not propagated.
 *
 * @param jmsMessage the received message; cast to {@code TextMessage}
 */
public void onMessage(Message jmsMessage) {
    try {
        TextMessage textMessage = (TextMessage) jmsMessage;
        XMLStreamReader xmlReader
            = StaxUtils.createXMLStreamReader(new StringReader(textMessage.getText()));
        Notify notify = (Notify) jaxbContext.createUnmarshaller().unmarshal(xmlReader);
        xmlReader.close();

        Iterator<NotificationMessageHolderType> holders = notify.getNotificationMessage().iterator();
        while (holders.hasNext()) {
            NotificationMessageHolderType holder = holders.next();
            Object payload = holder.getMessage().getAny();

            if (!(payload instanceof Element)) {
                // Marshal the non-DOM payload into a scratch fragment and
                // continue with the first element found there.
                DocumentFragment scratch = DOMUtils.getEmptyDocument().createDocumentFragment();
                jaxbContext.createMarshaller().marshal(payload, scratch);
                payload = DOMUtils.getFirstElement(scratch);
            }

            if (doFilter((Element) payload)) {
                holder.setTopic(topic);
                holder.setSubscriptionReference(getEpr());
            } else {
                holders.remove();
            }
        }

        if (!notify.getNotificationMessage().isEmpty()) {
            doNotify(notify);
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Error notifying consumer", e);
    }
}
Example #17
Source File: BaseMarkupSerializer.java From jdk1.8-source-analysis with Apache License 2.0 | 5 votes |
/**
 * Serializes the DOM document fragment using the previously specified
 * writer and output format. Throws an exception only if an I/O exception
 * occurred while serializing.
 *
 * @param frag The document fragment to serialize
 * @throws IOException An I/O exception occurred while serializing
 */
public void serialize( DocumentFragment frag )
    throws IOException
{
    reset();
    prepare();
    serializeNode( frag );
    _printer.flush();

    // Surface any exception the printer recorded while writing.
    if ( _printer.getException() != null ) {
        throw _printer.getException();
    }
}
Example #18
Source File: SparqlEvaluator.java From anno4j with Apache License 2.0 | 5 votes |
/**
 * Returns the result of {@code asDocument()} as a fragment: a new fragment
 * is created from that document and the document element is moved into it.
 *
 * @return a fragment containing the document element
 */
public DocumentFragment asDocumentFragment() throws OpenRDFException,
        TransformerException, IOException, ParserConfigurationException {
    final Document owner = asDocument();
    final DocumentFragment fragment = owner.createDocumentFragment();

    fragment.appendChild(owner.getDocumentElement());

    return fragment;
}
Example #19
Source File: NodesRenderTest.java From caja with Apache License 2.0 | 5 votes |
/**
 * Parses {@code source} into an HTML fragment, rethrowing a
 * {@code ParseException} as an unchecked {@link RuntimeException}.
 *
 * @param source the markup to parse
 * @return the parsed fragment
 */
private DocumentFragment html(String source) {
  final DocumentFragment parsed;
  try {
    parsed = htmlFragment(fromString(source));
  } catch (ParseException e) {
    throw new RuntimeException(e);
  }
  return parsed;
}
Example #20
Source File: DomParserTest.java From caja with Apache License 2.0 | 5 votes |
/**
 * Parses a div element carrying an {@code xmlns:os} declaration and checks
 * the exact text produced when the fragment is rendered.
 */
public final void testIssue1211XmlnsOnDiv() throws Exception {
  final String markup = ""
      + "<div xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
      + "  <os:ViewerRequest key=\"viewer\"/>\n"
      + "</div>";
  final String expected = ""
      + "<div xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
      + "  <os:ViewerRequest key=\"viewer\">\n"
      + "</os:ViewerRequest></div>";

  DocumentFragment parsed = htmlFragment(fromString(markup));

  assertEquals(expected, Nodes.render(parsed));
}
Example #21
Source File: DOMPrinter.java From openjdk-jdk8u-backup with GNU General Public License v2.0 | 5 votes |
/**
 * Dispatches a DOM node to the visit method matching its node type.
 *
 * <p>Document type nodes are skipped. Attribute nodes, entity nodes and any
 * other node type are rejected.
 *
 * @param node the node to print
 * @throws XMLStreamException for a node type that is not handled here
 */
public void print(Node node) throws XMLStreamException {
    final short type = node.getNodeType();

    switch (type) {
        case Node.DOCUMENT_NODE:
            visitDocument((Document) node);
            return;
        case Node.DOCUMENT_FRAGMENT_NODE:
            visitDocumentFragment((DocumentFragment) node);
            return;
        case Node.ELEMENT_NODE:
            visitElement((Element) node);
            return;
        case Node.TEXT_NODE:
            visitText((Text) node);
            return;
        case Node.CDATA_SECTION_NODE:
            visitCDATASection((CDATASection) node);
            return;
        case Node.PROCESSING_INSTRUCTION_NODE:
            visitProcessingInstruction((ProcessingInstruction) node);
            return;
        case Node.ENTITY_REFERENCE_NODE:
            visitReference((EntityReference) node);
            return;
        case Node.COMMENT_NODE:
            visitComment((Comment) node);
            return;
        case Node.DOCUMENT_TYPE_NODE:
            // Nothing is emitted for a document type node.
            return;
        case Node.ATTRIBUTE_NODE:
        case Node.ENTITY_NODE:
        default:
            throw new XMLStreamException("Unexpected DOM Node Type " + type);
    }
}
Example #22
Source File: HTMLLanguageParser.java From nutch-htmlunit with Apache License 2.0 | 5 votes |
/**
 * Scan the HTML document looking at possible indications of content
 * language:
 * <ol>
 * <li>html lang attribute
 * (http://www.w3.org/TR/REC-html40/struct/dirlang.html#h-8.1)</li>
 * <li>meta dc.language
 * (http://dublincore.org/documents/2000/07/16/usageguide/qualified-html.shtml#language)</li>
 * <li>meta http-equiv (content-language)
 * (http://www.w3.org/TR/REC-html40/struct/global.html#h-7.4.4.2)</li>
 * </ol>
 *
 * <p>The {@code detect} and {@code identify} settings decide which strategy
 * runs: a negative value disables a strategy, and when both are enabled the
 * one with the smaller value runs first, with the other as fallback. A
 * language that is found is stored under {@code Metadata.LANGUAGE} in the
 * parse metadata.
 *
 * @return the given {@code parseResult}
 */
public ParseResult filter(Content content, ParseResult parseResult,
    HTMLMetaTags metaTags, DocumentFragment doc) {

  Parse parse = parseResult.get(content.getUrl());

  final boolean detectOnly = detect >= 0 && identify < 0;
  final boolean identifyOnly = detect < 0 && identify >= 0;

  String lang;
  if (detectOnly) {
    lang = detectLanguage(parse, doc);
  } else if (identifyOnly) {
    lang = identifyLanguage(parse);
  } else if (detect < identify) {
    // Detection has priority; identification is the fallback.
    lang = detectLanguage(parse, doc);
    if (lang == null) {
      lang = identifyLanguage(parse);
    }
  } else if (identify < detect) {
    // Identification has priority; detection is the fallback.
    lang = identifyLanguage(parse);
    if (lang == null) {
      lang = detectLanguage(parse, doc);
    }
  } else {
    LOG.warn("No configuration for language extraction policy is provided");
    return parseResult;
  }

  if (lang != null) {
    parse.getData().getParseMeta().set(Metadata.LANGUAGE, lang);
  }
  return parseResult;
}
Example #23
Source File: WdcParser.java From anthelion with Apache License 2.0 | 5 votes |
/**
 * Parses the input with the TagSoup parser into a new document fragment.
 *
 * <p>A {@code DOMBuilder} targeting the fragment is registered as both the
 * content handler and the lexical handler of the parser.
 *
 * @param input the source to parse
 * @return the fragment the builder was created with
 * @throws Exception whatever the parser or its configuration raises
 */
private DocumentFragment parseTagSoup(InputSource input) throws Exception {
  HTMLDocumentImpl ownerDoc = new HTMLDocumentImpl();
  DocumentFragment fragment = ownerDoc.createDocumentFragment();
  DOMBuilder domBuilder = new DOMBuilder(ownerDoc, fragment);

  org.ccil.cowan.tagsoup.Parser tagSoup = new org.ccil.cowan.tagsoup.Parser();
  tagSoup.setContentHandler(domBuilder);
  tagSoup.setFeature(
      org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
  tagSoup.setFeature(
      org.ccil.cowan.tagsoup.Parser.bogonsEmptyFeature, false);
  tagSoup.setProperty(
      "http://xml.org/sax/properties/lexical-handler", domBuilder);

  tagSoup.parse(input);
  return fragment;
}
Example #24
Source File: DomainParseFilter.java From storm-crawler with Apache License 2.0 | 5 votes |
/**
 * Stores the partition computed by {@code partitioner} for the URL under
 * {@code mdKey} in the metadata of the URL's parse data.
 */
@Override
public void filter(String URL, byte[] content, DocumentFragment doc,
        ParseResult parse) {
    final Metadata metadata = parse.get(URL).getMetadata();
    final String partition = partitioner.getPartition(URL, metadata);

    metadata.setValue(mdKey, partition);
}
Example #25
Source File: UnImplNode.java From Bytecoder with Apache License 2.0 | 5 votes |
/**
 * Not supported by this node; see org.w3c.dom.Document.
 *
 * <p>Reports {@code ER_FUNCTION_NOT_SUPPORTED} through {@code error(...)}.
 *
 * @return null, if {@code error(...)} returns normally
 */
public DocumentFragment createDocumentFragment()
{
  error(XMLErrorResources.ER_FUNCTION_NOT_SUPPORTED);

  return null;
}
Example #26
Source File: DocumentFragmentBuilder.java From storm-crawler with Apache License 2.0 | 5 votes |
public static DocumentFragment fromJsoup( org.jsoup.nodes.Document jsoupDocument) { HTMLDocumentImpl htmlDoc = new HTMLDocumentImpl(); htmlDoc.setErrorChecking(false); DocumentFragment fragment = htmlDoc.createDocumentFragment(); org.jsoup.nodes.Element rootEl = jsoupDocument.child(0); // skip the // #root node NodeTraversor.traverse(new W3CBuilder(htmlDoc, fragment), rootEl); return fragment; }
Example #27
Source File: XMLStreamDataWriter.java From cxf with Apache License 2.0 | 5 votes |
/**
 * Writes a DOM node to the given stream writer.
 *
 * <p>When the writer is a {@code W3CDOMStreamWriter}, the node is attached
 * directly to the writer's current node or current fragment where the
 * branches below allow it; in every other case the node is written through
 * {@code StaxUtils}.
 *
 * @param nd the node to write
 * @param writer the destination writer
 * @throws XMLStreamException declared by the signature; presumably raised by
 *         the {@code StaxUtils} write calls
 */
private void writeNode(Node nd, XMLStreamWriter writer) throws XMLStreamException {
    if (writer instanceof W3CDOMStreamWriter) {
        W3CDOMStreamWriter dw = (W3CDOMStreamWriter)writer;

        if (dw.getCurrentNode() != null) {
            if (nd instanceof DocumentFragment
                && nd.getOwnerDocument() == dw.getCurrentNode().getOwnerDocument()) {
                // Fragment from the same document: move its children one by
                // one onto the writer's current node.
                // NOTE(review): this branch does not return, so execution
                // continues to the StaxUtils write at the bottom with the
                // now-empty fragment - confirm that is intended.
                Node ch = nd.getFirstChild();
                while (ch != null) {
                    nd.removeChild(ch);
                    dw.getCurrentNode().appendChild(org.apache.cxf.helpers.DOMUtils.getDomElement(ch));
                    ch = nd.getFirstChild();
                }
            } else if (nd.getOwnerDocument() == dw.getCurrentNode().getOwnerDocument()) {
                // Non-fragment node from the same document: append it as-is.
                dw.getCurrentNode().appendChild(nd);
                return;
            } else if (nd instanceof DocumentFragment) {
                // Fragment from another document: import a deep copy first.
                nd = dw.getDocument().importNode(nd, true);
                dw.getCurrentNode().appendChild(nd);
                return;
            }
        } else if (dw.getCurrentFragment() != null) {
            if (nd.getOwnerDocument() == dw.getCurrentFragment().getOwnerDocument()) {
                // Same document as the writer's current fragment: append as-is.
                dw.getCurrentFragment().appendChild(nd);
                return;
            } else if (nd instanceof DocumentFragment) {
                // Fragment from another document: import a deep copy first.
                nd = dw.getDocument().importNode(nd, true);
                dw.getCurrentFragment().appendChild(nd);
                return;
            }
        }
    }
    // Generic path: reached for non-DOM writers and for every case above
    // that did not return.
    if (nd instanceof Document) {
        StaxUtils.writeDocument((Document)nd,
                                writer, false, true);
    } else {
        StaxUtils.writeNode(nd, writer, true);
    }
}
Example #28
Source File: BaseMarkupSerializer.java From JDKSourceCode1.8 with MIT License | 5 votes |
/**
 * Serializes the DOM document fragment using the previously specified
 * writer and output format. Throws an exception only if an I/O exception
 * occurred while serializing.
 *
 * @param frag The document fragment to serialize
 * @throws IOException An I/O exception occurred while serializing
 */
public void serialize( DocumentFragment frag )
    throws IOException
{
    reset();
    prepare();
    serializeNode( frag );
    _printer.flush();

    // Surface any exception the printer recorded while writing.
    if ( _printer.getException() != null ) {
        throw _printer.getException();
    }
}
Example #29
Source File: DOMPrinter.java From openjdk-8 with GNU General Public License v2.0 | 5 votes |
/**
 * Dispatches a DOM node to the visit method matching its node type.
 *
 * <p>Document type nodes are skipped. Attribute nodes, entity nodes and any
 * other node type are rejected.
 *
 * @param node the node to print
 * @throws XMLStreamException for a node type that is not handled here
 */
public void print(Node node) throws XMLStreamException {
    final short type = node.getNodeType();

    switch (type) {
        case Node.DOCUMENT_NODE:
            visitDocument((Document) node);
            return;
        case Node.DOCUMENT_FRAGMENT_NODE:
            visitDocumentFragment((DocumentFragment) node);
            return;
        case Node.ELEMENT_NODE:
            visitElement((Element) node);
            return;
        case Node.TEXT_NODE:
            visitText((Text) node);
            return;
        case Node.CDATA_SECTION_NODE:
            visitCDATASection((CDATASection) node);
            return;
        case Node.PROCESSING_INSTRUCTION_NODE:
            visitProcessingInstruction((ProcessingInstruction) node);
            return;
        case Node.ENTITY_REFERENCE_NODE:
            visitReference((EntityReference) node);
            return;
        case Node.COMMENT_NODE:
            visitComment((Comment) node);
            return;
        case Node.DOCUMENT_TYPE_NODE:
            // Nothing is emitted for a document type node.
            return;
        case Node.ATTRIBUTE_NODE:
        case Node.ENTITY_NODE:
        default:
            throw new XMLStreamException("Unexpected DOM Node Type " + type);
    }
}
Example #30
Source File: DebugParseFilter.java From storm-crawler with Apache License 2.0 | 5 votes |
/**
 * Serializes the DOM fragment as XML to the {@code os} stream and flushes
 * it. An {@link IOException} is printed to standard error and otherwise
 * ignored.
 */
@Override
public void filter(String URL, byte[] content, DocumentFragment doc,
        ParseResult parse) {
    try {
        new XMLSerializer(os, null).serialize(doc);
        os.flush();
    } catch (IOException e) {
        e.printStackTrace();
    }
}