org.w3c.dom.DocumentFragment Java Examples

Source File: W3CDomHandler.java From JDKSourceCode1.8 with MIT License

6 votes

/**
 * Extracts the element carried by a {@link DOMResult}.
 *
 * <ul>
 *   <li>a {@link Document} yields its document element;</li>
 *   <li>an {@link Element} is returned as is;</li>
 *   <li>a {@link DocumentFragment} yields its first child that is an
 *       element, skipping any leading text, comment or processing
 *       instruction nodes.</li>
 * </ul>
 *
 * @param r the result whose node is inspected
 * @return the element found, or {@code null} when the document has no
 *         document element or the fragment contains no element child
 * @throws IllegalStateException if the result holds no node, or a node
 *         that is not a document, an element or a document fragment
 */
public Element getElement(DOMResult r) {
    // JAXP spec is ambiguous about what really happens in this case,
    // so work defensively
    Node n = r.getNode();
    if (n instanceof Document) {
        return ((Document) n).getDocumentElement();
    }
    if (n instanceof Element) {
        return (Element) n;
    }
    if (n instanceof DocumentFragment) {
        // The first child is not guaranteed to be an element (whitespace
        // text, comments, ...), so look for the first element child
        // instead of blindly casting child 0.
        for (Node child = n.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child instanceof Element) {
                return (Element) child;
            }
        }
        return null;
    }

    // if the result object contains something strange,
    // it is not a user problem, but it is a JAXB provider's problem.
    // That's why we throw a runtime exception.
    // String.valueOf keeps this an IllegalStateException even when n is null.
    throw new IllegalStateException(String.valueOf(n));
}

Source File: XsltFOFunctions.java From docx4j-export-FO with Apache License 2.0

6 votes

/**
 * Use RunFontSelector to determine the correct font for the list item label.
 *
 * The selector's result is expected to be a fragment whose first child is an
 * element carrying a "font-family" attribute; when that attribute is present
 * its value is copied onto foListItemLabelBody. If the selector returns
 * nothing, or the first child is not an element, nothing is changed.
 *
 * @param runFontSelector the selector asked for the font of the label text
 * @param foListItemLabelBody the element that receives the font-family attribute
 * @param pPr paragraph properties passed through to the selector
 * @param rPr run properties passed through to the selector
 * @param text the label text passed through to the selector
 */
protected static void setFont(RunFontSelector runFontSelector, Element foListItemLabelBody, PPr pPr, RPr rPr, String text) {
	
	DocumentFragment result = (DocumentFragment)runFontSelector.fontSelector(pPr, rPr, text);
	if (result==null) {
		// Nothing to inspect (and nothing to log)
		return;
	}
	if (log.isDebugEnabled()) {
		// Only serialize the fragment when the output is actually wanted
		log.debug(XmlUtils.w3CDomNodeToString(result));
		// eg <fo:inline xmlns:fo="http://www.w3.org/1999/XSL/Format" font-family="Times New Roman">1)</fo:inline>
	}
	
	// Now get the attribute value; instanceof also covers an empty fragment
	if (result.getFirstChild() instanceof Element) {
		Attr attr = ((Element)result.getFirstChild()).getAttributeNode("font-family");
		if (attr!=null) {
			foListItemLabelBody.setAttribute("font-family", attr.getValue());
		}
	}
			
}

Source File: MessageParser.java From translationstudio8 with GNU General Public License v2.0

6 votes

/**
 * Strips the tags out of HTML-formatted text.
 * @param html
 *            the HTML-formatted string
 * @return String
 * 			  the text left after the HTML tags are removed. Returns the
 *            empty string "" when html is null or cannot be parsed.
 */
private String htmlToText(String html) {
	if (html == null) {
		return "";
	}
	DOMFragmentParser parser = new DOMFragmentParser();
	CoreDocumentImpl codeDoc = new CoreDocumentImpl();
	// Hand the parser characters instead of bytes: encoding the string with
	// the platform default charset while declaring the stream as textCharset
	// corrupts non-ASCII text whenever the two charsets differ.
	InputSource inSource = new InputSource(new java.io.StringReader(html));
	inSource.setEncoding(textCharset);
	DocumentFragment doc = codeDoc.createDocumentFragment();

	try {
		parser.parse(inSource, doc);
	} catch (Exception e) {
		// Best effort: unparseable input yields no text
		return "";
	}

	// processNode presumably appends the text it finds to textBuffer -- confirm
	textBuffer = new StringBuffer();
	processNode(doc);
	return textBuffer.toString();
}

Source File: Html5ElementStack.java From caja with Apache License 2.0

6 votes

/**
 * Merges adjacent text nodes while a fragment is being assembled.
 * With no pending node, the current node is handed back untouched.
 * When both nodes are text nodes, a single new text node holding their
 * concatenated content is returned and nothing is appended.
 * Otherwise the pending node is appended to parent and the current node
 * is returned.
 */
private Node appendNormalized(
    Node pending, Node current, DocumentFragment parent) {
  if (pending == null) {
    return current;
  }

  boolean bothText = pending.getNodeType() == Node.TEXT_NODE
      && current.getNodeType() == Node.TEXT_NODE;
  if (!bothText) {
    // Cannot be merged: flush the pending node, keep the current one.
    parent.appendChild(pending);
    return current;
  }

  Text first = (Text) pending;
  Text second = (Text) current;
  Text merged = doc.createTextNode(
      first.getTextContent() + second.getTextContent());
  if (needsDebugData) {
    // Carry the source span and raw text of both halves over to the merge.
    Nodes.setFilePositionFor(
        merged,
        FilePosition.span(
            Nodes.getFilePositionFor(first),
            Nodes.getFilePositionFor(second)));
    Nodes.setRawText(
        merged, Nodes.getRawText(first) + Nodes.getRawText(second));
  }
  return merged;
}

Source File: StaxSerializer.java From cxf with Apache License 2.0

6 votes

/**
 * Copies the reader's content to the writer and returns what ended up
 * under {@code element}.
 *
 * @param reader source of the XML events to copy
 * @param wrapped when true, the content is taken from inside the first
 *        child of {@code element} rather than from {@code element} itself
 * @param contextDocument document used to create the returned fragment
 * @param writer destination of the copy; presumably writes into
 *        {@code element} -- confirm against the caller
 * @param element the element whose children are collected after the copy
 * @return the single resulting node when there is exactly one, otherwise a
 *         document fragment holding all resulting nodes (empty when there
 *         are none)
 * @throws XMLStreamException if the copy fails
 */
private Node appendNewChild(XMLStreamReader reader, boolean wrapped, Document contextDocument,
                            XMLStreamWriter writer, Element element) throws XMLStreamException {
    StaxUtils.copy(reader, writer);

    Node child = element.getFirstChild();
    // Guard against an element that received no content at all: without the
    // null check the wrapped case would fail with a NullPointerException.
    if (wrapped && child != null) {
        child = child.getFirstChild();
    }
    if (child != null && child.getNextSibling() == null) {
        return child;
    }

    // Zero or several nodes: hand them back inside a fragment. The fragment
    // is only created when it is actually needed.
    DocumentFragment result = contextDocument.createDocumentFragment();
    while (child != null) {
        // appendChild detaches the node, so remember its sibling first
        Node nextChild = child.getNextSibling();
        result.appendChild(child);
        child = nextChild;
    }

    return result;
}

Source File: Decrypter.java From lams with GNU General Public License v2.0

6 votes

/**
 * Parses the given stream and returns its root element wrapped in a
 * DocumentFragment that belongs to the supplied document.
 * 
 * @param input the InputStream to parse
 * @param owningDocument the Document which will own the returned DocumentFragment
 * @return a DocumentFragment containing the parsed document element
 * @throws DecryptionException thrown if there is an error parsing the input stream
 */
private DocumentFragment parseInputStream(InputStream input, Document owningDocument) throws DecryptionException {
    // Parsing straight into a DocumentFragment apparently needs some hackery
    // with Xerces, so a full document is parsed and its root element is moved
    // into a fragment instead. As a consequence this only works for streams
    // holding a complete, valid XML instance, not a bare fragment.
    final Document parsed;
    try {
        parsed = parserPool.parse(input);
    } catch (XMLParserException e) {
        log.error("Error parsing decrypted input stream", e);
        throw new DecryptionException("Error parsing input stream", e);
    }

    // Transfer ownership of the root element before attaching it.
    Element root = parsed.getDocumentElement();
    owningDocument.adoptNode(root);

    DocumentFragment fragment = owningDocument.createDocumentFragment();
    fragment.appendChild(root);
    return fragment;
}

Source File: NodesTest.java From caja with Apache License 2.0

6 votes

/**
 * Checks, for several doctype declarations, that rendering a document
 * together with its doctype produces output starting with that declaration.
 */
public final void testDocumentType() throws ParseException {
  String html401Transitional =
      "<!DOCTYPE html PUBLIC "
      + "\"-//W3C//DTD HTML 4.01 Transitional//EN\" "
      + "\"http://www.w3.org/TR/html4/loose.dtd\">";
  String xhtml10Transitional =
      "<!DOCTYPE html PUBLIC "
      + "\"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
      + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
  String html5 = "<!DOCTYPE html>";

  String[] docTypes = { html401Transitional, xhtml10Transitional, html5 };
  for (int i = 0; i < docTypes.length; i++) {
    String docType = docTypes[i];
    // Build a document with this doctype and move the parsed markup into it.
    Document doc = DomParser.makeDocument(DoctypeMaker.parse(docType), null);
    DocumentFragment html = html(fromString("<html><b>my text</b></html>"));
    doc.appendChild(doc.adoptNode(html));
    MoreAsserts.assertStartsWith(
        docType, Nodes.render(doc.getDoctype(), html, null));
  }
}

Source File: HTMLLanguageParser.java From nutch-htmlunit with Apache License 2.0

6 votes

/**
 * Looks up the document language, trying in order: the parse metadata,
 * a LanguageParser run over the DOM fragment, and finally the
 * Content-Language entry of the content metadata.
 *
 * @return the first language found, or whatever the content metadata
 *         holds for Content-Language when the other sources yield nothing
 */
private String detectLanguage(Parse page, DocumentFragment doc) {
    String fromMetadata = getLanguageFromMetadata(page.getData().getParseMeta());
    String found = fromMetadata;
    if (found == null) {
        // Nothing in the parse metadata: inspect the document itself.
        found = new LanguageParser(doc).getLanguage();
    }
    if (found == null) {
        // Last resort: the Content-Language value kept with the content.
        found = page.getData().getContentMeta().get(Response.CONTENT_LANGUAGE);
    }
    return found;
}

Source File: JSParseFilter.java From anthelion with Apache License 2.0

6 votes

/**
 * Collects outlinks by walking the DOM fragment and, when any are found,
 * replaces the parse stored for the content URL with one whose outlinks
 * are the newly found links followed by the existing ones.
 *
 * @param content the fetched content; supplies the URL and base URL
 * @param parseResult the parse result to update
 * @param metaTags passed through to the walk
 * @param doc the DOM fragment to walk
 * @return the same parseResult instance, updated when new outlinks were found
 */
public ParseResult filter(Content content, ParseResult parseResult,
  HTMLMetaTags metaTags, DocumentFragment doc) {

  Parse parse = parseResult.get(content.getUrl());

  String url = content.getBaseUrl();
  // Typed list instead of a raw ArrayList: no unchecked array cast needed below
  ArrayList<Outlink> outlinks = new ArrayList<Outlink>();
  walk(doc, parse, metaTags, url, outlinks);
  if (!outlinks.isEmpty()) {
    Outlink[] old = parse.getData().getOutlinks();
    String title = parse.getData().getTitle();
    // existing outlinks go after the newly discovered ones
    outlinks.addAll(Arrays.asList(old));
    ParseStatus status = parse.getData().getStatus();
    String text = parse.getText();
    Outlink[] newlinks = outlinks.toArray(new Outlink[outlinks.size()]);
    ParseData parseData = new ParseData(status, title, newlinks,
                                        parse.getData().getContentMeta(),
                                        parse.getData().getParseMeta());

    // replace original parse obj with new one
    parseResult.put(content.getUrl(), new ParseText(text), parseData);
  }
  return parseResult;
}

Source File: MessageParser.java From tmxeditor8 with GNU General Public License v2.0

6 votes

/**
 * Strips the tags out of HTML-formatted text.
 * @param html
 *            the HTML-formatted string
 * @return String
 * 			  the text left after the HTML tags are removed. Returns the
 *            empty string "" when html is null or cannot be parsed.
 */
private String htmlToText(String html) {
	if (html == null) {
		return "";
	}
	DOMFragmentParser parser = new DOMFragmentParser();
	CoreDocumentImpl codeDoc = new CoreDocumentImpl();
	// Hand the parser characters instead of bytes: encoding the string with
	// the platform default charset while declaring the stream as textCharset
	// corrupts non-ASCII text whenever the two charsets differ.
	InputSource inSource = new InputSource(new java.io.StringReader(html));
	inSource.setEncoding(textCharset);
	DocumentFragment doc = codeDoc.createDocumentFragment();

	try {
		parser.parse(inSource, doc);
	} catch (Exception e) {
		// Best effort: unparseable input yields no text
		return "";
	}

	// processNode presumably appends the text it finds to textBuffer -- confirm
	textBuffer = new StringBuffer();
	processNode(doc);
	return textBuffer.toString();
}

Source File: MD5SignatureParseFilter.java From storm-crawler with Apache License 2.0

6 votes

/**
 * Stores an MD5 hex digest under key_name in the metadata of the parse
 * data for the URL. The digest input is the parsed text (when useText is
 * set and the text is not blank) or the raw content; when neither is
 * available, the URL itself is hashed. If copyKeyName is configured, the
 * value already stored under key_name is first copied to copyKeyName.
 */
@Override
public void filter(String URL, byte[] content, DocumentFragment doc,
        ParseResult parse) {
    ParseData parseData = parse.get(URL);
    Metadata metadata = parseData.getMetadata();

    // Keep the previous signature around before it gets overwritten.
    if (copyKeyName != null) {
        String previous = metadata.getFirstValue(key_name);
        if (previous != null) {
            metadata.setValue(copyKeyName, previous);
        }
    }

    byte[] digestInput = content;
    if (useText) {
        String text = parseData.getText();
        digestInput = StringUtils.isNotBlank(text)
                ? text.getBytes(StandardCharsets.UTF_8)
                : null;
    }
    if (digestInput == null) {
        // Nothing else to hash: fall back to the URL.
        digestInput = URL.getBytes(StandardCharsets.UTF_8);
    }

    metadata.setValue(key_name, DigestUtils.md5Hex(digestInput));
}

Source File: IMFTrackFileCPLBuilder.java From photon with Apache License 2.0

5 votes

/**
 * Adds one EssenceDescriptorBaseType to the CPL's essence descriptor list
 * for every essence descriptor reported by the track file reader. Each
 * entry gets a freshly generated UUID, which is also appended to uuidList,
 * and carries the first child of the descriptor's DOM fragment.
 *
 * @param uuidList receives the generated UUIDs, in descriptor order
 * @param imfErrorLogger collects errors raised while reading descriptors
 * @throws IOException declared by the signature; presumably raised by the
 *         track file reader calls -- confirm
 * @throws IMFException if no DOM DocumentBuilder can be created
 */
private void buildEssenceDescriptorList(List<String> uuidList, IMFErrorLogger imfErrorLogger) throws IOException{

    try {
        List<EssenceDescriptorBaseType> essenceDescriptorList = this.cplRoot.getEssenceDescriptorList().getEssenceDescriptor();
        List<InterchangeObject.InterchangeObjectBO> essenceDescriptors = this.imfTrackFileReader.getEssenceDescriptors(imfErrorLogger);

        // The factory and builder do not depend on the descriptor, so they
        // are created once instead of once per loop iteration.
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();

        for(InterchangeObject.InterchangeObjectBO essenceDescriptor : essenceDescriptors) {
            KLVPacket.Header essenceDescriptorHeader = essenceDescriptor.getHeader();
            List<KLVPacket.Header> subDescriptorHeaders = this.imfTrackFileReader.getSubDescriptorKLVHeader(essenceDescriptor, imfErrorLogger);
            /*Create a dom; every descriptor still gets its own document*/
            Document document = docBuilder.newDocument();

            EssenceDescriptorBaseType essenceDescriptorBaseType = new EssenceDescriptorBaseType();
            String uuid = IMFUUIDGenerator.getInstance().getUrnUUID();
            essenceDescriptorBaseType.setId(uuid);
            uuidList.add(uuid);

            DocumentFragment documentFragment = this.getEssenceDescriptorAsDocumentFragment(document, essenceDescriptorHeader, subDescriptorHeaders, imfErrorLogger);
            Node node = documentFragment.getFirstChild();

            essenceDescriptorBaseType.getAny().add(node);
            essenceDescriptorList.add(essenceDescriptorBaseType);
        }
    }
    catch(ParserConfigurationException e){
        imfErrorLogger.addError(IMFErrorLogger.IMFErrors.ErrorCodes.IMF_CPL_ERROR, IMFErrorLogger.IMFErrors
                .ErrorLevels.FATAL, e.getMessage());
        throw new IMFException(e);
    }
}

Source File: UnImplNode.java From jdk1.8-source-analysis with Apache License 2.0

5 votes

/**
 * Not supported by this node; see org.w3c.dom.Document for the contract
 * of the real operation. Reports ER_FUNCTION_NOT_SUPPORTED through
 * error(...).
 *
 * @return null, if the error report returns normally
 */
public DocumentFragment createDocumentFragment() {
  // Signal that the operation is unavailable on this implementation.
  error(XMLErrorResources.ER_FUNCTION_NOT_SUPPORTED);
  return null;
}

Source File: DomParserTest.java From caja with Apache License 2.0

5 votes

/**
 * Issue 1211: a namespace declaration on a script element is kept when
 * the fragment is rendered, and the script body is left as written.
 */
public final void testIssue1211XmlnsOnScript() throws Exception {
  String markup =
      "<script type=\"text/os-data\"\n"
      + "    xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
      + "  <os:ViewerRequest key=\"viewer\"/>\n"
      + "</script>";
  // Same markup, with the attributes of the start tag on a single line.
  String expected =
      "<script type=\"text/os-data\""
      + " xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
      + "  <os:ViewerRequest key=\"viewer\"/>\n"
      + "</script>";

  DocumentFragment parsed = htmlFragment(fromString(markup));
  assertEquals(expected, Nodes.render(parsed));
}

Source File: DocumentFragmentTest.java From anno4j with Apache License 2.0

5 votes

/**
 * Stores a namespaced XML fragment on an entity and checks that the value
 * of the first statement read back for the entity and predicate equals the
 * original XML text.
 */
public void testAddNamespaceElement() throws Exception {
	String xml = "<a:Box xmlns:a=\"http://example.org/a#\" required=\"true\"><a:widget size=\"10\"> </a:widget><a:grommit id=\"23\"> text </a:grommit></a:Box>";
	Document doc = parse(xml);
	ObjectFactory of = con.getObjectFactory();
	Entity entity = con.addDesignation(of.createObject(), Entity.class);
	DocumentFragment frag = doc.createDocumentFragment();
	frag.appendChild(doc.getDocumentElement());
	entity.setXML(frag);
	RepositoryResult<Statement> results = con.getStatements(entity.getResource(), pred, null);
	String label;
	try {
		label = results.next().getObject().stringValue();
	} finally {
		// Release the result even when reading the statement fails
		results.close();
	}
	assertEquals(xml, label);
}

Source File: JmsSubscription.java From cxf with Apache License 2.0

5 votes

/**
 * Handles a JMS text message carrying a Notify payload: unmarshals it,
 * drops every notification message rejected by doFilter, stamps the
 * remaining ones with this subscription's topic and endpoint reference,
 * and forwards the Notify if anything is left. Any failure is logged as a
 * warning and otherwise ignored.
 *
 * @param jmsMessage the incoming message; expected to be a TextMessage
 */
public void onMessage(Message jmsMessage) {
    try {
        TextMessage text = (TextMessage) jmsMessage;
        XMLStreamReader reader = StaxUtils.createXMLStreamReader(new StringReader(text.getText()));
        Notify notify;
        try {
            notify = (Notify) jaxbContext.createUnmarshaller()
                    .unmarshal(reader);
        } finally {
            // Release the reader even when unmarshalling fails
            reader.close();
        }
        for (Iterator<NotificationMessageHolderType> ith = notify.getNotificationMessage().iterator();
            ith.hasNext();) {
            NotificationMessageHolderType h = ith.next();
            Object content = h.getMessage().getAny();
            if (!(content instanceof Element)) {
                // Not a DOM element yet: marshal it into a fragment and
                // take the first element out of it
                DocumentFragment doc = DOMUtils.getEmptyDocument().createDocumentFragment();
                jaxbContext.createMarshaller().marshal(content, doc);
                content = DOMUtils.getFirstElement(doc);
            }
            if (!doFilter((Element) content)) {
                ith.remove();
            } else {
                h.setTopic(topic);
                h.setSubscriptionReference(getEpr());
            }
        }
        if (!notify.getNotificationMessage().isEmpty()) {
            doNotify(notify);
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Error notifying consumer", e);
    }
}

Source File: BaseMarkupSerializer.java From jdk1.8-source-analysis with Apache License 2.0

5 votes

/**
 * Serializes the DOM document fragment using the previously specified
 * writer and output format. Throws an exception only if an I/O
 * exception occurred while serializing.
 *
 * @param frag The document fragment to serialize
 * @throws IOException An I/O exception occurred while
 *   serializing
 */
public void serialize( DocumentFragment frag )
    throws IOException
{
    reset();
    prepare();
    serializeNode( frag );
    _printer.flush();

    // The printer records I/O failures instead of throwing them;
    // surface a recorded failure to the caller now.
    IOException failure = _printer.getException();
    if ( failure != null ) {
        throw failure;
    }
}

Source File: SparqlEvaluator.java From anno4j with Apache License 2.0

5 votes

/**
 * Returns the result document's root element wrapped in a document
 * fragment created by that same document. Appending the root to the
 * fragment detaches it from the document.
 */
public DocumentFragment asDocumentFragment() throws OpenRDFException,
		TransformerException, IOException, ParserConfigurationException {
	final Document source = asDocument();
	final DocumentFragment fragment = source.createDocumentFragment();
	fragment.appendChild(source.getDocumentElement());
	return fragment;
}

Source File: NodesRenderTest.java From caja with Apache License 2.0

5 votes

/**
 * Parses the given markup into an HTML fragment, rethrowing a
 * ParseException as an unchecked RuntimeException so callers need not
 * declare it.
 */
private DocumentFragment html(String source) {
  DocumentFragment parsed;
  try {
    parsed = htmlFragment(fromString(source));
  } catch (ParseException e) {
    throw new RuntimeException(e);
  }
  return parsed;
}

Source File: DomParserTest.java From caja with Apache License 2.0

5 votes

/**
 * Issue 1211: a namespace declaration on a div is kept when the fragment
 * is rendered; the self-closed namespaced child comes back with an
 * explicit end tag placed after the following whitespace.
 */
public final void testIssue1211XmlnsOnDiv() throws Exception {
  String markup =
      "<div xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
      + "  <os:ViewerRequest key=\"viewer\"/>\n"
      + "</div>";
  String expected =
      "<div xmlns:os=\"http://ns.opensocial.org/2008/markup\">\n"
      + "  <os:ViewerRequest key=\"viewer\">\n"
      + "</os:ViewerRequest></div>";

  DocumentFragment parsed = htmlFragment(fromString(markup));
  assertEquals(expected, Nodes.render(parsed));
}

Source File: DOMPrinter.java From openjdk-jdk8u-backup with GNU General Public License v2.0

5 votes

/**
 * Dispatches a DOM node to the visit method matching its node type.
 * Document type nodes are skipped; attribute, entity and any other
 * unlisted node types are rejected.
 *
 * @param node the node to print
 * @throws XMLStreamException for a node type that is not handled here
 */
public void print(Node node) throws XMLStreamException {
    final short nodeType = node.getNodeType();
    switch (nodeType) {
    // containers
    case Node.DOCUMENT_NODE:
        visitDocument((Document) node);
        break;
    case Node.DOCUMENT_FRAGMENT_NODE:
        visitDocumentFragment((DocumentFragment) node);
        break;
    case Node.ELEMENT_NODE:
        visitElement((Element) node);
        break;

    // character data
    case Node.TEXT_NODE:
        visitText((Text) node);
        break;
    case Node.CDATA_SECTION_NODE:
        visitCDATASection((CDATASection) node);
        break;
    case Node.COMMENT_NODE:
        visitComment((Comment) node);
        break;

    // everything else that is handled
    case Node.PROCESSING_INSTRUCTION_NODE:
        visitProcessingInstruction((ProcessingInstruction) node);
        break;
    case Node.ENTITY_REFERENCE_NODE:
        visitReference((EntityReference) node);
        break;

    case Node.DOCUMENT_TYPE_NODE:
        // deliberately produces no output
        break;

    case Node.ATTRIBUTE_NODE:
    case Node.ENTITY_NODE:
    default:
        throw new XMLStreamException("Unexpected DOM Node Type " + nodeType);
    }
}

Source File: HTMLLanguageParser.java From nutch-htmlunit with Apache License 2.0

5 votes

/**
 * Determines the language of the parsed content and, when one is found,
 * records it under Metadata.LANGUAGE in the parse metadata.
 * <p>
 * Two strategies are available, detection (from the document) and
 * identification, and the values of {@code detect} and {@code identify}
 * decide which ones run:
 * <ul>
 * <li>only one of them non-negative: only that strategy runs;</li>
 * <li>otherwise, when the two values differ: the strategy with the smaller
 * value runs first and the other one is used as a fallback;</li>
 * <li>when the two values are equal: a warning is logged and nothing is
 * extracted.</li>
 * </ul>
 * NOTE(review): the earlier comment on this method listed the html lang
 * attribute, meta dc.language and meta http-equiv content-language as the
 * indications scanned; that scanning is not visible in this method and
 * presumably happens inside the detection step -- confirm.
 *
 * @return the parseResult passed in
 */
public ParseResult filter(Content content, ParseResult parseResult,
        HTMLMetaTags metaTags, DocumentFragment doc) {
    Parse parse = parseResult.get(content.getUrl());

    final boolean detectOnly = detect >= 0 && identify < 0;
    final boolean identifyOnly = detect < 0 && identify >= 0;

    String lang;
    if (detectOnly) {
        lang = detectLanguage(parse, doc);
    } else if (identifyOnly) {
        lang = identifyLanguage(parse);
    } else if (detect < identify) {
        // detection has priority, identification is the fallback
        lang = detectLanguage(parse, doc);
        if (lang == null) {
            lang = identifyLanguage(parse);
        }
    } else if (identify < detect) {
        // identification has priority, detection is the fallback
        lang = identifyLanguage(parse);
        if (lang == null) {
            lang = detectLanguage(parse, doc);
        }
    } else {
        LOG.warn("No configuration for language extraction policy is provided");
        return parseResult;
    }

    if (lang != null) {
        parse.getData().getParseMeta().set(Metadata.LANGUAGE, lang);
    }
    return parseResult;
}

Source File: WdcParser.java From anthelion with Apache License 2.0

5 votes

/**
 * Parses the input with the TagSoup parser and returns the resulting
 * nodes in a document fragment of a new HTML document. The DOMBuilder
 * is installed both as content handler and as lexical handler.
 *
 * @param input the source to parse
 * @return the fragment handed to the DOMBuilder
 * @throws Exception whatever configuring or running the parser throws
 */
private DocumentFragment parseTagSoup(InputSource input) throws Exception {
	HTMLDocumentImpl ownerDoc = new HTMLDocumentImpl();
	DocumentFragment target = ownerDoc.createDocumentFragment();
	DOMBuilder domBuilder = new DOMBuilder(ownerDoc, target);

	org.ccil.cowan.tagsoup.Parser tagSoup = new org.ccil.cowan.tagsoup.Parser();
	tagSoup.setContentHandler(domBuilder);
	// parser features: ignoreBogons on, bogonsEmpty off
	tagSoup.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
	tagSoup.setFeature(org.ccil.cowan.tagsoup.Parser.bogonsEmptyFeature, false);
	tagSoup.setProperty("http://xml.org/sax/properties/lexical-handler", domBuilder);

	tagSoup.parse(input);
	return target;
}

Source File: DomainParseFilter.java From storm-crawler with Apache License 2.0

5 votes

/**
 * Stores, under mdKey in the metadata of the parse data for the URL, the
 * partition the partitioner computes for that URL and metadata.
 */
@Override
public void filter(String URL, byte[] content, DocumentFragment doc,
        ParseResult parse) {
    final Metadata md = parse.get(URL).getMetadata();
    final String partition = partitioner.getPartition(URL, md);
    md.setValue(mdKey, partition);
}

Source File: UnImplNode.java From Bytecoder with Apache License 2.0

5 votes

/**
 * Not supported by this node; see org.w3c.dom.Document for the contract
 * of the real operation. Reports ER_FUNCTION_NOT_SUPPORTED through
 * error(...).
 *
 * @return null, if the error report returns normally
 */
public DocumentFragment createDocumentFragment() {
  // Signal that the operation is unavailable on this implementation.
  error(XMLErrorResources.ER_FUNCTION_NOT_SUPPORTED);
  return null;
}

Source File: DocumentFragmentBuilder.java From storm-crawler with Apache License 2.0

5 votes

/**
 * Converts a jsoup document into a W3C DocumentFragment by traversing the
 * jsoup tree, starting at the document's first child, with a W3CBuilder
 * that targets a fragment of a new HTML document.
 *
 * @param jsoupDocument the jsoup document to convert
 * @return the fragment handed to the W3CBuilder
 */
public static DocumentFragment fromJsoup(
        org.jsoup.nodes.Document jsoupDocument) {
    HTMLDocumentImpl w3cDoc = new HTMLDocumentImpl();
    w3cDoc.setErrorChecking(false);
    DocumentFragment target = w3cDoc.createDocumentFragment();

    // skip the #root node
    org.jsoup.nodes.Element htmlRoot = jsoupDocument.child(0);
    W3CBuilder visitor = new W3CBuilder(w3cDoc, target);
    NodeTraversor.traverse(visitor, htmlRoot);
    return target;
}

Source File: XMLStreamDataWriter.java From cxf with Apache License 2.0

5 votes

/**
 * Writes a DOM node to the given stream writer.
 * <p>
 * When the writer is a W3CDOMStreamWriter, the node is attached directly
 * to the DOM the writer is building where possible (see the branches
 * below). In every case that does not return early, the node is written
 * through StaxUtils at the end of the method.
 *
 * @param nd the node to write
 * @param writer the destination writer
 * @throws XMLStreamException declared for the StaxUtils write calls
 */
private void writeNode(Node nd, XMLStreamWriter writer) throws XMLStreamException {
    if (writer instanceof W3CDOMStreamWriter) {
        W3CDOMStreamWriter dw = (W3CDOMStreamWriter)writer;

        if (dw.getCurrentNode() != null) {
            if (nd instanceof DocumentFragment
                && nd.getOwnerDocument() == dw.getCurrentNode().getOwnerDocument()) {
                // Fragment owned by the same document as the writer's current
                // node: move its children over one at a time.
                Node ch = nd.getFirstChild();
                while (ch != null) {
                    nd.removeChild(ch);
                    dw.getCurrentNode().appendChild(org.apache.cxf.helpers.DOMUtils.getDomElement(ch));
                    ch = nd.getFirstChild();
                }
                // NOTE(review): unlike the branches below there is no return
                // here, so control continues to the StaxUtils write at the
                // end of the method with the now-empty fragment -- confirm
                // this is intended.
            } else if (nd.getOwnerDocument() == dw.getCurrentNode().getOwnerDocument()) {
                // Same owner document, not a fragment: attach the node as is.
                dw.getCurrentNode().appendChild(nd);
                return;
            } else if (nd instanceof DocumentFragment) {
                // Fragment from another document: import a deep copy first.
                nd = dw.getDocument().importNode(nd, true);
                dw.getCurrentNode().appendChild(nd);
                return;
            }
            // Any other node from a different document falls through to the
            // StaxUtils write below.
        } else if (dw.getCurrentFragment() != null) {
            // No current node, but the writer is building a fragment: same
            // owner check and import logic, targeting that fragment.
            if (nd.getOwnerDocument() == dw.getCurrentFragment().getOwnerDocument()) {
                dw.getCurrentFragment().appendChild(nd);
                return;
            } else if (nd instanceof DocumentFragment) {
                nd = dw.getDocument().importNode(nd, true);
                dw.getCurrentFragment().appendChild(nd);
                return;
            }
        }
    }
    // Generic path: write the node through StaxUtils.
    if (nd instanceof Document) {
        StaxUtils.writeDocument((Document)nd,
                                writer, false, true);
    } else {
        StaxUtils.writeNode(nd, writer, true);
    }

}

Source File: BaseMarkupSerializer.java From JDKSourceCode1.8 with MIT License

5 votes

/**
 * Serializes the DOM document fragment using the previously specified
 * writer and output format. Throws an exception only if an I/O
 * exception occurred while serializing.
 *
 * @param frag The document fragment to serialize
 * @throws IOException An I/O exception occurred while
 *   serializing
 */
public void serialize( DocumentFragment frag )
    throws IOException
{
    reset();
    prepare();
    serializeNode( frag );
    _printer.flush();

    // The printer records I/O failures instead of throwing them;
    // surface a recorded failure to the caller now.
    IOException failure = _printer.getException();
    if ( failure != null ) {
        throw failure;
    }
}

Source File: DOMPrinter.java From openjdk-8 with GNU General Public License v2.0

5 votes

/**
 * Dispatches a DOM node to the visit method matching its node type.
 * Document type nodes are skipped; attribute, entity and any other
 * unlisted node types are rejected.
 *
 * @param node the node to print
 * @throws XMLStreamException for a node type that is not handled here
 */
public void print(Node node) throws XMLStreamException {
    final short nodeType = node.getNodeType();
    switch (nodeType) {
    // containers
    case Node.DOCUMENT_NODE:
        visitDocument((Document) node);
        break;
    case Node.DOCUMENT_FRAGMENT_NODE:
        visitDocumentFragment((DocumentFragment) node);
        break;
    case Node.ELEMENT_NODE:
        visitElement((Element) node);
        break;

    // character data
    case Node.TEXT_NODE:
        visitText((Text) node);
        break;
    case Node.CDATA_SECTION_NODE:
        visitCDATASection((CDATASection) node);
        break;
    case Node.COMMENT_NODE:
        visitComment((Comment) node);
        break;

    // everything else that is handled
    case Node.PROCESSING_INSTRUCTION_NODE:
        visitProcessingInstruction((ProcessingInstruction) node);
        break;
    case Node.ENTITY_REFERENCE_NODE:
        visitReference((EntityReference) node);
        break;

    case Node.DOCUMENT_TYPE_NODE:
        // deliberately produces no output
        break;

    case Node.ATTRIBUTE_NODE:
    case Node.ENTITY_NODE:
    default:
        throw new XMLStreamException("Unexpected DOM Node Type " + nodeType);
    }
}

Source File: DebugParseFilter.java From storm-crawler with Apache License 2.0

5 votes

/**
 * Debugging aid: serializes the DOM fragment as XML to the stream held in
 * {@code os} and flushes it. An I/O failure is printed to standard error
 * and otherwise ignored; the URL, content and parse arguments are unused.
 */
@Override
public void filter(String URL, byte[] content, DocumentFragment doc,
        ParseResult parse) {
    try {
        // second constructor argument is passed as null, as before
        new XMLSerializer(os, null).serialize(doc);
        os.flush();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

org.w3c.dom.DocumentFragment Java Examples