Java Code Examples for org.apache.xerces.parsers.DOMParser#getDocument()

The following examples show how to use org.apache.xerces.parsers.DOMParser#getDocument(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: HtmlUtils.java    From openemm with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Parses a complete HTML document, or a fragment of one, into a DOM tree.
 * The parser is configured to translate element names to lowercase.
 *
 * @param document the HTML markup to parse.
 * @param encoding the default encoding handed to the parser.
 * @return the DOM document built by the parser.
 * @throws IOException if the parser reports an I/O failure.
 * @throws SAXException if the parser reports a parsing failure.
 */
public static Document parseDocument(String document, String encoding) throws IOException, SAXException {
    final DOMParser htmlParser = new DOMParser(new HTMLConfiguration());

    // The property names below are predefined by NekoHTML
    // (see org.cyberneko.html.HTMLConfiguration for the full list).
    try {
        htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        htmlParser.setProperty("http://cyberneko.org/html/properties/default-encoding", encoding);
    } catch (SAXNotRecognizedException | SAXNotSupportedException e) {
        // A rejected property is logged and surfaced as an unchecked exception.
        logger.error("Unexpected parser configuration error occurred: " + e.getMessage());
        throw new RuntimeException(e);
    }

    htmlParser.parse(new InputSource(new StringReader(document)));
    return htmlParser.getDocument();
}
 
Example 2
Source File: BingSearchAzureAPI.java    From ache with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the content of the given page as XML and collects the text of the first
 * child node of every {@code d:Url} element.
 *
 * @param page the page whose content is parsed
 * @return the collected URL strings in document order; {@code d:Url} elements that
 *         have no child node are skipped
 * @throws RuntimeException if the content cannot be read or parsed
 */
private List<String> parseXMLPage(Page page) {
    DOMParser parser = new DOMParser();
    try {
        parser.parse(new InputSource(new ByteArrayInputStream(page.getContent())));
    } catch (SAXException | IOException e) {
        throw new RuntimeException("Failed to parse search results.", e);
    }
    Document doc = parser.getDocument();
    NodeList list = doc.getElementsByTagName("d:Url");
    List<String> urls = new ArrayList<>();
    for (int j = 0; j < list.getLength(); j++) {
        // An empty element (e.g. <d:Url/>) has no first child; skip it instead of
        // dereferencing null.
        Node child = list.item(j).getFirstChild();
        if (child != null) {
            urls.add(child.getTextContent());
        }
    }
    return urls;
}
 
Example 3
Source File: DefaultDOMSource.java    From CSSBox with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Parses the input stream of the document source with NekoHTML and returns the DOM tree.
 * Element names are translated to lowercase; when a charset is set, it is passed
 * to the parser as the default encoding.
 *
 * @return the parsed document
 * @throws SAXException if the parser reports a parsing or configuration failure
 * @throws IOException if reading the input fails
 */
@Override
public Document parse() throws SAXException, IOException
{
    // Temporary NekoHTML fix (until NekoHTML itself gets fixed): append MENU to the
    // parents registered for LI. Guarded by neko_fixed so it is applied only once.
    if (!neko_fixed)
    {
        HTMLElements.Element li = HTMLElements.getElement(HTMLElements.LI);
        HTMLElements.Element[] previous = li.parent;
        HTMLElements.Element[] extended = new HTMLElements.Element[previous.length + 1];
        System.arraycopy(previous, 0, extended, 0, previous.length);
        extended[previous.length] = HTMLElements.getElement(HTMLElements.MENU);
        li.parent = extended;
        neko_fixed = true;
    }

    DOMParser htmlParser = new DOMParser(new HTMLConfiguration());
    htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    if (charset != null)
    {
        htmlParser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }

    org.xml.sax.InputSource input = new org.xml.sax.InputSource(getDocumentSource().getInputStream());
    htmlParser.parse(input);
    return htmlParser.getDocument();
}
 
Example 4
Source File: DOMSource.java    From jStyleParser with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Parses the input stream {@code is} with NekoHTML, stores the result in {@code doc}
 * and returns it. Element and attribute names are translated to lowercase; when a
 * charset is set, it is passed to the parser as the default encoding.
 *
 * @return the parsed document
 * @throws SAXException if the parser reports a parsing or configuration failure
 * @throws IOException if reading the input fails
 */
public Document parse() throws SAXException, IOException
{
    final DOMParser htmlParser = new DOMParser(new HTMLConfiguration());
    htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    htmlParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
    if (charset != null)
    {
        htmlParser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }

    // Preparation for filters, not used now:
    // XMLDocumentFilter attributeFilter = new DOMAttributeFilter();
    // XMLDocumentFilter[] filters = { attributeFilter };
    // parser.setProperty("http://cyberneko.org/html/properties/filters", filters);

    final org.xml.sax.InputSource input = new org.xml.sax.InputSource(is);
    htmlParser.parse(input);
    doc = htmlParser.getDocument();
    return doc;
}
 
Example 5
Source File: ActivitiWorkflowEngine.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Parses a workflow definition and returns the key taken from the {@code id}
 * attribute of its first {@code process} element.
 * <p>
 * When {@code activitiUtil.isMultiTenantWorkflowDeploymentEnabled()} is true the id is
 * passed through {@code factory.getDomainProcessKey(...)}; otherwise it is returned as is.
 * The supplied stream is always closed before this method returns.
 *
 * @param workflowDefinition stream containing the workflow definition XML
 * @return the process key
 * @throws IllegalArgumentException if the definition has no {@code process} element
 *         or that element has no {@code id} attribute
 * @throws Exception if parsing the definition or closing the stream fails
 */
private String getProcessKey(InputStream workflowDefinition) throws Exception
{
    try 
    {
        InputSource inputSource = new InputSource(workflowDefinition);
        DOMParser parser = new DOMParser();
        parser.parse(inputSource);
        Document document = parser.getDocument();
        NodeList processElements = document.getElementsByTagName("process");
        if (processElements.getLength() < 1)
        {
            throw new IllegalArgumentException("The input stream does not contain a process definition!");
        }
        NamedNodeMap attributes = processElements.item(0).getAttributes();
        Node idAttrib = attributes.getNamedItem("id");
        if (idAttrib == null)
        {
            // Invalid input, not a JVM linkage problem: report it the same way as the
            // missing-process case above rather than with an Error subclass.
            throw new IllegalArgumentException("The process definition does not have an id!");
        }
        
        if(activitiUtil.isMultiTenantWorkflowDeploymentEnabled())
        {
            // Workflow-definition is deployed tenant-aware, key should be altered
            return factory.getDomainProcessKey(idAttrib.getNodeValue());
        }
        else
        {
            return idAttrib.getNodeValue();
        }
    }
    finally
    {
        workflowDefinition.close();
    }
}
 
Example 6
Source File: XmlUtils.java    From ats-framework with Apache License 2.0 5 votes vote down vote up
/**
 * Parses XML read from the given stream into a DOM document.
 * <br>
 * Note: the stream is closed by this method, whether or not parsing succeeds.
 *
 * @param configurationFileStream the stream to read the XML from
 * @return the parsed XML document
 * @throws IOException on I/O error
 * @throws SAXException on parsing error
 */
public static Document loadXMLFile( InputStream configurationFileStream ) throws IOException,
                                                                                 SAXException {

    try {
        final DOMParser domParser = getDomParser();
        final InputSource xmlSource = new InputSource(configurationFileStream);
        domParser.parse(xmlSource);

        final Document loadedDocument = domParser.getDocument();
        return loadedDocument;
    } finally {
        IoUtils.closeStream(configurationFileStream);
    }
}
 
Example 7
Source File: ConfigurationResource.java    From ats-framework with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the given XML stream and reloads {@code properties} from it: the existing
 * properties are cleared after a successful parse, then every element that is a direct
 * child of the root element is handed to {@code readXmlElement}.
 *
 * @param resourceStream the stream to read the XML from
 * @param resourceIdentifier name of the resource, used only in error messages
 * @throws ConfigurationException if the parser reports a parsing or I/O failure
 */
public void loadFromXmlFile(
                             InputStream resourceStream,
                             String resourceIdentifier ) {

    try {
        DOMParser domParser = new DOMParser();

        // Parser settings required here: no deferred node expansion, keep going after
        // fatal errors, and accept Java encoding names.
        domParser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
        domParser.setFeature("http://apache.org/xml/features/continue-after-fatal-error", true);
        domParser.setFeature("http://apache.org/xml/features/allow-java-encodings", true);
        domParser.parse(new InputSource(resourceStream));

        Element root = domParser.getDocument().getDocumentElement();

        // drop whatever was loaded before
        properties.clear();

        // path of the element currently being read; starts out empty
        LinkedList<String> elementPath = new LinkedList<String>();

        // walk the direct children of the root, ignoring non-element nodes
        NodeList topLevelNodes = root.getChildNodes();
        for (int index = 0; index < topLevelNodes.getLength(); index++) {
            Node candidate = topLevelNodes.item(index);
            if (candidate.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            readXmlElement(elementPath, (Element) candidate);
        }
    } catch (SAXException e) {
        throw new ConfigurationException("Error while parsing config file '" + resourceIdentifier + "'",
                                         e);
    } catch (IOException ioe) {
        throw new ConfigurationException("Error while parsing config file '" + resourceIdentifier + "'",
                                         ioe);
    }
}
 
Example 8
Source File: Xerces.java    From pdfxtk with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the given input into a DOM document, with this object installed as the
 * parser's entity resolver.
 *
 * @param input the source to parse
 * @param validate value for the SAX validation feature of the parser
 * @return the parsed document
 * @throws SAXException if the parser reports a parsing or configuration failure
 * @throws IOException if reading the input fails
 */
public Document parseDocument(InputSource input, boolean validate)
  throws SAXException, IOException
{
  final DOMParser domParser = new DOMParser();

  // configure first, then parse
  domParser.setFeature("http://xml.org/sax/features/validation", validate);
  domParser.setEntityResolver(this);

  domParser.parse(input);
  final Document result = domParser.getDocument();
  return result;
}
 
Example 9
Source File: XmlUtils.java    From ats-framework with Apache License 2.0 3 votes vote down vote up
/**
 * Loads an XML document identified by the given String.
 * <br>
 * NOTE(review): the argument is passed straight to {@code DOMParser.parse(String)},
 * which in Xerces takes a system identifier (URI) rather than XML markup. The parameter
 * name suggests XML content; confirm against callers which of the two is intended.
 *
 * @param xmlContentsStr the String handed to the parser
 * @return the loaded XML document
 * @throws IOException for IO error
 * @throws SAXException for parsing exception
 */
public static Document loadXML( String xmlContentsStr ) throws IOException,
                                                               SAXException {

    final DOMParser domParser = getDomParser();
    domParser.parse(xmlContentsStr);

    final Document loadedDocument = domParser.getDocument();
    return loadedDocument;

}