Filter:   InfoImg
download XhtmlDescriptorBuilder.java
Language: Java
License: AL20
Copyright: Copyright 2004 Outerthought bvba and Schaubroeck nv
LOC: 87
Project Info
Daisy
Server: CocoonDev
Type: svn
...g\outerj\daisy\htmlcleaner\
   ElementDescriptor.java
   ...oCorruptTagCleaner.java
   HtmlCleaner.java
   HtmlCleanerFactory.java
   HtmlCleanerTemplate.java
   HtmlRepairer.java
   ...acterEventsHandler.java
   NekoHtmlParser.java
   ...tElementDescriptor.java
   StylingHtmlSerializer.java
   xhtml-lat1.ent
   xhtml-special.ent
   xhtml-symbol.ent
   xhtml1-strict.dtd
   ...lDescriptorBuilder.java

/*
 * Copyright 2004 Outerthought bvba and Schaubroeck nv
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.outerj.daisy.htmlcleaner;

import org.cyberneko.dtd.parsers.DOMParser;
import org.xml.sax.InputSource;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.TreeWalker;

import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * Builds {@link ElementDescriptor}s from the XHTML strict DTD that is loaded
 * from the classpath.
 *
 * <p>The DTD is parsed with the NekoDTD {@code DOMParser}, and the resulting DOM
 * tree is inspected for <code>elementDecl</code>, <code>attlist</code> and
 * <code>contentModel</code> elements found directly below the
 * <code>externalSubset</code> element.
 */
class XhtmlDescriptorBuilder {
    /** Classpath location of the DTD the descriptors are derived from. */
    private static final String DTD_RESOURCE = "org/outerj/daisy/htmlcleaner/xhtml1-strict.dtd";

    /**
     * Returns a Map containing ElementDescriptors for all elements in the XHTML strict DTD.
     *
     * <p>The map is keyed by element name (String); each value is an
     * {@link ElementDescriptor} to which the declared attribute names (except
     * <code>xmlns</code> and <code>xml:*</code> attributes) and the names of all
     * elements occurring in the element's content model have been added.
     *
     * @return a new Map from element name to ElementDescriptor
     * @throws Exception if the DTD cannot be located, read or parsed, or if its
     *         parsed structure is not the expected one
     */
    public Map build() throws Exception {
        Document document = parseDtd();

        Element dtdElement = document.getDocumentElement();
        Element externalSubset = findChildElement(dtdElement, "externalSubset");

        // build ElementDescriptors for each element
        Element[] elementDecls = findChildElements(externalSubset, "elementDecl");
        Map elementDescriptors = new HashMap();
        for (int i = 0; i < elementDecls.length; i++) {
            String name = elementDecls[i].getAttribute("ename");
            elementDescriptors.put(name, new ElementDescriptor(name));
        }

        // add attribute information to them
        Element[] attlists = findChildElements(externalSubset, "attlist");
        for (int i = 0; i < attlists.length; i++) {
            Element attlist = attlists[i];
            ElementDescriptor descriptor =
                    requireDescriptor(elementDescriptors, attlist.getAttribute("ename"), "attlist");
            Element[] attributeDecls = findChildElements(attlist, "attributeDecl");
            for (int j = 0; j < attributeDecls.length; j++) {
                String aname = attributeDecls[j].getAttribute("aname");
                // namespace declarations and xml:* attributes are not recorded
                if (!aname.equals("xmlns") && !aname.startsWith("xml:")) {
                    descriptor.addAttribute(aname);
                }
            }
        }

        // add child element information to them
        Element[] contentModels = findChildElements(externalSubset, "contentModel");
        for (int i = 0; i < contentModels.length; i++) {
            Element contentModel = contentModels[i];
            ElementDescriptor descriptor =
                    requireDescriptor(elementDescriptors, contentModel.getAttribute("ename"), "contentModel");
            // every "element" anywhere inside the content model counts as an allowed child,
            // regardless of how deeply it is nested
            Element[] elements = findDescendants(contentModel, "element");
            for (int j = 0; j < elements.length; j++) {
                descriptor.addChild(elements[j].getAttribute("name"));
            }
        }

        return elementDescriptors;
    }

    /**
     * Loads the DTD from the classpath and parses it into a DOM document.
     *
     * @throws Exception if the resource is missing or cannot be read or parsed
     */
    private Document parseDtd() throws Exception {
        URL dtdURL = getClass().getClassLoader().getResource(DTD_RESOURCE);
        if (dtdURL == null) {
            throw new RuntimeException("Did not find DTD on the classpath: " + DTD_RESOURCE);
        }

        DOMParser parser = new DOMParser();
        InputStream dtdStream = dtdURL.openStream();
        try {
            InputSource inputSource = new InputSource();
            inputSource.setByteStream(dtdStream);
            // the system id lets the parser resolve relative references made from within the DTD
            inputSource.setSystemId(dtdURL.toExternalForm());
            parser.parse(inputSource);
        } finally {
            // always release the stream, also when parsing fails
            dtdStream.close();
        }
        return parser.getDocument();
    }

    /**
     * Looks up the descriptor registered for the given element name.
     *
     * @param elementDescriptors map from element name to ElementDescriptor
     * @param ename the element name a declaration refers to
     * @param declarationKind name of the declaration being processed, used in the error message
     * @throws RuntimeException if no descriptor is registered under that name
     */
    private ElementDescriptor requireDescriptor(Map elementDescriptors, String ename, String declarationKind) {
        ElementDescriptor descriptor = (ElementDescriptor)elementDescriptors.get(ename);
        if (descriptor == null) {
            throw new RuntimeException("Found " + declarationKind + " for an element without elementDecl: " + ename);
        }
        return descriptor;
    }

    /**
     * Returns the first direct child element of <code>element</code> with the given node name.
     *
     * @throws RuntimeException if there is no such child
     */
    private Element findChildElement(Element element, String name) {
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node node = children.item(i);
            if (node instanceof Element && node.getNodeName().equals(name)) {
                return (Element)node;
            }
        }
        throw new RuntimeException("Did not find expected element: " + name);
    }

    /**
     * Returns all direct child elements of <code>element</code> with the given node name,
     * in document order. The array is empty when there are none.
     */
    private Element[] findChildElements(Element element, String name) {
        ArrayList foundElements = new ArrayList();
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node node = children.item(i);
            if (node instanceof Element && node.getNodeName().equals(name)) {
                foundElements.add(node);
            }
        }
        return (Element[])foundElements.toArray(new Element[foundElements.size()]);
    }

    /**
     * Returns all elements below <code>element</code>, at any depth, with the given node name.
     * The starting element itself is never part of the result, since the walker is advanced
     * before the first node is examined.
     */
    private Element[] findDescendants(Element element, String name) {
        ArrayList foundElements = new ArrayList();
        DocumentTraversal traversal = (DocumentTraversal)element.getOwnerDocument();
        TreeWalker walker = traversal.createTreeWalker(element, NodeFilter.SHOW_ELEMENT, null, false);
        while (walker.nextNode() != null) {
            // SHOW_ELEMENT restricts the walker to element nodes, so the cast is safe
            Element currentEl = (Element)walker.getCurrentNode();
            if (currentEl.getNodeName().equals(name)) {
                foundElements.add(currentEl);
            }
        }
        return (Element[])foundElements.toArray(new Element[foundElements.size()]);
    }
}