Filter:   InfoImg
download GeckoCorruptTagCleaner.java
Language: Java
License: AL20
Copyright: Copyright 2004 Outerthought bvba and Schaubroeck nv
LOC: 26
Project Info
Daisy
Server: CocoonDev
Type: svn
...g\outerj\daisy\htmlcleaner\
   ElementDescriptor.java
   ...oCorruptTagCleaner.java
   HtmlCleaner.java
   HtmlCleanerFactory.java
   HtmlCleanerTemplate.java
   HtmlRepairer.java
   ...acterEventsHandler.java
   NekoHtmlParser.java
   ...tElementDescriptor.java
   StylingHtmlSerializer.java
   xhtml-lat1.ent
   xhtml-special.ent
   xhtml-symbol.ent
   xhtml1-strict.dtd
   ...lDescriptorBuilder.java

/*
 * Copyright 2004 Outerthought bvba and Schaubroeck nv
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.outerj.daisy.htmlcleaner;

/**
 * Strips nameless tags that Gecko-based editors can leave behind in HTML.
 */
class GeckoCorruptTagCleaner {
    /**
     * Removes invalid tags produced by gecko editor.
     *
     * <p>The Gecko Midas editor (or Mozilla Composer for that matter) sometimes
     * leaves things like '&lt;&gt;' or '&lt; /&gt;' (for those reading the
     * source: it leaves the unescaped tags), thus tags without names. This
     * is easily reproducible by hitting enter twice followed by backspace twice
     * when in the middle of a paragraph or header.
     *
     * <p>Three patterns are handled while scanning the input left to right:
     * <ul>
     *   <li>'&lt;&gt;' is dropped entirely;</li>
     *   <li>'&lt; /&gt;' is dropped entirely;</li>
     *   <li>a '&lt;' immediately followed by another '&lt;' is dropped, so
     *       that scanning resumes at the second one.</li>
     * </ul>
     * Each pattern is recognized wherever it fully fits in the input,
     * including at the very end of the string.
     *
     * @param input the text to clean; must not be null
     * @return the input with the corrupt tags removed
     * @throws NullPointerException if input is null
     */
    public static String clean(String input) {
        final int length = input.length();
        StringBuffer result = new StringBuffer(length);

        int i = 0;
        while (i < length) {
            char c = input.charAt(i);
            // Every pattern starts with '<' and needs at least one more character.
            if (c == '<' && i + 1 < length) {
                char next = input.charAt(i + 1);
                if (next == '>') {
                    // "<>": skip both characters.
                    i = i + 2;
                    continue;
                } else if (next == '<') {
                    // "<<": drop the first '<' and re-examine the second.
                    i = i + 1;
                    continue;
                } else if (next == ' '
                        && i + 3 < length // only this pattern needs 3 chars of lookahead
                        && input.charAt(i + 2) == '/'
                        && input.charAt(i + 3) == '>') {
                    // "< />": skip all four characters.
                    i = i + 4;
                    continue;
                }
            }
            result.append(c);
            i = i + 1;
        }

        return result.toString();
    }
}