001: /*
002: * Copyright 2004 Outerthought bvba and Schaubroeck nv
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.outerj.daisy.htmlcleaner;
017:
018: import java.util.*;
019:
020: /**
021: * This is a thread-safe, reusable object containing the configuration for
022: * the HtmlCleaner. Instances of this object can be obtained from the
023: * {@link HtmlCleanerFactory}. A concrete HtmlCleaner can be obtained
024: * by using the method {@link #newHtmlCleaner()}.
025: */
026: public class HtmlCleanerTemplate {
027: int maxLineWidth = 80;
028: Map<String, OutputElementDescriptor> outputElementDescriptors = new HashMap<String, OutputElementDescriptor>();
029: Set<String> allowedSpanClasses = new HashSet<String>();
030: Set<String> allowedDivClasses = new HashSet<String>();
031: Set<String> allowedParaClasses = new HashSet<String>();
032: Set<String> allowedPreClasses = new HashSet<String>();
033: Set<String> dropDivClasses = new HashSet<String>();
034: Map<String, ElementDescriptor> descriptors = new HashMap<String, ElementDescriptor>();
035: String imgAlternateSrcAttr;
036: String linkAlternateHrefAttr;
037: private boolean initialised = false;
038:
039: HtmlCleanerTemplate() {
040: // package-private constructor
041: }
042:
043: void addOutputElement(String tagName, int beforeOpen,
044: int afterOpen, int beforeClose, int afterClose,
045: boolean inline) {
046: if (initialised)
047: throw new IllegalStateException();
048: if (tagName == null)
049: throw new NullPointerException();
050: OutputElementDescriptor descriptor = new OutputElementDescriptor(
051: beforeOpen, afterOpen, beforeClose, afterClose, inline);
052: outputElementDescriptors.put(tagName, descriptor);
053: }
054:
055: void setMaxLineWidth(int lineWidth) {
056: if (initialised)
057: throw new IllegalStateException();
058: this .maxLineWidth = lineWidth;
059: }
060:
061: void addAllowedSpanClass(String clazz) {
062: if (initialised)
063: throw new IllegalStateException();
064: if (clazz == null)
065: throw new NullPointerException();
066: allowedSpanClasses.add(clazz);
067: }
068:
069: void addAllowedDivClass(String clazz) {
070: if (initialised)
071: throw new IllegalStateException();
072: if (clazz == null)
073: throw new NullPointerException();
074: allowedDivClasses.add(clazz);
075: }
076:
077: void addDropDivClass(String clazz) {
078: if (initialised)
079: throw new IllegalStateException();
080: if (clazz == null)
081: throw new NullPointerException();
082: dropDivClasses.add(clazz);
083: }
084:
085: void addAllowedParaClass(String clazz) {
086: if (initialised)
087: throw new IllegalStateException();
088: if (clazz == null)
089: throw new NullPointerException();
090: allowedParaClasses.add(clazz);
091: }
092:
093: void addAllowedPreClass(String clazz) {
094: if (initialised)
095: throw new IllegalStateException();
096: if (clazz == null)
097: throw new NullPointerException();
098: allowedPreClasses.add(clazz);
099: }
100:
101: void addAllowedElement(String tagName, String[] attributes) {
102: if (initialised)
103: throw new IllegalStateException();
104: if (tagName == null)
105: throw new NullPointerException();
106:
107: ElementDescriptor descriptor = new ElementDescriptor(tagName);
108: for (String attribute : attributes) {
109: descriptor.addAttribute(attribute);
110: }
111:
112: descriptors.put(tagName, descriptor);
113: }
114:
115: void initialize() throws Exception {
116: if (initialised)
117: throw new IllegalStateException();
118: // build our descriptor model:
119: // - retrieve the one for XHTML (so that we have information about content models)
120: // - filter it to only contain the elements the user configured
121: Map<String, ElementDescriptor> full = new XhtmlDescriptorBuilder()
122: .build();
123: relax(full);
124: narrow(full, descriptors);
125: descriptors = full;
126: initialised = true;
127: }
128:
129: /**
130: * Modifies the full map so that it only contains elements and attributes
131: * from the subset, but retains the child element information.
132: */
133: private void narrow(Map<String, ElementDescriptor> full, Map subset) {
134: String[] fullKeys = full.keySet().toArray(
135: new String[full.size()]);
136: for (String fullKey : fullKeys) {
137: if (!subset.containsKey(fullKey))
138: full.remove(fullKey);
139: }
140:
141: for (ElementDescriptor elementDescriptor : full.values()) {
142: String[] childNames = elementDescriptor.getChildren()
143: .toArray(new String[0]);
144: Set<String> newChilds = new HashSet<String>();
145: for (String childName : childNames) {
146: if (subset.containsKey(childName))
147: newChilds.add(childName);
148: }
149: elementDescriptor.setChildren(newChilds);
150: elementDescriptor.setAttributes(((ElementDescriptor) subset
151: .get(elementDescriptor.getName())).getAttributes());
152: }
153: }
154:
155: private void relax(Map<String, ElementDescriptor> descriptors) {
156: // HTML doesn't allow ul's to be nested directly, but that's what all these HTML
157: // editors create, so relax that restriction a bit
158: ElementDescriptor ulDescriptor = descriptors.get("ul");
159: if (ulDescriptor != null) {
160: ulDescriptor.getChildren().add("ul");
161: ulDescriptor.getChildren().add("ol");
162: }
163:
164: ElementDescriptor olDescriptor = descriptors.get("ol");
165: if (olDescriptor != null) {
166: olDescriptor.getChildren().add("ul");
167: olDescriptor.getChildren().add("ol");
168: }
169:
170: // In fact, the gecko HTML editor can't even handle the correct thing, so
171: // force ul/ul and ol/ol nesting
172: ElementDescriptor liDescriptor = descriptors.get("li");
173: if (liDescriptor != null) {
174: liDescriptor.getChildren().remove("ul");
175: liDescriptor.getChildren().remove("ol");
176: }
177: }
178:
179: public HtmlCleaner newHtmlCleaner() {
180: return new HtmlCleaner(this );
181: }
182:
183: void setImgAlternateSrcAttr(String name) {
184: this .imgAlternateSrcAttr = name;
185: }
186:
187: public void setLinkAlternateHrefAttr(String linkAlternateHrefAttr) {
188: this.linkAlternateHrefAttr = linkAlternateHrefAttr;
189: }
190: }
|