001: /*
002: * Created on Mar 7, 2005
003: */
004: package com.sun.portal.wireless.htmlconversion;
005:
006: import java.util.HashMap;
007:
008: import org.w3c.dom.Element;
009:
010: import com.sun.portal.wireless.htmlconversion.processors.AmlTextTagProcessor;
011: import com.sun.portal.wireless.htmlconversion.processors.AmlFormTagProcessor;
012: import com.sun.portal.wireless.htmlconversion.processors.AmlDocumentTitleTagProcessor;
013:
014: /**
015: * Generic parser callback class that is delegated to by the
016: * HtmlParserCallback and XhtmlParserCallback classes.
017: *
018: * @author ashwin.mathew@sun.com
019: */
020: public class GenericHtmlParserCallback {
021:
022: private ParserState state;
023:
024: private static final char SPACE = ' ';
025: private static final char TAB = 9;
026: private static final char NEWLINE = 10;
027:
028: public GenericHtmlParserCallback(ParserState state) {
029: this .state = state;
030: }
031:
032: /**
033: * Generic method to handle end tag events.
034: *
035: * @param tag
036: */
037: public void endTag(String tag) {
038: if (state.isBypassTagProcessing()) {
039: if (tag.equals(state.getTagProcessingResumeTag())) {
040: // We've found the resume tag, start tag processing again
041: state.resumeTagProcessing();
042: } else {
043: // We need to continue with the tag processing bypass
044: return;
045: }
046: }
047:
048: TagProcessor processor = TagProcessorRegistry.getInstance()
049: .getProcessor(tag);
050:
051: // The check for the processor being the AmlDocumentTitleTagProcessor
052: // is a bit of a hack, but it's the best available solution, since this
053: // is a very special case
054: if (state.isTextAvailable()
055: && (processor != null && !processor.getSupportedTags()[0]
056: .equals(AmlDocumentTitleTagProcessor.HTML_TITLE))) {
057: // Some AmlText may need to be output
058: Element amlText = AmlTextTagProcessor.createAmlTextElement(
059: state.getText(), state);
060:
061: GenericHtmlParserCallback.appendChildToOutputContainer(
062: state, amlText);
063:
064: state.clearText();
065: }
066:
067: Element nextOutputTag = null;
068:
069: if (processor != null) {
070: nextOutputTag = processor.endTag(tag, state);
071:
072: if (nextOutputTag != null) {
073: // Occurs only if currentOutputTag is not AmlText
074: // e.g., if it's </p> or </br>
075:
076: if (nextOutputTag != state.getCurrentOutputTag()) {
077: // Append it only if it's not the current tag
078: // - the current tag has already been appended
079: // in handleTag()
080: GenericHtmlParserCallback
081: .appendChildToOutputContainer(state,
082: nextOutputTag);
083: state.setCurrentOutputTag(nextOutputTag);
084: }
085:
086: // Don't bother setting output tag context to
087: // nextOutputTag - we want to roll back
088: // the outputTagContext for the parent tag
089: // Output tag context is set only on start of a tag
090:
091: // If nextOutputTag is null, it indicates that
092: // some portion of the DOM tree has been operated on
093: // directly - output tag context remains the same
094: }
095:
096: if (processor.canHaveChildren(state)) {
097: state.rollbackOutputContainerTag();
098: }
099:
100: if (processor.isMaintainOnState(state)) {
101: state.rollbackCurrentOutputTag();
102: }
103: }
104: }
105:
106: /**
107: * Generic method to handle start tag events.
108: * @param tag
109: * @param attributes
110: */
111: public void startTag(String tag, HashMap attributes) {
112: if (state.isBypassTagProcessing()) {
113: return;
114: }
115:
116: // call interruptedTag() on parent TagProcessor before calling
117: // startTag() on child TagProcessor, and clear text buffer in between
118:
119: Element interruptedTag = state.getCurrentOutputTag();
120:
121: TagProcessor interruptedTagProcessor = null;
122:
123: if (interruptedTag != null) {
124: interruptedTagProcessor = TagProcessorRegistry
125: .getInstance().getProcessor(
126: interruptedTag.getTagName());
127: }
128:
129: if (interruptedTagProcessor != null) {
130: Element newCurrentOutputTag = interruptedTagProcessor
131: .interruptedTag(state);
132:
133: if (newCurrentOutputTag != interruptedTag
134: && newCurrentOutputTag != null) {
135: // Occurs *only* when currentOutputTag is not AmlText
136: // For example, a text string found after a <p> or <br> tag
137: GenericHtmlParserCallback.appendChildToOutputContainer(
138: state, newCurrentOutputTag);
139:
140: state.setCurrentOutputTag(newCurrentOutputTag);
141: }
142: }
143:
144: if (state.isTextAvailable()) {
145: // Even if the processor is null, some AmlText may need to be output
146: Element amlText = AmlTextTagProcessor.createAmlTextElement(
147: state.getText(), state);
148:
149: GenericHtmlParserCallback.appendChildToOutputContainer(
150: state, amlText);
151: state.setCurrentOutputTag(amlText);
152: }
153:
154: // Now clear the text buffer
155: state.clearText();
156:
157: // And create the child
158: TagProcessor childProcessor = TagProcessorRegistry
159: .getInstance().getProcessor(tag);
160: if (childProcessor != null) {
161: Element child = childProcessor.startTag(tag, attributes,
162: state);
163:
164: if (child != null) {
165: GenericHtmlParserCallback.appendChildToOutputContainer(
166: state, child);
167:
168: state.setCurrentOutputTag(child);
169:
170: if (childProcessor.canHaveChildren(state)) {
171: state.setOutputContainerTag(child);
172: }
173: }
174: }
175: }
176:
177: /**
178: * Generic method to handle text.
179: *
180: * @param text
181: * @param start
182: * @param length
183: */
184: public void handleText(char[] text, int start, int length) {
185: if (state.isBypassTagProcessing()) {
186: return;
187: }
188:
189: // Have to explicitly check for whitespace in the case of XHTML
190: // and remove excessive amounts of it - restrict to one leading
191: // and one trailing space.
192: // Doing this in GenericHtmlParserCallback rather than
193: // XhtmlParserCallback since there appear to be intermittent bugs
194: // in the JDK HTML parser where it does return whitespace.
195:
196: int newStart = start;
197: int end = start + length - 1;
198: int newEnd = end;
199: boolean isNewStartDone = false;
200: boolean isNewEndDone = false;
201: boolean isFirstIteration = true;
202:
203: while (!isNewStartDone || !isNewEndDone) {
204: // Finished checking the entire char[]
205: if (newStart >= newEnd) {
206: newStart = 0;
207: newEnd = -1; // So that the length evals to 0
208: isNewStartDone = true;
209: isNewEndDone = true;
210: }
211:
212: if (!isNewStartDone) {
213: if (text[newStart] == SPACE
214: || text[newStart] == NEWLINE
215: || text[newStart] == TAB) {
216: newStart++;
217: } else {
218: if (!isFirstIteration) {
219: newStart--;
220: text[newStart] = SPACE;
221: }
222:
223: isNewStartDone = true;
224: }
225: }
226:
227: if (!isNewEndDone) {
228: if (text[newEnd] == SPACE || text[newEnd] == NEWLINE
229: || text[newEnd] == TAB) {
230: newEnd--;
231: } else {
232: if (!isFirstIteration) {
233: newEnd++;
234: text[newEnd] = SPACE;
235: }
236:
237: isNewEndDone = true;
238: }
239: }
240:
241: isFirstIteration = false;
242: }
243:
244: state.appendText(text, newStart, (newEnd - newStart + 1));
245: }
246:
247: public ParserState getState() {
248: return state;
249: }
250:
251: /**
252: * Appends the child to the output container. In case the child requires a form
253: * in it's parent tag hierarchy, and one is not available, the child will be
254: * appended to the last available form.
255: *
256: * @param state
257: * @param child
258: */
259: public static void appendChildToOutputContainer(ParserState state,
260: Element child) {
261: if (child != null) {
262: if (AmlFormTagProcessor.doesElementRequireForm(child
263: .getTagName())
264: && !state.isInAmlForm()) {
265: // The child requires an AmlForm element in it's
266: // parent tag hierarchy, but is not contained within
267: // an AmlForm - clearly an example of munged HTML.
268: // Best effort: append the child to the last AmlForm.
269: Element amlForm = state.getLayoutManager()
270: .getLastAmlFormTag();
271: if (amlForm != null) {
272: amlForm.appendChild(child);
273: }
274: } else {
275: Element outputContainer = state.getOutputContainerTag();
276: if (outputContainer != null) {
277: outputContainer.appendChild(child);
278: }
279: }
280: }
281: }
282:
283: }
|