001: package hotsax.html.sax;
002:
003: import org.xml.sax.*;
004: import org.xml.sax.helpers.*;
005: import org.xml.sax.ext.*;
006:
007: import java.io.*;
008: import java.util.*;
009:
010: /**
011: * ParserDelegate - provides a clean interface between the
012: * Byacc/J generated HtmlParse and the SaxParser.
013: */
014:
015: // TODO: Make this an interface, provide and implementation model
016: // This cleanly separates what is to be done from actually doing it
017: // That way a HtmlParser+HtmlLexer combo can be combined
018: // with something other than a SaxParser (Say a DOM)
019: public class ParserDelegate {
020:
021: private HtmlParser parser = null;
022: private XMLReader reader = null;
023: private ContentHandler contentHandler = null;
024: private LexicalHandler lexicalHandler = null; // this one my not exist for Sax parser/Sax client combo
025:
026: private org.xml.sax.helpers.AttributesImpl attrList; // collect attributes in a list
027:
028: public ParserDelegate(HtmlParser HtmlParser) {
029: this .parser = parser;
030: attrList = new org.xml.sax.helpers.AttributesImpl();
031: }
032:
033: // ContentHandler interface methods.
034: // If any of these fire a SAXException, it is reported to parser.yyerror()
035:
036: /**
037: * Parse a startDocument event and pass it to the resigtered content handler.
038: * This method fires in response to a HtmlParser.EOF lexer token beging recognised.
039: * SOF is a virtual token fired as the first event after the file is opened.
040: */
041: public void startDocument() {
042: try {
043: if (contentHandler != null)
044: contentHandler.startDocument();
045:
046: } catch (SAXException ex) {
047: parser.yyerror(ex.getMessage());
048: }
049: }
050:
051: /**
052: * Parse a PI and pass it to the contentHandler event
053: * (does not pass xml declaration: <?xml version = 1>)
054: * Separates the target from the data by using whitespace.
055: *
056: */
057: public void processingInstruction(HtmlParserVal lval) {
058: try {
059: if (contentHandler != null) {
060: StringTokenizer stok = new StringTokenizer(lval.sval); // default delim = \sp
061:
062: if (stok.hasMoreElements()) {
063: String target = stok.nextToken();
064: String data;
065: if (stok.hasMoreElements())
066: data = stok.nextToken();
067: else
068: data = "";
069: if (!target.equals("xml"))
070: contentHandler.processingInstruction(target,
071: data);
072: }
073: }
074: } catch (SAXException ex) {
075: parser.yyerror(ex.getMessage());
076: }
077: }
078:
079: /**
080: * Initialize the start of a start element. Prepares the attribute list
081: * to collect any attributes.
082: */
083: public void startElement() {
084: attrList.clear();
085: }
086:
087: /**
088: * Adds an attribute to the list. The name of the attribute is normalized
089: * to lowercase
090: */
091: public void addAttribute(HtmlParserVal lval) {
092: if (lval instanceof hotsax.html.sax.Attribute) {
093: Attribute attr = (Attribute) lval;
094: attrList.addAttribute("", "", attr.getName().toLowerCase(),
095: "NMTOKEN", attr.getValue());
096: } else {
097: System.err.println("Parser passed "
098: + lval.getClass().getName()
099: + " to delegate expecting Attribute");
100: }
101: }
102:
103: /**
104: * Fire startElement event. Note handled the actual beginning of the element by now
105: * and have collected all attributes (if any)
106: */
107: public void startElement(HtmlParserVal lval) {
108: try {
109: if (contentHandler != null) {
110: contentHandler
111: .startElement("", lval.sval, "", attrList);
112: }
113: } catch (SAXException ex) {
114: parser.yyerror(ex.getMessage());
115: }
116: }
117:
118: /**
119: * collect characters from parse stream. Unwrap the HtmlParserVal.sval
120: * String to a character array.
121: * TODO: After creating a LexicalHandler, make sure this gets called
122: * in the comment state.
123: * TODO: This might be better done in the collection process
124: * rather than always using a String. I.e. getting a bunch of chars instead of
125: * incrementally appending one char at a time from yytext()
126: */
127: public void characters(HtmlParserVal lval) {
128: try {
129: if (contentHandler != null) // first unwrap to wrap later? for speed?
130: {
131: char ch[] = lval.sval.toCharArray();
132: contentHandler.characters(ch, 0, lval.sval.length());
133: }
134: } catch (SAXException ex) {
135: parser.yyerror(ex.getMessage());
136: }
137: }
138:
139: /**
140: * Fire endElement event. The name of the element is passed to the event handler.
141: * Note these might be optionally missing in the HTML case.
142: */
143: public void endElement(HtmlParserVal lval) {
144: try {
145: if (contentHandler != null)
146: contentHandler.endElement("", lval.sval, "");
147: } catch (SAXException ex) {
148: parser.yyerror(ex.getMessage());
149: }
150: }
151:
152: /**
153: * Fire endDocument event.
154: */
155: public void endDocument() {
156: try {
157: if (contentHandler != null)
158: contentHandler.endDocument();
159: } catch (SAXException ex) {
160: parser.yyerror(ex.getMessage());
161: }
162: }
163:
164: // LexicalHandler interface functions.
165:
166: /**
167: * comment handler
168: * Note, these are delegate to the XMLReader's LexicalHandler if any
169: * TODO: Check the propery of the reader for its existance.
170: * TODO: add LexicalHandler to Sax client
171: */
172: public void comment(HtmlParserVal lval) {
173: try {
174: if (lexicalHandler != null) {
175: char ch[] = lval.sval.toCharArray();
176: lexicalHandler.comment(ch, 0, lval.sval.length());
177: }
178: } catch (SAXException ex) {
179: parser.yyerror(ex.getMessage());
180: }
181: }
182:
183: /**
184: * CDATA handler
185: * Note, these are delegate to the XMLReader's LexicalHandler if any
186: * This only marks the start boundary condition. Text still goes through characters()
187: */
188: public void startCDATA() {
189: try {
190: if (lexicalHandler != null) {
191: lexicalHandler.startCDATA();
192: }
193: } catch (SAXException ex) {
194: parser.yyerror(ex.getMessage());
195: }
196: }
197:
198: /**
199: * CDATA handler
200: * Note, these are delegate to the XMLReader's LexicalHandler if any
201: * This only marks the end boundary of the CDATA section. Text still goes through characters()
202: */
203: public void endCDATA() {
204: try {
205: if (lexicalHandler != null) {
206: lexicalHandler.endCDATA();
207: }
208: } catch (SAXException ex) {
209: parser.yyerror(ex.getMessage());
210: }
211: }
212:
213: /**
214: * Start the beginning of the DOCTYPE (DTD) declaration
215: * Note, these are delegate to the XMLReader's LexicalHandler if any
216: */
217: public void startDTD(HtmlParserVal lval) {
218: try {
219: if (lexicalHandler != null) {
220: StringTokenizer stok = new StringTokenizer(lval.sval); // default delim = \sp
221:
222: if (stok.hasMoreElements()) {
223: String target = stok.nextToken();
224: String data;
225: if (stok.hasMoreElements())
226: data = stok.nextToken();
227: else
228: data = "";
229:
230: lexicalHandler.startDTD(target, data, null);
231: }
232: }
233: } catch (SAXException ex) {
234: parser.yyerror(ex.getMessage());
235: }
236: }
237:
238: /**
239: * End the DOCTYPE declaration
240: */
241: public void endDTD() {
242: try {
243: if (lexicalHandler != null)
244: lexicalHandler.endDTD();
245: } catch (SAXException ex) {
246: parser.yyerror(ex.getMessage());
247: }
248: }
249:
250: /**
251: * used by the SaxParser to set itself in ParserDelegate
252: */
253: public void setSaxParser(XMLReader reader) {
254: this .reader = reader;
255:
256: try {
257: if (reader != null) {
258: contentHandler = reader.getContentHandler(); // good idea to init first
259: lexicalHandler = (LexicalHandler) reader
260: .getProperty("http://xml.org/sax/properties/lexical-handler");
261: }
262: } catch (SAXNotRecognizedException ex) {
263: System.err.println("no lexical handler installed");
264: } catch (SAXNotSupportedException ex) {
265: System.err.println("no lexical handler installed");
266: }
267:
268: }
269:
270: }
|