001: package com.sun.portal.rewriter.engines.xml.parser;
002:
003: import com.sun.portal.rewriter.Translator;
004: import com.sun.portal.rewriter.engines.PageContent;
005: import com.sun.portal.rewriter.rom.RuleSet;
006: import com.sun.portal.rewriter.util.Debug;
007:
008: import java.io.IOException;
009: import java.io.Reader;
010: import java.util.Enumeration;
011: import java.util.Properties;
012: import java.util.Vector;
013:
014: public class XMLParser {
015: private final XMLParserListener parserListener;
016: private final XMLReader reader;
017: public static final String STYLESHEET_PI = "xml-stylesheet";
018:
019: public XMLParser(final PageContent aPageContent,
020: final RuleSet aRuleSet, final Translator aTranslator) {
021: reader = new XMLReaderWrapper(aPageContent, aRuleSet,
022: aTranslator);
023: parserListener = (XMLParserListener) reader;
024: }//constructor
025:
026: public void parse() {
027: try {
028: while (!this .reader.atEOF()) {
029: String str = XMLUtil.read(this .reader, '&');
030: char ch = str.charAt(0);
031: if (ch == '&') {
032: continue;
033: }
034:
035: switch (ch) {
036: case '<':
037: this .scanSomeTag(false, // don't allow CDATA
038: null, // no default namespace
039: new Properties());
040: break;
041:
042: case ' ':
043: case '\t':
044: case '\r':
045: case '\n':
046: // skip whitespace
047: break;
048:
049: default:
050: XMLUtil.errorInvalidInput(reader.getSystemID(),
051: reader.getLineNr(), "`" + ch + "' (0x"
052: + Integer.toHexString((int) ch)
053: + ')');
054: }
055: }
056: } catch (Exception e) {
057: //Debug.recordOriginalPageWarning( "XML Parsing Exception ", e );
058: Debug.recordOriginalPageWarning("PSRW_CSPR_0022", e);
059: } finally {
060: parserListener.endParsing();
061: }
062: }
063:
064: /**
065: * Scans an XML tag.
066: *
067: * @param allowCDATA true if CDATA sections are allowed at this point
068: * @param defaultNamespace the default namespace URI (or null)
069: * @param namespaces list of defined namespaces
070: */
071: protected void scanSomeTag(boolean allowCDATA,
072: String defaultNamespace, Properties namespaces)
073: throws XMLParseException, IOException {
074: String str = XMLUtil.read(this .reader, '&');
075: char ch = str.charAt(0);
076:
077: if (ch == '&') {
078: XMLUtil.errorUnexpectedEntity(reader.getSystemID(), reader
079: .getLineNr(), str);
080: }
081:
082: switch (ch) {
083: case '?':
084: this .processPI();
085: break;
086:
087: case '!':
088: this .processSpecialTag(allowCDATA);
089: break;
090:
091: default:
092: this .reader.unread(ch);
093: this .processElement(defaultNamespace, namespaces);
094: }
095: }
096:
097: /**
098: * Processes a "processing instruction".
099: * if something went wrong
100: */
101: protected void processPI() throws IOException, XMLParseException {
102: String target = XMLUtil.scanIdentifier(reader);
103:
104: if (!STYLESHEET_PI.equals(target)) {
105: XMLUtil.skipWhitespace(this .reader, null);
106: XMLUtil.skipPI(reader);
107: return;
108: }
109:
110: XMLUtil.skipWhitespace(this .reader, null);
111: char ch;
112:
113: for (;;) {
114: ch = this .reader.read();
115: if ((ch == '?')) {
116: char ch2 = this .reader.read();
117: if (ch2 == '>')
118: break;
119:
120: this .reader.unread(ch2);
121: }
122: this .reader.unread(ch);
123: String key = XMLUtil.scanIdentifier(this .reader);
124: XMLUtil.skipWhitespace(this .reader, null);
125: if (!XMLUtil.read(this .reader, '&').equals("=")) {
126: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
127: .getLineNr(), "`='");
128: }
129: XMLUtil.skipWhitespace(this .reader, null);
130: parserListener.startPI();
131: String value = XMLUtil.scanString(this .reader, '&');
132: if (key.equals("href"))
133: parserListener.endPI(value);
134: XMLUtil.skipWhitespace(this .reader, null);
135: }
136: }
137:
138: /**
139: *
140: * Processes a tag that starts with a bang (<!...>).
141: *
142: * @param allowCDATA true if CDATA sections are allowed at this point
143: * if something went wrong
144: */
145: protected void processSpecialTag(boolean allowCDATA)
146: throws IOException, XMLParseException {
147: String str = XMLUtil.read(this .reader, '&');
148: char ch = str.charAt(0);
149:
150: if (ch == '&') {
151: XMLUtil.errorUnexpectedEntity(reader.getSystemID(), reader
152: .getLineNr(), str);
153: }
154:
155: switch (ch) {
156: case '[':
157: if (allowCDATA) {
158: this .processCDATA();
159: } else {
160: XMLUtil.errorUnexpectedCDATA(reader.getSystemID(),
161: reader.getLineNr());
162: }
163:
164: return;
165:
166: case 'D':
167: this .processDocType();
168: return;
169:
170: case '-':
171: XMLUtil.skipComment(this .reader);
172: return;
173: }
174: }
175:
176: protected void processCDATA() throws IOException, XMLParseException {
177: if (!XMLUtil.checkLiteral(this .reader, "CDATA[")) {
178: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
179: .getLineNr(), "<![[CDATA[");
180: }
181: parserListener.startCDATA();
182: XMLUtil.skipCDATA(this .reader);
183: parserListener.endCDATA();
184: }
185:
186: protected void processDocType() throws IOException,
187: XMLParseException {
188: if (!XMLUtil.checkLiteral(this .reader, "OCTYPE")) {
189: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
190: .getLineNr(), "<!DOCTYPE");
191: return;
192: }
193:
194: XMLUtil.skipWhitespace(this .reader, null);
195: String systemID = null;
196: StringBuffer publicID = new StringBuffer();
197: XMLUtil.scanIdentifier(this .reader);
198: XMLUtil.skipWhitespace(this .reader, null);
199: char ch = this .reader.read();
200:
201: if (ch == 'P') {
202: systemID = XMLUtil.scanPublicID(publicID, reader);
203: XMLUtil.skipWhitespace(this .reader, null);
204: ch = this .reader.read();
205: } else if (ch == 'S') {
206: systemID = XMLUtil.scanSystemID(reader);
207: XMLUtil.skipWhitespace(this .reader, null);
208: ch = this .reader.read();
209: }
210:
211: if (ch == '[') {
212: XMLUtil.skipWhitespace(this .reader, null);
213: ch = this .reader.read();
214: }
215: //added for inline dtd
216: //ignore the content content till the dtd ends
217: if (ch == '<') {
218: reader.unread('<');
219: XMLUtil.skipTag(reader);
220: } else {
221: if (ch != '>') {
222: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
223: .getLineNr(), "`>'");
224: }
225: }
226:
227: if (systemID != null) {
228: Reader reader = this .reader.openStream(publicID.toString(),
229: systemID);
230: this .reader.startNewStream(reader);
231: this .reader.setSystemID(systemID);
232: this .reader.setPublicID(publicID.toString());
233: }
234: }
235:
236: /**
237: * Processes a regular element.
238: *
239: * @param defaultNamespace the default namespace URI (or null)
240: * @param namespaces list of defined namespaces
241: *
242: */
243: protected void processElement(String defaultNamespace,
244: Properties namespaces) throws IOException,
245: XMLParseException {
246: String fullName = XMLUtil.scanIdentifier(this .reader);
247: parserListener.startElement(fullName);
248: String name = fullName;
249: XMLUtil.skipWhitespace(this .reader, null);
250: int colonIndex = name.indexOf(':');
251:
252: if (colonIndex > 0) {
253: name = name.substring(colonIndex + 1);
254: }
255:
256: Vector attrNames = new Vector();
257: Vector attrValues = new Vector();
258: Vector attrTypes = new Vector();
259:
260: char ch;
261:
262: for (;;) {
263: ch = this .reader.read();
264:
265: if ((ch == '/') || (ch == '>')) {
266: break;
267: }
268:
269: this .reader.unread(ch);
270: this .processAttribute(attrNames, attrValues, attrTypes);
271: XMLUtil.skipWhitespace(this .reader, null);
272: }
273:
274: Properties extraAttributes = new Properties();
275: Enumeration enumeration = extraAttributes.keys();
276:
277: while (enumeration.hasMoreElements()) {
278: String key = (String) enumeration.nextElement();
279: String value = extraAttributes.getProperty(key);
280: attrNames.addElement(key);
281: attrValues.addElement(value);
282: attrTypes.addElement("CDATA");
283: }
284:
285: for (int i = 0; i < attrNames.size(); i++) {
286: String key = (String) attrNames.elementAt(i);
287: String value = (String) attrValues.elementAt(i);
288:
289: if (key.equals("xmlns")) {
290: defaultNamespace = value;
291: } else if (key.startsWith("xmlns:")) {
292: namespaces.put(key.substring(6), value);
293: }
294: }
295:
296: for (int i = 0; i < attrNames.size(); i++) {
297: String key = (String) attrNames.elementAt(i);
298:
299: if (key.startsWith("xmlns")) {
300: continue;
301: }
302:
303: colonIndex = key.indexOf(':');
304:
305: }
306:
307: if (ch == '/') {
308: if (this .reader.read() != '>') {
309: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
310: .getLineNr(), "`>'");
311: }
312:
313: parserListener.endElement();
314:
315: return;
316: }
317:
318: StringBuffer buffer = new StringBuffer(16);
319:
320: for (;;) {
321: buffer.setLength(0);
322: String str;
323:
324: for (;;) {
325: XMLUtil.skipWhitespace(this .reader, buffer);
326: str = XMLUtil.read(this .reader, '&');
327:
328: if ((str.charAt(0) == '&') && (str.charAt(1) != '#')) {
329: } else {
330: break;
331: }
332: }
333:
334: if (str.charAt(0) == '<') {
335: str = XMLUtil.read(this .reader, '\0');
336:
337: if (str.charAt(0) == '/') {
338: XMLUtil.skipWhitespace(this .reader, null);
339: str = XMLUtil.scanIdentifier(this .reader);
340:
341: if (!str.equals(fullName)) {
342: XMLUtil.errorWrongClosingTag(reader
343: .getSystemID(), reader.getLineNr(),
344: name, str);
345: }
346:
347: XMLUtil.skipWhitespace(this .reader, null);
348:
349: if (this .reader.read() != '>') {
350: XMLUtil.errorClosingTagNotEmpty(reader
351: .getSystemID(), reader.getLineNr());
352: }
353:
354: break;
355: } else { // <[^/]
356: this .reader.unread(str.charAt(0));
357: this .scanSomeTag(true, //CDATA allowed
358: defaultNamespace, (Properties) namespaces
359: .clone());
360: }
361: } else { // [^<]
362: if (str.charAt(0) == '&') {
363: ch = XMLUtil.processCharLiteral(str);
364: buffer.append(ch);
365: } else {
366: reader.unread(str.charAt(0));
367: }
368:
369: int readOffset = parserListener.getOffset();
370: XMLUtil.skipContent(this .reader);
371: parserListener.endPCData(readOffset);
372: }
373: }
374:
375: parserListener.endElement();
376: }
377:
378: protected void processAttribute(Vector attrNames,
379: Vector attrValues, Vector attrTypes) throws IOException,
380: XMLParseException {
381: String key = XMLUtil.scanIdentifier(this .reader);
382: XMLUtil.skipWhitespace(this .reader, null);
383: if (!XMLUtil.read(this .reader, '&').equals("=")) {
384: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
385: .getLineNr(), "`='");
386: }
387:
388: XMLUtil.skipWhitespace(this .reader, null);
389: parserListener.startAttribute(key);
390: String value = XMLUtil.scanString(this .reader, '&');
391: parserListener.endAttribute(key, value);
392: attrNames.addElement(key);
393: attrValues.addElement(value);
394: attrTypes.addElement("CDATA");
395: }
396: }
|