001: /* StdXMLParser.java NanoXML/Java
002: *
003: * $Revision: 1729 $
004: * $Date: 2007-02-13 05:49:18 -0800 (Tue, 13 Feb 2007) $
005: * $Name$
006: *
007: * This file is part of NanoXML 2 for Java.
008: * Copyright (C) 2001 Marc De Scheemaecker, All Rights Reserved.
009: *
010: * This software is provided 'as-is', without any express or implied warranty.
011: * In no event will the authors be held liable for any damages arising from the
012: * use of this software.
013: *
014: * Permission is granted to anyone to use this software for any purpose,
015: * including commercial applications, and to alter it and redistribute it
016: * freely, subject to the following restrictions:
017: *
018: * 1. The origin of this software must not be misrepresented; you must not
019: * claim that you wrote the original software. If you use this software in
020: * a product, an acknowledgment in the product documentation would be
021: * appreciated but is not required.
022: *
023: * 2. Altered source versions must be plainly marked as such, and must not be
024: * misrepresented as being the original software.
025: *
026: * 3. This notice may not be removed or altered from any source distribution.
027: */
028:
029: package net.n3.nanoxml;
030:
031: import java.io.Reader;
032: import java.util.Enumeration;
033: import java.util.Properties;
034:
035: /**
036: * StdXMLParser is the core parser of NanoXML.
037: *
038: * @author Marc De Scheemaecker
039: * @version $Name$, $Revision: 1729 $
040: */
041: public class StdXMLParser implements IXMLParser {
042:
043: /**
044: * Delimiter for a processing instructions.
045: */
046: private static final char[] END_OF_PI = { '>', '?' };
047:
048: /**
049: * Delimiter for CDATA sections.
050: */
051: private static final char[] END_OF_CDATA = { '>', ']', ']' };
052:
053: /**
054: * Delimiter for PCDATA elements.
055: */
056: private static final char[] END_OF_PCDATA = { '<' };
057:
058: /**
059: * The builder which creates the logical structure of the XML data.
060: */
061: private IXMLBuilder builder;
062:
063: /**
064: * The reader from which the parser retrieves its data.
065: */
066: private IXMLReader reader;
067:
068: /**
069: * The entity resolver.
070: */
071: private IXMLEntityResolver entityResolver;
072:
073: /**
074: * The validator that will process entity references and validate the XML data.
075: */
076: private IXMLValidator validator;
077:
078: /**
079: * Creates a new parser.
080: */
081: public StdXMLParser() {
082: this .builder = null;
083: this .validator = null;
084: this .reader = null;
085: this .entityResolver = new XMLEntityResolver();
086: }
087:
088: /**
089: * Cleans up the object when it's destroyed.
090: */
091: protected void finalize() throws Throwable {
092: this .builder = null;
093: this .reader = null;
094: this .entityResolver = null;
095: this .validator = null;
096: super .finalize();
097: }
098:
099: /**
100: * Sets the builder which creates the logical structure of the XML data.
101: *
102: * @param builder the non-null builder
103: */
104: public void setBuilder(IXMLBuilder builder) {
105: this .builder = builder;
106: }
107:
108: /**
109: * Returns the builder which creates the logical structure of the XML data.
110: *
111: * @return the builder
112: */
113: public IXMLBuilder getBuilder() {
114: return this .builder;
115: }
116:
117: /**
118: * Sets the validator that validates the XML data.
119: *
120: * @param validator the non-null validator
121: */
122: public void setValidator(IXMLValidator validator) {
123: this .validator = validator;
124: }
125:
126: /**
127: * Returns the validator that validates the XML data.
128: *
129: * @return the validator
130: */
131: public IXMLValidator getValidator() {
132: return this .validator;
133: }
134:
135: /**
136: * Sets the entity resolver.
137: *
138: * @param resolver the non-null resolver
139: */
140: public void setResolver(IXMLEntityResolver resolver) {
141: this .entityResolver = resolver;
142: }
143:
144: /**
145: * Returns the entity resolver.
146: *
147: * @return the non-null resolver
148: */
149: public IXMLEntityResolver getResolver() {
150: return this .entityResolver;
151: }
152:
153: /**
154: * Sets the reader from which the parser retrieves its data.
155: *
156: * @param reader the reader
157: */
158: public void setReader(IXMLReader reader) {
159: this .reader = reader;
160: }
161:
162: /**
163: * Returns the reader from which the parser retrieves its data.
164: *
165: * @return the reader
166: */
167: public IXMLReader getReader() {
168: return this .reader;
169: }
170:
171: /**
172: * Parses the data and lets the builder create the logical data structure.
173: *
174: * @return the logical structure built by the builder
175: *
176: * @throws net.n3.nanoxml.XMLException if an error occurred reading or parsing the data
177: */
178: public Object parse() throws XMLException {
179: try {
180: this .builder.startBuilding(this .reader.getSystemID(),
181: this .reader.getLineNr());
182: this .scanData();
183: return this .builder.getResult();
184: } catch (XMLException e) {
185: throw e;
186: } catch (Exception e) {
187: throw new XMLException(e);
188: }
189: }
190:
191: /**
192: * Scans the XML data for elements.
193: *
194: * @throws java.lang.Exception if something went wrong
195: */
196: protected void scanData() throws Exception {
197: while ((!this .reader.atEOF())
198: && (this .builder.getResult() == null)) {
199: char ch = XMLUtil.read(this .reader, null, '&',
200: this .entityResolver);
201:
202: switch (ch) {
203: case '<':
204: this .scanSomeTag(false /* don't allow CDATA */);
205: break;
206:
207: case ' ':
208: case '\t':
209: case '\r':
210: case '\n':
211: // skip whitespace
212: break;
213:
214: default:
215: XMLUtil.errorInvalidInput(reader.getSystemID(), reader
216: .getLineNr(), "`" + ch + "' (0x"
217: + Integer.toHexString((int) ch) + ')');
218: }
219: }
220: }
221:
222: /**
223: * Scans an XML tag.
224: *
225: * @param allowCDATA true if CDATA sections are allowed at this point
226: *
227: * @throws java.lang.Exception if something went wrong
228: */
229: protected void scanSomeTag(boolean allowCDATA) throws Exception {
230: char ch = XMLUtil.read(this .reader, null, '&',
231: this .entityResolver);
232:
233: switch (ch) {
234: case '?':
235: this .processPI();
236: break;
237:
238: case '!':
239: this .processSpecialTag(allowCDATA);
240: break;
241:
242: default:
243: this .reader.unread(ch);
244: this .processElement();
245: }
246: }
247:
248: /**
249: * Processes a "processing instruction".
250: *
251: * @throws java.lang.Exception if something went wrong
252: */
253: protected void processPI() throws Exception {
254: XMLUtil.skipWhitespace(this .reader, '&', null, null);
255: String target = XMLUtil.scanIdentifier(this .reader, '&',
256: this .entityResolver);
257: XMLUtil.skipWhitespace(this .reader, '&', null, null);
258: Reader reader = new ContentReader(this .reader,
259: this .entityResolver, '&', StdXMLParser.END_OF_PI, true,
260: "");
261:
262: if (!"xml".equalsIgnoreCase(target)) {
263: this .builder.newProcessingInstruction(target, reader);
264: }
265:
266: reader.close();
267: }
268:
269: /**
270: * Processes a tag that starts with a bang (<!...>).
271: *
272: * @param allowCDATA true if CDATA sections are allowed at this point
273: *
274: * @throws java.lang.Exception if something went wrong
275: */
276: protected void processSpecialTag(boolean allowCDATA)
277: throws Exception {
278: char ch = XMLUtil.read(this .reader, null, '&',
279: this .entityResolver);
280:
281: switch (ch) {
282: case '[':
283: if (allowCDATA) {
284: this .processCDATA();
285: } else {
286: XMLUtil.skipTag(this .reader, '&', this .entityResolver);
287: }
288:
289: return;
290:
291: case 'D':
292: this .processDocType();
293: return;
294:
295: case '-':
296: XMLUtil.skipComment(this .reader, this .entityResolver);
297: }
298: }
299:
300: /**
301: * Processes a CDATA section.
302: *
303: * @throws java.lang.Exception if something went wrong
304: */
305: protected void processCDATA() throws Exception {
306: if (!XMLUtil.checkLiteral(this .reader, '&',
307: this .entityResolver, "CDATA[")) {
308: XMLUtil.skipTag(this .reader, '&', this .entityResolver);
309: return;
310: }
311:
312: this .validator.PCDataAdded(this .reader.getSystemID(),
313: this .reader.getLineNr());
314: Reader reader = new ContentReader(this .reader,
315: this .entityResolver, '&', StdXMLParser.END_OF_CDATA,
316: true, "");
317:
318: this .builder.addPCData(reader, this .reader.getSystemID(),
319: this .reader.getLineNr());
320: reader.close();
321: }
322:
323: /**
324: * Processes a document type declaration.
325: *
326: * @throws java.lang.Exception if an error occurred reading or parsing the data
327: */
328: protected void processDocType() throws Exception {
329: if (!XMLUtil.checkLiteral(this .reader, '&',
330: this .entityResolver, "OCTYPE")) {
331: XMLUtil.skipTag(this .reader, '&', this .entityResolver);
332: return;
333: }
334:
335: XMLUtil.skipWhitespace(this .reader, '&', null, null);
336:
337: // read the root element name
338: XMLUtil.scanIdentifier(this .reader, '&', this .entityResolver);
339:
340: String systemID = null;
341: StringBuffer publicID = new StringBuffer();
342: XMLUtil.skipWhitespace(this .reader, '&', null, null);
343: char ch = XMLUtil.read(this .reader, null, '&',
344: this .entityResolver);
345:
346: if (ch == 'P') {
347: systemID = XMLUtil.scanPublicID(publicID, reader, '&',
348: this .entityResolver);
349: XMLUtil.skipWhitespace(this .reader, '&', null, null);
350: ch = XMLUtil.read(this .reader, null, '&',
351: this .entityResolver);
352: } else if (ch == 'S') {
353: systemID = XMLUtil.scanSystemID(reader, '&',
354: this .entityResolver);
355: XMLUtil.skipWhitespace(this .reader, '&', null, null);
356: ch = XMLUtil.read(this .reader, null, '&',
357: this .entityResolver);
358: }
359:
360: if (ch == '[') {
361: this .validator.parseDTD(publicID.toString(), this .reader,
362: this .entityResolver, false);
363: XMLUtil.skipWhitespace(this .reader, '&', null, null);
364: ch = XMLUtil.read(this .reader, null, '&',
365: this .entityResolver);
366: }
367:
368: if (ch != '>') {
369: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
370: .getLineNr(), "`>'");
371: }
372:
373: if (systemID != null) {
374: Reader reader = this .reader.openStream(publicID.toString(),
375: systemID);
376: this .reader.startNewStream(reader);
377: this .reader.setSystemID(systemID);
378: this .reader.setPublicID(publicID.toString());
379: this .validator.parseDTD(publicID.toString(), this .reader,
380: this .entityResolver, true);
381: }
382: }
383:
384: /**
385: * Processes a regular element.
386: *
387: * @throws java.lang.Exception if something went wrong
388: */
389: protected void processElement() throws Exception {
390: String name = XMLUtil.scanIdentifier(this .reader, '&',
391: this .entityResolver);
392: XMLUtil.skipWhitespace(this .reader, '&', null, null);
393: String prefix = null;
394: int colonIndex = name.indexOf(':');
395:
396: if (colonIndex > 0) {
397: prefix = name.substring(0, colonIndex);
398: name = name.substring(colonIndex + 1);
399: }
400:
401: this .validator.elementStarted(name, prefix, null, this .reader
402: .getSystemID(), this .reader.getLineNr());
403: this .builder.startElement(name, prefix, null, this .reader
404: .getSystemID(), this .reader.getLineNr());
405: char ch;
406:
407: for (;;) {
408: ch = XMLUtil.read(this .reader, null, '&',
409: this .entityResolver);
410:
411: if ((ch == '/') || (ch == '>')) {
412: break;
413: }
414:
415: this .reader.unread(ch);
416: this .processAttribute();
417: XMLUtil.skipWhitespace(this .reader, '&', null, null);
418: }
419:
420: Properties extraAttributes = new Properties();
421: this .validator.elementAttributesProcessed(name, prefix, null,
422: extraAttributes, this .reader.getSystemID(), this .reader
423: .getLineNr());
424: Enumeration enumeration = extraAttributes.keys();
425:
426: while (enumeration.hasMoreElements()) {
427: String key = (String) enumeration.nextElement();
428: String value = extraAttributes.getProperty(key);
429: String attPrefix = null;
430: colonIndex = key.indexOf(':');
431:
432: if (colonIndex > 0) {
433: attPrefix = key.substring(0, colonIndex);
434: key = key.substring(colonIndex + 1);
435: }
436:
437: this .builder.addAttribute(key, attPrefix, null, value,
438: "CDATA");
439: }
440:
441: this .builder.elementAttributesProcessed(name, prefix, null);
442:
443: if (ch == '/') {
444: if (XMLUtil.read(this .reader, null, '&',
445: this .entityResolver) != '>') {
446: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
447: .getLineNr(), "`>'");
448: }
449:
450: this .validator.elementEnded(name, prefix, null, this .reader
451: .getSystemID(), this .reader.getLineNr());
452: this .builder.endElement(name, prefix, null);
453: return;
454: }
455:
456: StringBuffer whitespaceBuffer = new StringBuffer(16);
457:
458: for (;;) {
459: whitespaceBuffer.setLength(0);
460: boolean fromEntity[] = new boolean[1];
461: XMLUtil.skipWhitespace(this .reader, '&', whitespaceBuffer,
462: fromEntity);
463: ch = XMLUtil.read(this .reader, null, '&',
464: this .entityResolver);
465:
466: if ((ch == '<') && (!fromEntity[0])) {
467: ch = reader.read();
468:
469: if (ch == '/') {
470: XMLUtil
471: .skipWhitespace(this .reader, '&', null,
472: null);
473: String str = XMLUtil.scanIdentifier(this .reader,
474: '&', this .entityResolver);
475:
476: if (!str.equals(name)) {
477: XMLUtil.errorWrongClosingTag(reader
478: .getSystemID(), reader.getLineNr(),
479: name, str);
480: }
481:
482: XMLUtil
483: .skipWhitespace(this .reader, '&', null,
484: null);
485:
486: if (XMLUtil.read(this .reader, null, '&',
487: this .entityResolver) != '>') {
488: XMLUtil.errorClosingTagNotEmpty(reader
489: .getSystemID(), reader.getLineNr());
490: }
491:
492: this .validator.elementEnded(name, prefix, null,
493: this .reader.getSystemID(), this .reader
494: .getLineNr());
495: this .builder.endElement(name, prefix, null);
496: break;
497: } else {
498: this .reader.unread(ch);
499: this .scanSomeTag(true /* CDATA allowed */);
500: }
501: } else {
502: this .validator.PCDataAdded(this .reader.getSystemID(),
503: this .reader.getLineNr());
504: this .reader.unread(ch);
505: Reader reader = new ContentReader(this .reader,
506: this .entityResolver, '&',
507: StdXMLParser.END_OF_PCDATA, false,
508: whitespaceBuffer.toString());
509: this .builder.addPCData(reader, this .reader
510: .getSystemID(), this .reader.getLineNr());
511: reader.close();
512: this .reader.unread('<');
513: }
514: }
515: }
516:
517: /**
518: * Processes an attribute of an element.
519: *
520: * @throws java.lang.Exception if something went wrong
521: */
522: protected void processAttribute() throws Exception {
523: String key = XMLUtil.scanIdentifier(this .reader, '&',
524: this .entityResolver);
525: XMLUtil.skipWhitespace(this .reader, '&', null, null);
526:
527: if (XMLUtil.read(this .reader, null, '&', this .entityResolver) != '=') {
528: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
529: .getLineNr(), "`='");
530: }
531:
532: String value = XMLUtil.scanString(this .reader, '&', true,
533: this .entityResolver);
534: this .validator.attributeAdded(key, null, null, value,
535: this .reader.getSystemID(), this .reader.getLineNr());
536: this .builder.addAttribute(key, null, null, value, "CDATA");
537: }
538:
539: }
|