001: /* StdXMLParser.java NanoXML/Java
002: *
003: * $Revision: 1.5 $
004: * $Date: 2002/03/24 11:37:00 $
005: * $Name: RELEASE_2_2_1 $
006: *
007: * This file is part of NanoXML 2 for Java.
008: * Copyright (C) 2000-2002 Marc De Scheemaecker, All Rights Reserved.
009: *
010: * This software is provided 'as-is', without any express or implied warranty.
011: * In no event will the authors be held liable for any damages arising from the
012: * use of this software.
013: *
014: * Permission is granted to anyone to use this software for any purpose,
015: * including commercial applications, and to alter it and redistribute it
016: * freely, subject to the following restrictions:
017: *
018: * 1. The origin of this software must not be misrepresented; you must not
019: * claim that you wrote the original software. If you use this software in
020: * a product, an acknowledgment in the product documentation would be
021: * appreciated but is not required.
022: *
023: * 2. Altered source versions must be plainly marked as such, and must not be
024: * misrepresented as being the original software.
025: *
026: * 3. This notice may not be removed or altered from any source distribution.
027: */
028:
029: package net.n3.nanoxml;
030:
031: import java.io.IOException;
032: import java.io.CharArrayReader;
033: import java.io.Reader;
034: import java.util.Enumeration;
035: import java.util.Properties;
036: import java.util.Vector;
037:
038: /**
039: * StdXMLParser is the core parser of NanoXML.
040: *
041: * @author Marc De Scheemaecker
042: * @version $Name: RELEASE_2_2_1 $, $Revision: 1.5 $
043: */
044: public class StdXMLParser implements IXMLParser {
045:
046: /**
047: * The builder which creates the logical structure of the XML data.
048: */
049: private IXMLBuilder builder;
050:
051: /**
052: * The reader from which the parser retrieves its data.
053: */
054: private IXMLReader reader;
055:
056: /**
057: * The entity resolver.
058: */
059: private IXMLEntityResolver entityResolver;
060:
061: /**
062: * The validator that will process entity references and validate the XML
063: * data.
064: */
065: private IXMLValidator validator;
066:
067: /**
068: * Creates a new parser.
069: */
070: public StdXMLParser() {
071: this .builder = null;
072: this .validator = null;
073: this .reader = null;
074: this .entityResolver = new XMLEntityResolver();
075: }
076:
077: /**
078: * Cleans up the object when it's destroyed.
079: */
080: protected void finalize() throws Throwable {
081: this .builder = null;
082: this .reader = null;
083: this .entityResolver = null;
084: this .validator = null;
085: super .finalize();
086: }
087:
088: /**
089: * Sets the builder which creates the logical structure of the XML data.
090: *
091: * @param builder the non-null builder
092: */
093: public void setBuilder(IXMLBuilder builder) {
094: this .builder = builder;
095: }
096:
097: /**
098: * Returns the builder which creates the logical structure of the XML data.
099: *
100: * @return the builder
101: */
102: public IXMLBuilder getBuilder() {
103: return this .builder;
104: }
105:
106: /**
107: * Sets the validator that validates the XML data.
108: *
109: * @param validator the non-null validator
110: */
111: public void setValidator(IXMLValidator validator) {
112: this .validator = validator;
113: }
114:
115: /**
116: * Returns the validator that validates the XML data.
117: *
118: * @return the validator
119: */
120: public IXMLValidator getValidator() {
121: return this .validator;
122: }
123:
124: /**
125: * Sets the entity resolver.
126: *
127: * @param resolver the non-null resolver
128: */
129: public void setResolver(IXMLEntityResolver resolver) {
130: this .entityResolver = resolver;
131: }
132:
133: /**
134: * Returns the entity resolver.
135: *
136: * @return the non-null resolver
137: */
138: public IXMLEntityResolver getResolver() {
139: return this .entityResolver;
140: }
141:
142: /**
143: * Sets the reader from which the parser retrieves its data.
144: *
145: * @param reader the reader
146: */
147: public void setReader(IXMLReader reader) {
148: this .reader = reader;
149: }
150:
151: /**
152: * Returns the reader from which the parser retrieves its data.
153: *
154: * @return the reader
155: */
156: public IXMLReader getReader() {
157: return this .reader;
158: }
159:
160: /**
161: * Parses the data and lets the builder create the logical data structure.
162: *
163: * @return the logical structure built by the builder
164: *
165: * @throws net.n3.nanoxml.XMLException
166: * if an error occurred reading or parsing the data
167: */
168: public Object parse() throws XMLException {
169: try {
170: this .builder.startBuilding(this .reader.getSystemID(),
171: this .reader.getLineNr());
172: this .scanData();
173: return this .builder.getResult();
174: } catch (XMLException e) {
175: throw e;
176: } catch (Exception e) {
177: throw new XMLException(e);
178: }
179: }
180:
181: /**
182: * Scans the XML data for elements.
183: *
184: * @throws java.lang.Exception
185: * if something went wrong
186: */
187: protected void scanData() throws Exception {
188: while ((!this .reader.atEOF())
189: && (this .builder.getResult() == null)) {
190: String str = XMLUtil.read(this .reader, '&');
191: char ch = str.charAt(0);
192: if (ch == '&') {
193: XMLUtil.processEntity(str, this .reader,
194: this .entityResolver);
195: continue;
196: }
197:
198: switch (ch) {
199: case '<':
200: this .scanSomeTag(false, // don't allow CDATA
201: null, // no default namespace
202: new Properties());
203: break;
204:
205: case ' ':
206: case '\t':
207: case '\r':
208: case '\n':
209: // skip whitespace
210: break;
211:
212: default:
213: XMLUtil.errorInvalidInput(reader.getSystemID(), reader
214: .getLineNr(), "`" + ch + "' (0x"
215: + Integer.toHexString((int) ch) + ')');
216: }
217: }
218: }
219:
220: /**
221: * Scans an XML tag.
222: *
223: * @param allowCDATA true if CDATA sections are allowed at this point
224: * @param defaultNamespace the default namespace URI (or null)
225: * @param namespaces list of defined namespaces
226: *
227: * @throws java.lang.Exception
228: * if something went wrong
229: */
230: protected void scanSomeTag(boolean allowCDATA,
231: String defaultNamespace, Properties namespaces)
232: throws Exception {
233: String str = XMLUtil.read(this .reader, '&');
234: char ch = str.charAt(0);
235:
236: if (ch == '&') {
237: XMLUtil.errorUnexpectedEntity(reader.getSystemID(), reader
238: .getLineNr(), str);
239: }
240:
241: switch (ch) {
242: case '?':
243: this .processPI();
244: break;
245:
246: case '!':
247: this .processSpecialTag(allowCDATA);
248: break;
249:
250: default:
251: this .reader.unread(ch);
252: this .processElement(defaultNamespace, namespaces);
253: }
254: }
255:
256: /**
257: * Processes a "processing instruction".
258: *
259: * @throws java.lang.Exception
260: * if something went wrong
261: */
262: protected void processPI() throws Exception {
263: XMLUtil.skipWhitespace(this .reader, null);
264: String target = XMLUtil.scanIdentifier(this .reader);
265: XMLUtil.skipWhitespace(this .reader, null);
266: Reader reader = new PIReader(this .reader);
267:
268: if (!target.equalsIgnoreCase("xml")) {
269: this .builder.newProcessingInstruction(target, reader);
270: }
271:
272: reader.close();
273: }
274:
275: /**
276: * Processes a tag that starts with a bang (<!...>).
277: *
278: * @param allowCDATA true if CDATA sections are allowed at this point
279: *
280: * @throws java.lang.Exception
281: * if something went wrong
282: */
283: protected void processSpecialTag(boolean allowCDATA)
284: throws Exception {
285: String str = XMLUtil.read(this .reader, '&');
286: char ch = str.charAt(0);
287:
288: if (ch == '&') {
289: XMLUtil.errorUnexpectedEntity(reader.getSystemID(), reader
290: .getLineNr(), str);
291: }
292:
293: switch (ch) {
294: case '[':
295: if (allowCDATA) {
296: this .processCDATA();
297: } else {
298: XMLUtil.errorUnexpectedCDATA(reader.getSystemID(),
299: reader.getLineNr());
300: }
301:
302: return;
303:
304: case 'D':
305: this .processDocType();
306: return;
307:
308: case '-':
309: XMLUtil.skipComment(this .reader);
310: return;
311: }
312: }
313:
314: /**
315: * Processes a CDATA section.
316: *
317: * @throws java.lang.Exception
318: * if something went wrong
319: */
320: protected void processCDATA() throws Exception {
321: if (!XMLUtil.checkLiteral(this .reader, "CDATA[")) {
322: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
323: .getLineNr(), "<![[CDATA[");
324: }
325:
326: this .validator.PCDataAdded(this .reader.getSystemID(),
327: this .reader.getLineNr());
328: Reader reader = new CDATAReader(this .reader);
329: this .builder.addPCData(reader, this .reader.getSystemID(),
330: this .reader.getLineNr());
331: reader.close();
332: }
333:
334: /**
335: * Processes a document type declaration.
336: *
337: * @throws java.lang.Exception
338: * if an error occurred reading or parsing the data
339: */
340: protected void processDocType() throws Exception {
341: if (!XMLUtil.checkLiteral(this .reader, "OCTYPE")) {
342: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
343: .getLineNr(), "<!DOCTYPE");
344: return;
345: }
346:
347: XMLUtil.skipWhitespace(this .reader, null);
348: String systemID = null;
349: StringBuffer publicID = new StringBuffer();
350: String rootElement = XMLUtil.scanIdentifier(this .reader);
351: XMLUtil.skipWhitespace(this .reader, null);
352: char ch = this .reader.read();
353:
354: if (ch == 'P') {
355: systemID = XMLUtil.scanPublicID(publicID, reader);
356: XMLUtil.skipWhitespace(this .reader, null);
357: ch = this .reader.read();
358: } else if (ch == 'S') {
359: systemID = XMLUtil.scanSystemID(reader);
360: XMLUtil.skipWhitespace(this .reader, null);
361: ch = this .reader.read();
362: }
363:
364: if (ch == '[') {
365: this .validator.parseDTD(publicID.toString(), this .reader,
366: this .entityResolver, false);
367: XMLUtil.skipWhitespace(this .reader, null);
368: ch = this .reader.read();
369: }
370:
371: if (ch != '>') {
372: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
373: .getLineNr(), "`>'");
374: }
375:
376: if (systemID != null) {
377: Reader reader = this .reader.openStream(publicID.toString(),
378: systemID);
379: this .reader.startNewStream(reader);
380: this .reader.setSystemID(systemID);
381: this .reader.setPublicID(publicID.toString());
382: this .validator.parseDTD(publicID.toString(), this .reader,
383: this .entityResolver, true);
384: }
385: }
386:
387: /**
388: * Processes a regular element.
389: *
390: * @param defaultNamespace the default namespace URI (or null)
391: * @param namespaces list of defined namespaces
392: *
393: * @throws java.lang.Exception
394: * if something went wrong
395: */
396: protected void processElement(String defaultNamespace,
397: Properties namespaces)
398: throws Exception
399: {
400: String fullName = XMLUtil.scanIdentifier(this .reader);
401: String name = fullName;
402: XMLUtil.skipWhitespace(this .reader, null);
403: String prefix = null;
404: int colonIndex = name.indexOf(':');
405:
406: if (colonIndex > 0) {
407: prefix = name.substring(0, colonIndex);
408: name = name.substring(colonIndex + 1);
409: }
410:
411: Vector attrNames = new Vector();
412: Vector attrValues = new Vector();
413: Vector attrTypes = new Vector();
414:
415: this .validator.elementStarted(fullName,
416: this .reader.getSystemID(),
417: this .reader.getLineNr());
418: char ch;
419:
420: for (;;) {
421: ch = this .reader.read();
422:
423: if ((ch == '/') || (ch == '>')) {
424: break;
425: }
426:
427: this .reader.unread(ch);
428: this .processAttribute(attrNames, attrValues, attrTypes);
429: XMLUtil.skipWhitespace(this .reader, null);
430: }
431:
432: Properties extraAttributes = new Properties();
433: this .validator.elementAttributesProcessed(fullName,
434: extraAttributes,
435: this .reader.getSystemID(),
436: this .reader.getLineNr());
437: Enumeration enum = extraAttributes.keys();
438:
439: while (enum.hasMoreElements()) {
440: String key = (String) enum.nextElement();
441: String value = extraAttributes.getProperty(key);
442: attrNames.addElement(key);
443: attrValues.addElement(value);
444: attrTypes.addElement("CDATA");
445: }
446:
447: for (int i = 0; i < attrNames.size(); i++) {
448: String key = (String) attrNames.elementAt(i);
449: String value = (String) attrValues.elementAt(i);
450: String type = (String) attrTypes.elementAt(i);
451:
452: if (key.equals("xmlns")) {
453: defaultNamespace = value;
454: } else if (key.startsWith("xmlns:")) {
455: namespaces.put(key.substring(6), value);
456: }
457: }
458:
459: if (prefix == null) {
460: this .builder.startElement(name, prefix, defaultNamespace,
461: this .reader.getSystemID(),
462: this .reader.getLineNr());
463: } else {
464: this .builder.startElement(name, prefix,
465: namespaces.getProperty(prefix),
466: this .reader.getSystemID(),
467: this .reader.getLineNr());
468: }
469:
470: for (int i = 0; i < attrNames.size(); i++) {
471: String key = (String) attrNames.elementAt(i);
472:
473: if (key.startsWith("xmlns")) {
474: continue;
475: }
476:
477: String value = (String) attrValues.elementAt(i);
478: String type = (String) attrTypes.elementAt(i);
479: colonIndex = key.indexOf(':');
480:
481: if (colonIndex > 0) {
482: String attPrefix = key.substring(0, colonIndex);
483: key = key.substring(colonIndex + 1);
484: this .builder.addAttribute(key, attPrefix,
485: namespaces.getProperty(attPrefix),
486: value, type);
487: } else {
488: this .builder.addAttribute(key, null, null, value, type);
489: }
490: }
491:
492: if (prefix == null) {
493: this .builder.elementAttributesProcessed(name, prefix,
494: defaultNamespace);
495: } else {
496: this .builder.elementAttributesProcessed(name, prefix,
497: namespaces
498: .getProperty(prefix));
499: }
500:
501: if (ch == '/') {
502: if (this .reader.read() != '>') {
503: XMLUtil.errorExpectedInput(reader.getSystemID(),
504: reader.getLineNr(),
505: "`>'");
506: }
507:
508: this .validator.elementEnded(name,
509: this .reader.getSystemID(),
510: this .reader.getLineNr());
511:
512: if (prefix == null) {
513: this .builder.endElement(name, prefix, defaultNamespace);
514: } else {
515: this .builder.endElement(name, prefix,
516: namespaces.getProperty(prefix));
517: }
518:
519: return;
520: }
521:
522: StringBuffer buffer = new StringBuffer(16);
523:
524: for (;;) {
525: buffer.setLength(0);
526: String str;
527:
528: for (;;) {
529: XMLUtil.skipWhitespace(this .reader, buffer);
530: str = XMLUtil.read(this .reader, '&');
531:
532: if ((str.charAt(0) == '&') && (str.charAt(1) != '#')) {
533: XMLUtil.processEntity(str, this .reader,
534: this .entityResolver);
535: } else {
536: break;
537: }
538: }
539:
540: if (str.charAt(0) == '<') {
541: str = XMLUtil.read(this .reader, '\0');
542:
543: if (str.charAt(0) == '/') {
544: XMLUtil.skipWhitespace(this .reader, null);
545: str = XMLUtil.scanIdentifier(this .reader);
546:
547: if (! str.equals(fullName)) {
548: XMLUtil.errorWrongClosingTag(reader.getSystemID(),
549: reader.getLineNr(),
550: name, str);
551: }
552:
553: XMLUtil.skipWhitespace(this .reader, null);
554:
555: if (this .reader.read() != '>') {
556: XMLUtil.errorClosingTagNotEmpty(reader.getSystemID(),
557: reader.getLineNr());
558: }
559:
560: this .validator.elementEnded(fullName,
561: this .reader.getSystemID(),
562: this .reader.getLineNr());
563: if (prefix == null) {
564: this .builder.endElement(name, prefix, defaultNamespace);
565: } else {
566: this .builder.endElement(name, prefix,
567: namespaces.getProperty(prefix));
568: }
569: break;
570: } else { // <[^/]
571: this .reader.unread(str.charAt(0));
572: this .scanSomeTag(true, //CDATA allowed
573: defaultNamespace,
574: (Properties) namespaces.clone());
575: }
576: } else { // [^<]
577: if (str.charAt(0) == '&') {
578: ch = XMLUtil.processCharLiteral(str);
579: buffer.append(ch);
580: } else {
581: reader.unread(str.charAt(0));
582: }
583: this .validator.PCDataAdded(this .reader.getSystemID(),
584: this .reader.getLineNr());
585: Reader r = new ContentReader(this .reader,
586: this .entityResolver,
587: buffer.toString());
588: this .builder.addPCData(r, this .reader.getSystemID(),
589: this .reader.getLineNr());
590: r.close();
591: }
592: }
593: }
594:
595: /**
596: * Processes an attribute of an element.
597: *
598: * @param attrNames contains the names of the attributes.
599: * @param attrValues contains the values of the attributes.
600: * @param attrTypes contains the types of the attributes.
601: *
602: * @throws java.lang.Exception
603: * if something went wrong
604: */
605: protected void processAttribute(Vector attrNames,
606: Vector attrValues, Vector attrTypes) throws Exception {
607: String key = XMLUtil.scanIdentifier(this .reader);
608: XMLUtil.skipWhitespace(this .reader, null);
609:
610: if (!XMLUtil.read(this .reader, '&').equals("=")) {
611: XMLUtil.errorExpectedInput(reader.getSystemID(), reader
612: .getLineNr(), "`='");
613: }
614:
615: XMLUtil.skipWhitespace(this .reader, null);
616: String value = XMLUtil.scanString(this .reader, '&',
617: this .entityResolver);
618: attrNames.addElement(key);
619: attrValues.addElement(value);
620: attrTypes.addElement("CDATA");
621: this.validator.attributeAdded(key, value, this.reader
622: .getSystemID(), this.reader.getLineNr());
623: }
624:
625: }
|