001: /*
This code is based upon the NanoXML 2.2 sources.
003: */
004:
005: package org.zaval.xml;
006:
007: import java.io.*;
008: import java.util.*;
009:
010: public class XmlElement {
011: static final long serialVersionUID = 6685035139346394777L;
012: public static final int NANOXML_MAJOR_VERSION = 2;
013: public static final int NANOXML_MINOR_VERSION = 2;
014:
015: private Hashtable attributes;
016: private Vector children;
017: private String name;
018: private String contents;
019: private Hashtable entities;
020:
021: private int lineNr;
022: private boolean ignoreCase;
023: private boolean ignoreWhitespace;
024: private char charReadTooMuch;
025: private Reader reader;
026: private int parserLineNr;
027:
028: public XmlElement() {
029: this (new Hashtable(), false, true, true);
030: }
031:
032: public XmlElement(Hashtable entities) {
033: this (entities, false, true, true);
034: }
035:
036: public XmlElement(boolean skipLeadingWhitespace) {
037: this (new Hashtable(), skipLeadingWhitespace, true, true);
038: }
039:
040: public XmlElement(Hashtable entities, boolean skipLeadingWhitespace) {
041: this (entities, skipLeadingWhitespace, true, true);
042: }
043:
044: public XmlElement(Hashtable entities,
045: boolean skipLeadingWhitespace, boolean ignoreCase) {
046: this (entities, skipLeadingWhitespace, true, ignoreCase);
047: }
048:
049: protected XmlElement(Hashtable entities,
050: boolean skipLeadingWhitespace,
051: boolean fillBasicConversionTable,
052: boolean ignoreCase)
053: {
054: this .ignoreWhitespace = skipLeadingWhitespace;
055: this .ignoreCase = ignoreCase;
056: this .name = null;
057: this .contents = "";
058: this .attributes = new Hashtable();
059: this .children = new Vector();
060: this .entities = entities;
061: this .lineNr = 0;
062: Enumeration enum = this .entities.keys();
063: while (enum.hasMoreElements()) {
064: Object key = enum.nextElement();
065: Object value = this .entities.get(key);
066: if (value instanceof String) {
067: value = ((String) value).toCharArray();
068: this .entities.put(key, value);
069: }
070: }
071: if (fillBasicConversionTable) {
072: this .entities.put("amp", new char[] { '&' });
073: this .entities.put("quot", new char[] { '"' });
074: this .entities.put("apos", new char[] { '\'' });
075: this .entities.put("lt", new char[] { '<' });
076: this .entities.put("gt", new char[] { '>' });
077: }
078: }
079:
080: public void addChild(XmlElement child) {
081: this .children.addElement(child);
082: }
083:
084: public void setAttribute(String name, Object value) {
085: if (this .ignoreCase) {
086: name = name.toLowerCase();
087: }
088: this .attributes.put(name, value.toString());
089: }
090:
091: public int countChildren() {
092: return this .children.size();
093: }
094:
095: public Enumeration enumerateAttributeNames() {
096: return this .attributes.keys();
097: }
098:
099: public Enumeration enumerateChildren() {
100: return this .children.elements();
101: }
102:
103: public Vector getChildren() {
104: try {
105: return (Vector) this .children.clone();
106: } catch (Exception e) {
107: // this never happens, however, some Java compilers are so
108: // braindead that they require this exception clause
109: return null;
110: }
111: }
112:
113: public String getContents() {
114: return this .getContent();
115: }
116:
117: public String getContent() {
118: return this .contents;
119: }
120:
121: public int getLineNr() {
122: return this .lineNr;
123: }
124:
125: public Object getAttribute(String name) {
126: return this .getAttribute(name, null);
127: }
128:
129: public Object getAttribute(String name, Object defaultValue) {
130: if (this .ignoreCase) {
131: name = name.toLowerCase();
132: }
133: Object value = this .attributes.get(name);
134: if (value == null) {
135: value = defaultValue;
136: }
137: return value;
138: }
139:
140: public Object getAttribute(String name, Hashtable valueSet,
141: String defaultKey, boolean allowLiterals) {
142: if (this .ignoreCase) {
143: name = name.toLowerCase();
144: }
145: Object key = this .attributes.get(name);
146: Object result;
147: if (key == null) {
148: key = defaultKey;
149: }
150: result = valueSet.get(key);
151: if (result == null) {
152: if (allowLiterals) {
153: result = key;
154: } else {
155: throw this .invalidValue(name, (String) key);
156: }
157: }
158: return result;
159: }
160:
161: public String getName() {
162: return this .name;
163: }
164:
165: public String getTagName() {
166: return this .getName();
167: }
168:
169: public void parse(Reader reader) throws IOException,
170: XmlParseException {
171: this .parse(reader, /*startingLineNr*/1);
172: }
173:
174: public void parse(Reader reader, int startingLineNr)
175: throws IOException, XmlParseException {
176: this .charReadTooMuch = '\0';
177: this .reader = reader;
178: this .parserLineNr = startingLineNr;
179:
180: for (;;) {
181: char ch = this .scanWhitespace();
182:
183: if (ch != '<') {
184: throw this .expectedInput("<");
185: }
186:
187: ch = this .readChar();
188:
189: if ((ch == '!') || (ch == '?')) {
190: this .skipSpecialTag(0);
191: } else {
192: this .unreadChar(ch);
193: this .scanElement(this );
194: return;
195: }
196: }
197: }
198:
199: public void parse(String string) throws XmlParseException {
200: try {
201: this .parse(new StringReader(string), 1);
202: } catch (IOException e) {
203: }
204: }
205:
206: public void removeChild(XmlElement child) {
207: this .children.removeElement(child);
208: }
209:
210: public void removeAttribute(String name) {
211: if (this .ignoreCase) {
212: name = name.toLowerCase();
213: }
214: this .attributes.remove(name);
215: }
216:
217: public void removeChild(String name) {
218: this .removeAttribute(name);
219: }
220:
221: protected XmlElement createAnotherElement() {
222: return new XmlElement(this .entities, this .ignoreWhitespace,
223: false, this .ignoreCase);
224: }
225:
226: public void setContent(String content) {
227: this .contents = content;
228: }
229:
230: public void setTagName(String name) {
231: this .setName(name);
232: }
233:
234: public void setName(String name) {
235: this .name = name;
236: }
237:
238: public String toString() {
239: try {
240: ByteArrayOutputStream out = new ByteArrayOutputStream();
241: this .write(new PrintStream(out));
242: out.flush();
243: return new String(out.toByteArray(), 0);
244: } catch (IOException e) {
245: // Java exception handling suxx
246: return super .toString();
247: }
248: }
249:
250: public void write(PrintStream writer)
251: throws IOException
252: {
253: if (this .name == null) {
254: this .writeEncoded(writer, this .contents);
255: return;
256: }
257: writer.print('<');
258: writer.print(this .name);
259: if (! this .attributes.isEmpty()) {
260: Enumeration enum = this .attributes.keys();
261: while (enum.hasMoreElements()) {
262: writer.print(' ');
263: String key = (String) enum.nextElement();
264: String value = (String) this .attributes.get(key);
265: writer.print(key);
266: writer.print('='); writer.write('"');
267: this .writeEncoded(writer, value);
268: writer.write('"');
269: }
270: }
271: if ((this .contents != null) && (this .contents.length() > 0)) {
272: writer.print('>');
273: this .writeEncoded(writer, this .contents);
274: writer.print('<'); writer.print('/');
275: writer.print(this .name);
276: writer.write('>');
277: } else if (this .children.isEmpty()) {
278: writer.print('/'); writer.print('>');
279: } else {
280: writer.print('>');
281: Enumeration enum = this .enumerateChildren();
282: while (enum.hasMoreElements()) {
283: XmlElement child = (XmlElement) enum.nextElement();
284: child.write(writer);
285: }
286: writer.print('<'); writer.print('/');
287: writer.print(this .name);
288: writer.print('>');
289: }
290: }
291:
292: protected void writeEncoded(PrintStream writer, String str)
293: throws IOException {
294: for (int i = 0; i < str.length(); i += 1) {
295: char ch = str.charAt(i);
296: switch (ch) {
297: case '<':
298: writer.write('&');
299: writer.write('l');
300: writer.write('t');
301: writer.write(';');
302: break;
303: case '>':
304: writer.write('&');
305: writer.write('g');
306: writer.write('t');
307: writer.write(';');
308: break;
309: case '&':
310: writer.write('&');
311: writer.write('a');
312: writer.write('m');
313: writer.write('p');
314: writer.write(';');
315: break;
316: case '"':
317: writer.write('&');
318: writer.write('q');
319: writer.write('u');
320: writer.write('o');
321: writer.write('t');
322: writer.write(';');
323: break;
324: case '\'':
325: writer.write('&');
326: writer.write('a');
327: writer.write('p');
328: writer.write('o');
329: writer.write('s');
330: writer.write(';');
331: break;
332: default:
333: int unicode = (int) ch;
334: if ((unicode < 32) || (unicode > 126)) {
335: writer.write('&');
336: writer.write('#');
337: writer.write('x');
338: writer.print(Integer.toString(unicode, 16));
339: writer.write(';');
340: } else {
341: writer.write(ch);
342: }
343: }
344: }
345: }
346:
347: protected void scanIdentifier(StringBuffer result)
348: throws IOException {
349: for (;;) {
350: char ch = this .readChar();
351: if (((ch < 'A') || (ch > 'Z'))
352: && ((ch < 'a') || (ch > 'z'))
353: && ((ch < '0') || (ch > '9')) && (ch != '_')
354: && (ch != '.') && (ch != ':') && (ch != '-')
355: && (ch <= '\u007E')) {
356: this .unreadChar(ch);
357: return;
358: }
359: result.append(ch);
360: }
361: }
362:
363: protected char scanWhitespace() throws IOException {
364: for (;;) {
365: char ch = this .readChar();
366: switch (ch) {
367: case ' ':
368: case '\t':
369: case '\n':
370: case '\r':
371: break;
372: default:
373: return ch;
374: }
375: }
376: }
377:
378: protected char scanWhitespace(StringBuffer result)
379: throws IOException {
380: for (;;) {
381: char ch = this .readChar();
382: switch (ch) {
383: case ' ':
384: case '\t':
385: case '\n':
386: result.append(ch);
387: case '\r':
388: break;
389: default:
390: return ch;
391: }
392: }
393: }
394:
395: protected void scanString(StringBuffer string) throws IOException {
396: char delimiter = this .readChar();
397: if ((delimiter != '\'') && (delimiter != '"')) {
398: throw this .expectedInput("' or \"");
399: }
400: for (;;) {
401: char ch = this .readChar();
402: if (ch == delimiter) {
403: return;
404: } else if (ch == '&') {
405: this .resolveEntity(string);
406: } else {
407: string.append(ch);
408: }
409: }
410: }
411:
412: protected void scanPCData(StringBuffer data) throws IOException {
413: for (;;) {
414: char ch = this .readChar();
415: if (ch == '<') {
416: ch = this .readChar();
417: if (ch == '!') {
418: this .checkCDATA(data);
419: } else {
420: this .unreadChar(ch);
421: return;
422: }
423: } else if (ch == '&') {
424: this .resolveEntity(data);
425: } else {
426: data.append(ch);
427: }
428: }
429: }
430:
431: protected boolean checkCDATA(StringBuffer buf) throws IOException {
432: char ch = this .readChar();
433: if (ch != '[') {
434: this .unreadChar(ch);
435: this .skipSpecialTag(0);
436: return false;
437: } else if (!this .checkLiteral("CDATA[")) {
438: this .skipSpecialTag(1); // one [ has already been read
439: return false;
440: } else {
441: int delimiterCharsSkipped = 0;
442: while (delimiterCharsSkipped < 3) {
443: ch = this .readChar();
444: switch (ch) {
445: case ']':
446: if (delimiterCharsSkipped < 2) {
447: delimiterCharsSkipped += 1;
448: } else {
449: buf.append(']');
450: buf.append(']');
451: delimiterCharsSkipped = 0;
452: }
453: break;
454: case '>':
455: if (delimiterCharsSkipped < 2) {
456: for (int i = 0; i < delimiterCharsSkipped; i++) {
457: buf.append(']');
458: }
459: delimiterCharsSkipped = 0;
460: buf.append('>');
461: } else {
462: delimiterCharsSkipped = 3;
463: }
464: break;
465: default:
466: for (int i = 0; i < delimiterCharsSkipped; i += 1) {
467: buf.append(']');
468: }
469: buf.append(ch);
470: delimiterCharsSkipped = 0;
471: }
472: }
473: return true;
474: }
475: }
476:
477: protected void skipComment() throws IOException {
478: int dashesToRead = 2;
479: while (dashesToRead > 0) {
480: char ch = this .readChar();
481: if (ch == '-') {
482: dashesToRead -= 1;
483: } else {
484: dashesToRead = 2;
485: }
486: }
487: if (this .readChar() != '>') {
488: throw this .expectedInput(">");
489: }
490: }
491:
492: protected void skipSpecialTag(int bracketLevel) throws IOException {
493: int tagLevel = 1; // <
494: char stringDelimiter = '\0';
495: if (bracketLevel == 0) {
496: char ch = this .readChar();
497: if (ch == '[') {
498: bracketLevel += 1;
499: } else if (ch == '-') {
500: ch = this .readChar();
501: if (ch == '[') {
502: bracketLevel += 1;
503: } else if (ch == ']') {
504: bracketLevel -= 1;
505: } else if (ch == '-') {
506: this .skipComment();
507: return;
508: }
509: }
510: }
511: while (tagLevel > 0) {
512: char ch = this .readChar();
513: if (stringDelimiter == '\0') {
514: if ((ch == '"') || (ch == '\'')) {
515: stringDelimiter = ch;
516: } else if (bracketLevel <= 0) {
517: if (ch == '<') {
518: tagLevel += 1;
519: } else if (ch == '>') {
520: tagLevel -= 1;
521: }
522: }
523: if (ch == '[') {
524: bracketLevel += 1;
525: } else if (ch == ']') {
526: bracketLevel -= 1;
527: }
528: } else {
529: if (ch == stringDelimiter) {
530: stringDelimiter = '\0';
531: }
532: }
533: }
534: }
535:
536: protected boolean checkLiteral(String literal) throws IOException {
537: int length = literal.length();
538: for (int i = 0; i < length; i += 1) {
539: if (this .readChar() != literal.charAt(i)) {
540: return false;
541: }
542: }
543: return true;
544: }
545:
546: protected char readChar() throws IOException {
547: if (this .charReadTooMuch != '\0') {
548: char ch = this .charReadTooMuch;
549: this .charReadTooMuch = '\0';
550: return ch;
551: } else {
552: int i = this .reader.read();
553: if (i < 0) {
554: throw this .unexpectedEndOfData();
555: } else if (i == 10) {
556: this .parserLineNr += 1;
557: return '\n';
558: } else {
559: return (char) i;
560: }
561: }
562: }
563:
564: protected void scanElement(XmlElement elt) throws IOException {
565: StringBuffer buf = new StringBuffer();
566: this .scanIdentifier(buf);
567: String name = buf.toString();
568: elt.setName(name);
569: char ch = this .scanWhitespace();
570: while ((ch != '>') && (ch != '/')) {
571: buf.setLength(0);
572: this .unreadChar(ch);
573: this .scanIdentifier(buf);
574: String key = buf.toString();
575: ch = this .scanWhitespace();
576: if (ch != '=') {
577: throw this .expectedInput("=");
578: }
579: this .unreadChar(this .scanWhitespace());
580: buf.setLength(0);
581: this .scanString(buf);
582: elt.setAttribute(key, buf);
583: ch = this .scanWhitespace();
584: }
585: if (ch == '/') {
586: ch = this .readChar();
587: if (ch != '>') {
588: throw this .expectedInput(">");
589: }
590: return;
591: }
592: buf.setLength(0);
593: ch = this .scanWhitespace(buf);
594: if (ch != '<') {
595: this .unreadChar(ch);
596: this .scanPCData(buf);
597: } else {
598: for (;;) {
599: ch = this .readChar();
600: if (ch == '!') {
601: if (this .checkCDATA(buf)) {
602: this .scanPCData(buf);
603: break;
604: } else {
605: ch = this .scanWhitespace(buf);
606: if (ch != '<') {
607: this .unreadChar(ch);
608: this .scanPCData(buf);
609: break;
610: }
611: }
612: } else {
613: buf.setLength(0);
614: break;
615: }
616: }
617: }
618: if (buf.length() == 0) {
619: while (ch != '/') {
620: if (ch == '!') {
621: ch = this .readChar();
622: if (ch != '-') {
623: throw this .expectedInput("Comment or Element");
624: }
625: ch = this .readChar();
626: if (ch != '-') {
627: throw this .expectedInput("Comment or Element");
628: }
629: this .skipComment();
630: } else {
631: this .unreadChar(ch);
632: XmlElement child = this .createAnotherElement();
633: this .scanElement(child);
634: elt.addChild(child);
635: }
636: ch = this .scanWhitespace();
637: if (ch != '<') {
638: throw this .expectedInput("<");
639: }
640: ch = this .readChar();
641: }
642: this .unreadChar(ch);
643: } else {
644: if (this .ignoreWhitespace) {
645: elt.setContent(buf.toString().trim());
646: } else {
647: elt.setContent(buf.toString());
648: }
649: }
650: ch = this .readChar();
651: if (ch != '/') {
652: throw this .expectedInput("/");
653: }
654: this .unreadChar(this .scanWhitespace());
655: if (!this .checkLiteral(name)) {
656: throw this .expectedInput(name);
657: }
658: if (this .scanWhitespace() != '>') {
659: throw this .expectedInput(">");
660: }
661: }
662:
663: protected void resolveEntity(StringBuffer buf) throws IOException {
664: char ch = '\0';
665: StringBuffer keyBuf = new StringBuffer();
666: for (;;) {
667: ch = this .readChar();
668: if (ch == ';') {
669: break;
670: }
671: keyBuf.append(ch);
672: }
673: String key = keyBuf.toString();
674: if (key.charAt(0) == '#') {
675: try {
676: if (key.charAt(1) == 'x') {
677: ch = (char) Integer.parseInt(key.substring(2), 16);
678: } else {
679: ch = (char) Integer.parseInt(key.substring(1), 10);
680: }
681: } catch (NumberFormatException e) {
682: throw this .unknownEntity(key);
683: }
684: buf.append(ch);
685: } else {
686: char[] value = (char[]) this .entities.get(key);
687: if (value == null) {
688: throw this .unknownEntity(key);
689: }
690: buf.append(value);
691: }
692: }
693:
694: protected void unreadChar(char ch) {
695: this .charReadTooMuch = ch;
696: }
697:
698: protected XmlParseException invalidValueSet(String name) {
699: String msg = "Invalid value set (entity name = \"" + name
700: + "\")";
701: return new XmlParseException(this .getName(), this .parserLineNr,
702: msg);
703: }
704:
705: protected XmlParseException invalidValue(String name, String value) {
706: String msg = "Attribute \"" + name
707: + "\" does not contain a valid " + "value (\"" + value
708: + "\")";
709: return new XmlParseException(this .getName(), this .parserLineNr,
710: msg);
711: }
712:
713: protected XmlParseException unexpectedEndOfData() {
714: String msg = "Unexpected end of data reached";
715: return new XmlParseException(this .getName(), this .parserLineNr,
716: msg);
717: }
718:
719: protected XmlParseException syntaxError(String context) {
720: String msg = "Syntax error while parsing " + context;
721: return new XmlParseException(this .getName(), this .parserLineNr,
722: msg);
723: }
724:
725: protected XmlParseException expectedInput(String charSet) {
726: String msg = "Expected: " + charSet;
727: return new XmlParseException(this .getName(), this .parserLineNr,
728: msg);
729: }
730:
731: protected XmlParseException unknownEntity(String name) {
732: String msg = "Unknown or invalid entity: &" + name + ";";
733: return new XmlParseException(this.getName(), this.parserLineNr,
734: msg);
735: }
736: }
|