001: /*
002: * Copyright 2004-2008 H2 Group. Licensed under the H2 License, Version 1.0
003: * (license2)
004: * Initial Developer: H2 Group
005: */
006: package org.h2.tools.doc;
007:
/**
 * This class implements a simple XML pull parser.
 * Only a subset of the XML pull parser API is implemented.
 * <p>
 * The complete document is passed to the constructor as a string, and the
 * events are read one at a time using {@link #next()}. Entity and character
 * references are not decoded. Malformed input is reported by throwing a
 * <code>java.lang.Error</code>.
 */
public class XMLParser {

    // Event types. Before the first call to next() the event type is
    // START_DOCUMENT. The parser itself only produces ERROR, START_ELEMENT,
    // END_ELEMENT, PROCESSING_INSTRUCTION, CHARACTERS (also used for CDATA
    // sections), COMMENT, DTD and END_DOCUMENT.
    public static final int ERROR = 0;
    public static final int START_ELEMENT = 1;
    public static final int END_ELEMENT = 2;
    public static final int PROCESSING_INSTRUCTION = 3;
    public static final int CHARACTERS = 4;
    public static final int COMMENT = 5;
    public static final int SPACE = 6;
    public static final int START_DOCUMENT = 7;
    public static final int END_DOCUMENT = 8;
    public static final int ENTITY_REFERENCE = 9;
    public static final int ATTRIBUTE = 10;
    public static final int DTD = 11;
    public static final int CDATA = 12;
    public static final int NAMESPACE = 13;
    public static final int NOTATION_DECLARATION = 14;
    public static final int ENTITY_DECLARATION = 15;

    /** The complete document. */
    private final String xml;

    /** The position of the next character to read. */
    private int index;

    private int eventType;
    private String currentText;
    private String currentToken;
    private String prefix, localName;

    /**
     * The attributes of the current start element, stored as consecutive
     * (prefix, local name, value) triples.
     */
    private String[] attributeValues = new String[3];

    /** The number of used slots in attributeValues. */
    private int currentAttribute;

    /**
     * Set when a self-closing tag was read; the next call to next() then
     * reports the matching END_ELEMENT without reading any input.
     */
    private boolean endElement;

    /**
     * Create a parser for the given document.
     *
     * @param xml the complete XML document
     */
    public XMLParser(String xml) {
        this.xml = xml;
        eventType = START_DOCUMENT;
    }

    /**
     * Append the name of an attribute. The value is expected to be added
     * next using addAttributeValue.
     *
     * @param prefix the prefix (an empty string if there is none)
     * @param localName the local name
     */
    void addAttributeName(String prefix, String localName) {
        // reserve the slot for the value as well
        ensureAttributeCapacity(currentAttribute + 3);
        attributeValues[currentAttribute++] = prefix;
        attributeValues[currentAttribute++] = localName;
    }

    /**
     * Append the value of the attribute whose name was added last.
     *
     * @param v the attribute value
     */
    void addAttributeValue(String v) {
        ensureAttributeCapacity(currentAttribute + 1);
        attributeValues[currentAttribute++] = v;
    }

    /**
     * Grow the attribute array so that it has at least the given length.
     */
    private void ensureAttributeCapacity(int required) {
        if (attributeValues.length < required) {
            int newLength = Math.max(attributeValues.length * 2, required);
            String[] temp = new String[newLength];
            System.arraycopy(attributeValues, 0, temp, 0,
                    attributeValues.length);
            attributeValues = temp;
        }
    }

    /**
     * Read the next character and advance the position.
     *
     * @return the character, or -1 at the end of the document
     */
    private int readChar() {
        if (index >= xml.length()) {
            return -1;
        }
        return xml.charAt(index++);
    }

    /**
     * Report malformed input. This method always throws.
     *
     * @param expected the text that was expected
     */
    private void error(String expected) {
        throw new Error("expected: " + expected + " got: "
                + xml.substring(index));
    }

    /**
     * Read exactly the given characters, or fail.
     */
    private void read(String chars) {
        for (int i = 0; i < chars.length(); i++) {
            if (readChar() != chars.charAt(i)) {
                error(chars);
            }
        }
    }

    /**
     * Skip all characters up to and including the space character.
     */
    private void skipSpaces() {
        while (index < xml.length() && xml.charAt(index) <= ' ') {
            index++;
        }
    }

    /**
     * Read the next token and update the event type, text, token, name and
     * attributes accordingly.
     */
    private void read() {
        currentText = null;
        currentAttribute = 0;
        int tokenStart = index;
        int ch = readChar();
        if (ch < 0) {
            eventType = END_DOCUMENT;
        } else if (ch != '<') {
            readCharacters(tokenStart);
        } else {
            ch = readChar();
            if (ch < 0) {
                eventType = ERROR;
            } else if (ch == '?') {
                readProcessingInstruction();
            } else if (ch == '!') {
                readDeclaration();
            } else if (ch == '/') {
                readEndElement();
            } else {
                readStartElement();
            }
        }
        currentToken = xml.substring(tokenStart, index);
    }

    /**
     * Read character data up to (but not including) the next '<' or the end
     * of the document. The first character was already consumed.
     */
    private void readCharacters(int textStart) {
        // TODO need to replace &#xx;?
        eventType = CHARACTERS;
        int end = xml.indexOf('<', index);
        index = end < 0 ? xml.length() : end;
        currentText = xml.substring(textStart, index);
    }

    /**
     * Read a processing instruction; the position is just after "<?".
     * If it starts with "xml" it is skipped and the event that follows is
     * reported instead; the token then covers both.
     */
    private void readProcessingInstruction() {
        eventType = PROCESSING_INSTRUCTION;
        int start = index;
        int end = xml.indexOf("?>", start);
        if (end < 0) {
            index = xml.length();
            error("?>");
        }
        index = end + 2;
        if (xml.startsWith("xml", start)) {
            read();
        } else {
            // the text excludes the terminating "?>"
            currentText = xml.substring(start, end);
        }
    }

    /**
     * Read a construct starting with "<!": a comment, a DOCTYPE declaration,
     * or a CDATA section. Anything else is not recognized: only the
     * characters read so far are consumed and the event type is unchanged.
     */
    private void readDeclaration() {
        int ch = readChar();
        if (ch == '-') {
            readComment();
        } else if (ch == 'D') {
            readDocType();
        } else if (ch == '[') {
            readCData();
        }
    }

    /**
     * Read a comment; the position is just after "<!-". The first "--" in
     * the comment must be followed by ">".
     */
    private void readComment() {
        eventType = COMMENT;
        if (readChar() != '-') {
            error("-");
        }
        int start = index;
        int end = xml.indexOf("--", start);
        if (end < 0) {
            index = xml.length();
            error("-->");
        }
        index = end + 2;
        read(">");
        // the text excludes the terminating "-->"
        currentText = xml.substring(start, end);
    }

    /**
     * Read a DOCTYPE declaration; the position is just after "<!D".
     * Everything up to the next '>' (or the end of the document, if there
     * is none) is consumed.
     */
    private void readDocType() {
        read("OCTYPE");
        eventType = DTD;
        int end = xml.indexOf('>', index);
        index = end < 0 ? xml.length() : end + 1;
    }

    /**
     * Read a CDATA section; the position is just after "<![". The content
     * is reported as a CHARACTERS event.
     */
    private void readCData() {
        read("CDATA[");
        eventType = CHARACTERS;
        int start = index;
        int end = xml.indexOf("]]>", start);
        if (end < 0) {
            index = xml.length();
            error("]]>");
        }
        index = end + 3;
        currentText = xml.substring(start, end);
    }

    /**
     * Read an end tag; the position is just after "</".
     */
    private void readEndElement() {
        int nameStart = index;
        prefix = null;
        eventType = END_ELEMENT;
        while (true) {
            int ch = readChar();
            if (ch < 0) {
                error(">");
            } else if (ch == ':') {
                prefix = xml.substring(nameStart, index - 1);
                // the local name starts right after the colon
                nameStart = index;
            } else if (ch == '>') {
                localName = xml.substring(nameStart, index - 1);
                break;
            } else if (ch <= ' ') {
                localName = xml.substring(nameStart, index - 1);
                skipSpaces();
                read(">");
                break;
            }
        }
    }

    /**
     * Read a start tag including the attributes. The character after '<'
     * (the first character of the name) was already consumed. For a
     * self-closing tag, the END_ELEMENT event is scheduled.
     */
    private void readStartElement() {
        int nameStart = index - 1;
        prefix = null;
        localName = null;
        eventType = START_ELEMENT;
        while (true) {
            int ch = readChar();
            if (ch < 0) {
                error(">");
            } else if (ch == ':') {
                prefix = xml.substring(nameStart, index - 1);
                // the local name starts right after the colon
                nameStart = index;
            } else if (ch <= ' ') {
                localName = xml.substring(nameStart, index - 1);
                readAttributeValues();
                // this is either '/' or '>'
                ch = readChar();
            }
            if (ch == '/') {
                if (localName == null) {
                    localName = xml.substring(nameStart, index - 1);
                }
                read(">");
                endElement = true;
                break;
            } else if (ch == '>') {
                if (localName == null) {
                    localName = xml.substring(nameStart, index - 1);
                }
                break;
            }
        }
    }

    /**
     * Read all attributes of a start tag. On return, the next character is
     * '/' or '>'. Attribute values may be enclosed in double or single
     * quotes.
     */
    private void readAttributeValues() {
        while (true) {
            int start = index;
            int ch = readChar();
            if (ch < 0) {
                error(">");
            } else if (ch <= ' ') {
                continue;
            } else if (ch == '/' || ch == '>') {
                // leave the end of the tag to the caller
                index--;
                return;
            }
            int end;
            int localNameStart = start;
            while (true) {
                end = index;
                ch = readChar();
                if (ch < 0) {
                    error("=");
                } else if (ch <= ' ') {
                    skipSpaces();
                    ch = readChar();
                    if (ch != '=') {
                        error("=");
                    }
                    break;
                } else if (ch == '=') {
                    break;
                } else if (ch == ':') {
                    localNameStart = index;
                }
            }
            if (localNameStart == start) {
                addAttributeName("", xml.substring(localNameStart, end));
            } else {
                addAttributeName(xml.substring(start, localNameStart - 1),
                        xml.substring(localNameStart, end));
            }
            skipSpaces();
            int quote = readChar();
            if (quote != '\"' && quote != '\'') {
                error("\"");
            }
            int valueStart = index;
            // the value ends at the next occurrence of the opening quote
            int valueEnd = xml.indexOf(quote, valueStart);
            if (valueEnd < 0) {
                index = xml.length();
                error(String.valueOf((char) quote));
            }
            index = valueEnd + 1;
            addAttributeValue(xml.substring(valueStart, valueEnd));
        }
    }

    /**
     * Check if there are more events. This is the case if the end of the
     * document has not been reached, or if the END_ELEMENT event of a
     * self-closing tag has not been reported yet.
     *
     * @return true if there are more events
     */
    public boolean hasNext() {
        return endElement || index < xml.length();
    }

    /**
     * Read the next event. After a self-closing tag, an END_ELEMENT event
     * with an empty token is reported.
     *
     * @return the event type
     */
    public int next() {
        if (endElement) {
            endElement = false;
            eventType = END_ELEMENT;
            currentToken = "";
        } else {
            read();
        }
        return eventType;
    }

    /**
     * Read the next event, skipping comments, DOCTYPE declarations and
     * processing instructions.
     *
     * @return the event type
     */
    public int nextTag() {
        while (true) {
            int type = next();
            if (type != COMMENT && type != DTD
                    && type != PROCESSING_INSTRUCTION) {
                return type;
            }
        }
    }

    /**
     * Get the type of the current event.
     *
     * @return the event type
     */
    public int getEventType() {
        return eventType;
    }

    /**
     * Get the text of the current event: the character data, the content of
     * a CDATA section, comment or processing instruction (without the
     * delimiters).
     *
     * @return the text, or null if the current event has no text
     */
    public String getText() {
        return currentText;
    }

    /**
     * Get the part of the document that was read for the current event.
     *
     * @return the token
     */
    public String getToken() {
        return currentToken;
    }

    /**
     * Get the number of attributes of the current start element.
     *
     * @return the number of attributes
     */
    public int getAttributeCount() {
        return currentAttribute / 3;
    }

    /**
     * Get the prefix of an attribute.
     *
     * @param index the attribute index (0 based)
     * @return the prefix, or an empty string if there is none
     */
    public String getAttributePrefix(int index) {
        return attributeValues[index * 3];
    }

    /**
     * Get the local name of an attribute.
     *
     * @param index the attribute index (0 based)
     * @return the local name
     */
    public String getAttributeLocalName(int index) {
        return attributeValues[index * 3 + 1];
    }

    /**
     * Get the full name of an attribute.
     *
     * @param index the attribute index (0 based)
     * @return the prefix, a colon and the local name, or only the local
     *         name if there is no prefix
     */
    public String getAttributeName(int index) {
        String prefix = getAttributePrefix(index);
        String localName = getAttributeLocalName(index);
        return prefix == null || prefix.length() == 0 ? localName
                : prefix + ":" + localName;
    }

    /**
     * Get the value of an attribute.
     *
     * @param index the attribute index (0 based)
     * @return the value
     */
    public String getAttributeValue(int index) {
        return attributeValues[index * 3 + 2];
    }

    /**
     * Get the value of the first attribute with the given local name.
     *
     * @param namespaceURI ignored
     * @param localName the local name of the attribute
     * @return the value, or null if there is no such attribute
     */
    public String getAttributeValue(String namespaceURI,
            String localName) {
        int len = getAttributeCount();
        for (int i = 0; i < len; i++) {
            if (getAttributeLocalName(i).equals(localName)) {
                return getAttributeValue(i);
            }
        }
        return null;
    }

    /**
     * Get the full name of the current element.
     *
     * @return the prefix, a colon and the local name, or only the local
     *         name if there is no prefix
     */
    public String getName() {
        return prefix == null || prefix.length() == 0 ? localName
                : prefix + ":" + localName;
    }

    /**
     * Get the local name of the current element.
     *
     * @return the local name
     */
    public String getLocalName() {
        return localName;
    }

    /**
     * Get the prefix of the current element.
     *
     * @return the prefix, or null if there is none
     */
    public String getPrefix() {
        return prefix;
    }

    /**
     * Check if the text of the current event consists only of characters
     * that are removed by String.trim().
     *
     * @return true if it does, false otherwise or if the current event has
     *         no text
     */
    public boolean isWhiteSpace() {
        String text = getText();
        return text != null && text.trim().length() == 0;
    }

    /**
     * Get the part of the document that was not read yet.
     *
     * @return the remaining text
     */
    public String getRemaining() {
        return xml.substring(index);
    }

    /**
     * Get the current read position within the document.
     *
     * @return the position
     */
    public int getPos() {
        return index;
    }

}
|