0001: /* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany
0002: *
0003: * Permission is hereby granted, free of charge, to any person obtaining a copy
0004: * of this software and associated documentation files (the "Software"), to deal
0005: * in the Software without restriction, including without limitation the rights
0006: * to use, copy, modify, merge, publish, distribute, sublicense, and/or
0007: * sell copies of the Software, and to permit persons to whom the Software is
0008: * furnished to do so, subject to the following conditions:
0009: *
0010: * The above copyright notice and this permission notice shall be included in
0011: * all copies or substantial portions of the Software.
0012: *
0013: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0014: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0015: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
0016: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
0017: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
0018: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
0019: * IN THE SOFTWARE. */
0020:
0021: // Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode)
0022: package org.kxml2.io;
0023:
0024: import java.io.*;
0025: import java.util.*;
0026:
0027: import org.xmlpull.v1.*;
0028:
/** A simple, pull-based XML parser. This class replaces the kXML 1
    XmlParser class and the corresponding event classes. */
0031:
0032: public class KXmlParser implements XmlPullParser {
0033:
0034: private Object location;
0035: static final private String UNEXPECTED_EOF = "Unexpected EOF";
0036: static final private String ILLEGAL_TYPE = "Wrong event type";
0037: static final private int LEGACY = 999;
0038: static final private int XML_DECL = 998;
0039:
0040: // general
0041:
0042: private String version;
0043: private Boolean standalone;
0044:
0045: private boolean processNsp;
0046: private boolean relaxed;
0047: private Hashtable entityMap;
0048: private int depth;
0049: private String[] elementStack = new String[16];
0050: private String[] nspStack = new String[8];
0051: private int[] nspCounts = new int[4];
0052:
0053: // source
0054:
0055: private Reader reader;
0056: private String encoding;
0057: private char[] srcBuf;
0058:
0059: private int srcPos;
0060: private int srcCount;
0061:
0062: private int line;
0063: private int column;
0064:
0065: // txtbuffer
0066:
0067: private char[] txtBuf = new char[128];
0068: private int txtPos;
0069:
0070: // Event-related
0071:
0072: private int type;
0073: //private String text;
0074: private boolean isWhitespace;
0075: private String namespace;
0076: private String prefix;
0077: private String name;
0078:
0079: private boolean degenerated;
0080: private int attributeCount;
0081: private String[] attributes = new String[16];
0082: private int stackMismatch = 0;
0083: private String error;
0084:
0085: /**
0086: * A separate peek buffer seems simpler than managing
0087: * wrap around in the first level read buffer */
0088:
0089: private int[] peek = new int[2];
0090: private int peekCount;
0091: private boolean wasCR;
0092:
0093: private boolean unresolved;
0094: private boolean token;
0095:
0096: public KXmlParser() {
0097: srcBuf = new char[Runtime.getRuntime().freeMemory() >= 1048576 ? 8192
0098: : 128];
0099: }
0100:
0101: private final boolean isProp(String n1, boolean prop, String n2) {
0102: if (!n1.startsWith("http://xmlpull.org/v1/doc/"))
0103: return false;
0104: if (prop)
0105: return n1.substring(42).equals(n2);
0106: else
0107: return n1.substring(40).equals(n2);
0108: }
0109:
0110: private final boolean adjustNsp() throws XmlPullParserException {
0111:
0112: boolean any = false;
0113:
0114: for (int i = 0; i < attributeCount << 2; i += 4) {
0115: // * 4 - 4; i >= 0; i -= 4) {
0116:
0117: String attrName = attributes[i + 2];
0118: int cut = attrName.indexOf(':');
0119: String prefix;
0120:
0121: if (cut != -1) {
0122: prefix = attrName.substring(0, cut);
0123: attrName = attrName.substring(cut + 1);
0124: } else if (attrName.equals("xmlns")) {
0125: prefix = attrName;
0126: attrName = null;
0127: } else
0128: continue;
0129:
0130: if (!prefix.equals("xmlns")) {
0131: any = true;
0132: } else {
0133: int j = (nspCounts[depth]++) << 1;
0134:
0135: nspStack = ensureCapacity(nspStack, j + 2);
0136: nspStack[j] = attrName;
0137: nspStack[j + 1] = attributes[i + 3];
0138:
0139: if (attrName != null && attributes[i + 3].equals(""))
0140: error("illegal empty namespace");
0141:
0142: // prefixMap = new PrefixMap (prefixMap, attrName, attr.getValue ());
0143:
0144: //System.out.println (prefixMap);
0145:
0146: System.arraycopy(attributes, i + 4, attributes, i,
0147: ((--attributeCount) << 2) - i);
0148:
0149: i -= 4;
0150: }
0151: }
0152:
0153: if (any) {
0154: for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {
0155:
0156: String attrName = attributes[i + 2];
0157: int cut = attrName.indexOf(':');
0158:
0159: if (cut == 0 && !relaxed)
0160: throw new RuntimeException(
0161: "illegal attribute name: " + attrName
0162: + " at " + this );
0163:
0164: else if (cut != -1) {
0165: String attrPrefix = attrName.substring(0, cut);
0166:
0167: attrName = attrName.substring(cut + 1);
0168:
0169: String attrNs = getNamespace(attrPrefix);
0170:
0171: if (attrNs == null && !relaxed)
0172: throw new RuntimeException("Undefined Prefix: "
0173: + attrPrefix + " in " + this );
0174:
0175: attributes[i] = attrNs;
0176: attributes[i + 1] = attrPrefix;
0177: attributes[i + 2] = attrName;
0178:
0179: /*
0180: if (!relaxed) {
0181: for (int j = (attributeCount << 2) - 4; j > i; j -= 4)
0182: if (attrName.equals(attributes[j + 2])
0183: && attrNs.equals(attributes[j]))
0184: exception(
0185: "Duplicate Attribute: {"
0186: + attrNs
0187: + "}"
0188: + attrName);
0189: }
0190: */
0191: }
0192: }
0193: }
0194:
0195: int cut = name.indexOf(':');
0196:
0197: if (cut == 0)
0198: error("illegal tag name: " + name);
0199:
0200: if (cut != -1) {
0201: prefix = name.substring(0, cut);
0202: name = name.substring(cut + 1);
0203: }
0204:
0205: this .namespace = getNamespace(prefix);
0206:
0207: if (this .namespace == null) {
0208: if (prefix != null)
0209: error("undefined prefix: " + prefix);
0210: this .namespace = NO_NAMESPACE;
0211: }
0212:
0213: return any;
0214: }
0215:
0216: private final String[] ensureCapacity(String[] arr, int required) {
0217: if (arr.length >= required)
0218: return arr;
0219: String[] bigger = new String[required + 16];
0220: System.arraycopy(arr, 0, bigger, 0, arr.length);
0221: return bigger;
0222: }
0223:
0224: private final void error(String desc) throws XmlPullParserException {
0225: if (relaxed) {
0226: if (error == null)
0227: error = "ERR: " + desc;
0228: } else
0229: exception(desc);
0230: }
0231:
0232: private final void exception(String desc)
0233: throws XmlPullParserException {
0234: throw new XmlPullParserException(desc.length() < 100 ? desc
0235: : desc.substring(0, 100) + "\n", this , null);
0236: }
0237:
0238: /**
0239: * common base for next and nextToken. Clears the state, except from
0240: * txtPos and whitespace. Does not set the type variable */
0241:
0242: private final void nextImpl() throws IOException,
0243: XmlPullParserException {
0244:
0245: if (reader == null)
0246: exception("No Input specified");
0247:
0248: if (type == END_TAG)
0249: depth--;
0250:
0251: while (true) {
0252: attributeCount = -1;
0253:
0254: // degenerated needs to be handled before error because of possible
0255: // processor expectations(!)
0256:
0257: if (degenerated) {
0258: degenerated = false;
0259: type = END_TAG;
0260: return;
0261: }
0262:
0263: if (error != null) {
0264: for (int i = 0; i < error.length(); i++)
0265: push(error.charAt(i));
0266: // text = error;
0267: error = null;
0268: type = COMMENT;
0269: return;
0270: }
0271:
0272: if (relaxed
0273: && (stackMismatch > 0 || (peek(0) == -1 && depth > 0))) {
0274: int sp = (depth - 1) << 2;
0275: type = END_TAG;
0276: namespace = elementStack[sp];
0277: prefix = elementStack[sp + 1];
0278: name = elementStack[sp + 2];
0279: if (stackMismatch != 1)
0280: error = "missing end tag /" + name + " inserted";
0281: if (stackMismatch > 0)
0282: stackMismatch--;
0283: return;
0284: }
0285:
0286: prefix = null;
0287: name = null;
0288: namespace = null;
0289: // text = null;
0290:
0291: type = peekType();
0292:
0293: switch (type) {
0294:
0295: case ENTITY_REF:
0296: pushEntity();
0297: return;
0298:
0299: case START_TAG:
0300: parseStartTag(false);
0301: return;
0302:
0303: case END_TAG:
0304: parseEndTag();
0305: return;
0306:
0307: case END_DOCUMENT:
0308: return;
0309:
0310: case TEXT:
0311: pushText('<', !token);
0312: if (depth == 0) {
0313: if (isWhitespace)
0314: type = IGNORABLE_WHITESPACE;
0315: // make exception switchable for instances.chg... !!!!
0316: // else
0317: // exception ("text '"+getText ()+"' not allowed outside root element");
0318: }
0319: return;
0320:
0321: default:
0322: type = parseLegacy(token);
0323: if (type != XML_DECL)
0324: return;
0325: }
0326: }
0327: }
0328:
0329: private final int parseLegacy(boolean push) throws IOException,
0330: XmlPullParserException {
0331:
0332: String req = "";
0333: int term;
0334: int result;
0335: int prev = 0;
0336:
0337: read(); // <
0338: int c = read();
0339:
0340: if (c == '?') {
0341: if ((peek(0) == 'x' || peek(0) == 'X')
0342: && (peek(1) == 'm' || peek(1) == 'M')) {
0343:
0344: if (push) {
0345: push(peek(0));
0346: push(peek(1));
0347: }
0348: read();
0349: read();
0350:
0351: if ((peek(0) == 'l' || peek(0) == 'L')
0352: && peek(1) <= ' ') {
0353:
0354: if (line != 1 || column > 4)
0355: error("PI must not start with xml");
0356:
0357: parseStartTag(true);
0358:
0359: if (attributeCount < 1
0360: || !"version".equals(attributes[2]))
0361: error("version expected");
0362:
0363: version = attributes[3];
0364:
0365: int pos = 1;
0366:
0367: if (pos < attributeCount
0368: && "encoding".equals(attributes[2 + 4])) {
0369: encoding = attributes[3 + 4];
0370: pos++;
0371: }
0372:
0373: if (pos < attributeCount
0374: && "standalone"
0375: .equals(attributes[4 * pos + 2])) {
0376: String st = attributes[3 + 4 * pos];
0377: if ("yes".equals(st))
0378: standalone = new Boolean(true);
0379: else if ("no".equals(st))
0380: standalone = new Boolean(false);
0381: else
0382: error("illegal standalone value: " + st);
0383: pos++;
0384: }
0385:
0386: if (pos != attributeCount)
0387: error("illegal xmldecl");
0388:
0389: isWhitespace = true;
0390: txtPos = 0;
0391:
0392: return XML_DECL;
0393: }
0394: }
0395:
0396: /* int c0 = read ();
0397: int c1 = read ();
0398: int */
0399:
0400: term = '?';
0401: result = PROCESSING_INSTRUCTION;
0402: } else if (c == '!') {
0403: if (peek(0) == '-') {
0404: result = COMMENT;
0405: req = "--";
0406: term = '-';
0407: } else if (peek(0) == '[') {
0408: result = CDSECT;
0409: req = "[CDATA[";
0410: term = ']';
0411: push = true;
0412: } else {
0413: result = DOCDECL;
0414: req = "DOCTYPE";
0415: term = -1;
0416: }
0417: } else {
0418: error("illegal: <" + c);
0419: return COMMENT;
0420: }
0421:
0422: for (int i = 0; i < req.length(); i++)
0423: read(req.charAt(i));
0424:
0425: if (result == DOCDECL)
0426: parseDoctype(push);
0427: else {
0428: while (true) {
0429: c = read();
0430: if (c == -1) {
0431: error(UNEXPECTED_EOF);
0432: return COMMENT;
0433: }
0434:
0435: if (push)
0436: push(c);
0437:
0438: if ((term == '?' || c == term) && peek(0) == term
0439: && peek(1) == '>')
0440: break;
0441:
0442: prev = c;
0443: }
0444:
0445: if (term == '-' && prev == '-')
0446: error("illegal comment delimiter: --->");
0447:
0448: read();
0449: read();
0450:
0451: if (push && term != '?')
0452: txtPos--;
0453:
0454: }
0455: return result;
0456: }
0457:
0458: /** precondition: <! consumed */
0459:
0460: private final void parseDoctype(boolean push) throws IOException,
0461: XmlPullParserException {
0462:
0463: int nesting = 1;
0464: boolean quoted = false;
0465:
0466: // read();
0467:
0468: while (true) {
0469: int i = read();
0470: switch (i) {
0471:
0472: case -1:
0473: error(UNEXPECTED_EOF);
0474: return;
0475:
0476: case '\'':
0477: quoted = !quoted;
0478: break;
0479:
0480: case '<':
0481: if (!quoted)
0482: nesting++;
0483: break;
0484:
0485: case '>':
0486: if (!quoted) {
0487: if ((--nesting) == 0)
0488: return;
0489: }
0490: break;
0491: }
0492: if (push)
0493: push(i);
0494: }
0495: }
0496:
0497: /* precondition: </ consumed */
0498:
0499: private final void parseEndTag() throws IOException,
0500: XmlPullParserException {
0501:
0502: read(); // '<'
0503: read(); // '/'
0504: name = readName();
0505: skip();
0506: read('>');
0507:
0508: int sp = (depth - 1) << 2;
0509:
0510: if (depth == 0) {
0511: error("element stack empty");
0512: type = COMMENT;
0513: return;
0514: }
0515:
0516: if (!name.equals(elementStack[sp + 3])) {
0517: error("expected: /" + elementStack[sp + 3] + " read: "
0518: + name);
0519:
0520: // become case insensitive in relaxed mode
0521:
0522: int probe = sp;
0523: while (probe >= 0
0524: && !name.toLowerCase().equals(
0525: elementStack[probe + 3].toLowerCase())) {
0526: stackMismatch++;
0527: probe -= 4;
0528: }
0529:
0530: if (probe < 0) {
0531: stackMismatch = 0;
0532: // text = "unexpected end tag ignored";
0533: type = COMMENT;
0534: return;
0535: }
0536: }
0537:
0538: namespace = elementStack[sp];
0539: prefix = elementStack[sp + 1];
0540: name = elementStack[sp + 2];
0541: }
0542:
0543: private final int peekType() throws IOException {
0544: switch (peek(0)) {
0545: case -1:
0546: return END_DOCUMENT;
0547: case '&':
0548: return ENTITY_REF;
0549: case '<':
0550: switch (peek(1)) {
0551: case '/':
0552: return END_TAG;
0553: case '?':
0554: case '!':
0555: return LEGACY;
0556: default:
0557: return START_TAG;
0558: }
0559: default:
0560: return TEXT;
0561: }
0562: }
0563:
0564: private final String get(int pos) {
0565: return new String(txtBuf, pos, txtPos - pos);
0566: }
0567:
0568: /*
0569: private final String pop (int pos) {
0570: String result = new String (txtBuf, pos, txtPos - pos);
0571: txtPos = pos;
0572: return result;
0573: }
0574: */
0575:
0576: private final void push(int c) {
0577:
0578: isWhitespace &= c <= ' ';
0579:
0580: if (txtPos == txtBuf.length) {
0581: char[] bigger = new char[txtPos * 4 / 3 + 4];
0582: System.arraycopy(txtBuf, 0, bigger, 0, txtPos);
0583: txtBuf = bigger;
0584: }
0585:
0586: txtBuf[txtPos++] = (char) c;
0587: }
0588:
0589: /** Sets name and attributes */
0590:
0591: private final void parseStartTag(boolean xmldecl)
0592: throws IOException, XmlPullParserException {
0593:
0594: if (!xmldecl)
0595: read();
0596: name = readName();
0597: attributeCount = 0;
0598:
0599: while (true) {
0600: skip();
0601:
0602: int c = peek(0);
0603:
0604: if (xmldecl) {
0605: if (c == '?') {
0606: read();
0607: read('>');
0608: return;
0609: }
0610: } else {
0611: if (c == '/') {
0612: degenerated = true;
0613: read();
0614: skip();
0615: read('>');
0616: break;
0617: }
0618:
0619: if (c == '>' && !xmldecl) {
0620: read();
0621: break;
0622: }
0623: }
0624:
0625: if (c == -1) {
0626: error(UNEXPECTED_EOF);
0627: //type = COMMENT;
0628: return;
0629: }
0630:
0631: String attrName = readName();
0632:
0633: if (attrName.length() == 0) {
0634: error("attr name expected");
0635: //type = COMMENT;
0636: break;
0637: }
0638:
0639: int i = (attributeCount++) << 2;
0640:
0641: attributes = ensureCapacity(attributes, i + 4);
0642:
0643: attributes[i++] = "";
0644: attributes[i++] = null;
0645: attributes[i++] = attrName;
0646:
0647: skip();
0648:
0649: if (peek(0) != '=') {
0650: error("Attr.value missing f. " + attrName);
0651: attributes[i] = "1";
0652: } else {
0653: read('=');
0654: skip();
0655: int delimiter = peek(0);
0656:
0657: if (delimiter != '\'' && delimiter != '"') {
0658: error("attr value delimiter missing!");
0659: delimiter = ' ';
0660: } else
0661: read();
0662:
0663: int p = txtPos;
0664: pushText(delimiter, true);
0665:
0666: attributes[i] = get(p);
0667: txtPos = p;
0668:
0669: if (delimiter != ' ')
0670: read(); // skip endquote
0671: }
0672: }
0673:
0674: int sp = depth++ << 2;
0675:
0676: elementStack = ensureCapacity(elementStack, sp + 4);
0677: elementStack[sp + 3] = name;
0678:
0679: if (depth >= nspCounts.length) {
0680: int[] bigger = new int[depth + 4];
0681: System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length);
0682: nspCounts = bigger;
0683: }
0684:
0685: nspCounts[depth] = nspCounts[depth - 1];
0686:
0687: /*
0688: if(!relaxed){
0689: for (int i = attributeCount - 1; i > 0; i--) {
0690: for (int j = 0; j < i; j++) {
0691: if (getAttributeName(i).equals(getAttributeName(j)))
0692: exception("Duplicate Attribute: " + getAttributeName(i));
0693: }
0694: }
0695: }
0696: */
0697: if (processNsp)
0698: adjustNsp();
0699: else
0700: namespace = "";
0701:
0702: elementStack[sp] = namespace;
0703: elementStack[sp + 1] = prefix;
0704: elementStack[sp + 2] = name;
0705: }
0706:
0707: /**
0708: * result: isWhitespace; if the setName parameter is set,
0709: * the name of the entity is stored in "name" */
0710:
0711: private final void pushEntity() throws IOException,
0712: XmlPullParserException {
0713:
0714: push(read()); // &
0715:
0716: int pos = txtPos;
0717:
0718: while (true) {
0719: int c = read();
0720: if (c == ';')
0721: break;
0722: if (c < 128 && (c < '0' || c > '9') && (c < 'a' || c > 'z')
0723: && (c < 'A' || c > 'Z') && c != '_' && c != '-'
0724: && c != '#') {
0725: if (!relaxed) {
0726: error("unterminated entity ref");
0727: }
0728: //; ends with:"+(char)c);
0729: if (c != -1)
0730: push(c);
0731: return;
0732: }
0733:
0734: push(c);
0735: }
0736:
0737: String code = get(pos);
0738: txtPos = pos - 1;
0739: if (token && type == ENTITY_REF) {
0740: name = code;
0741: }
0742:
0743: if (code.charAt(0) == '#') {
0744: int c = (code.charAt(1) == 'x' ? Integer.parseInt(code
0745: .substring(2), 16) : Integer.parseInt(code
0746: .substring(1)));
0747: push(c);
0748: return;
0749: }
0750:
0751: String result = (String) entityMap.get(code);
0752:
0753: unresolved = result == null;
0754:
0755: if (unresolved) {
0756: if (!token)
0757: error("unresolved: &" + code + ";");
0758: } else {
0759: for (int i = 0; i < result.length(); i++)
0760: push(result.charAt(i));
0761: }
0762: }
0763:
0764: /** types:
0765: '<': parse to any token (for nextToken ())
0766: '"': parse to quote
0767: ' ': parse to whitespace or '>'
0768: */
0769:
0770: private final void pushText(int delimiter, boolean resolveEntities)
0771: throws IOException, XmlPullParserException {
0772:
0773: int next = peek(0);
0774: int cbrCount = 0;
0775:
0776: while (next != -1 && next != delimiter) { // covers eof, '<', '"'
0777:
0778: if (delimiter == ' ')
0779: if (next <= ' ' || next == '>')
0780: break;
0781:
0782: if (next == '&') {
0783: if (!resolveEntities)
0784: break;
0785:
0786: pushEntity();
0787: } else if (next == '\n' && type == START_TAG) {
0788: read();
0789: push(' ');
0790: } else
0791: push(read());
0792:
0793: if (next == '>' && cbrCount >= 2 && delimiter != ']')
0794: error("Illegal: ]]>");
0795:
0796: if (next == ']')
0797: cbrCount++;
0798: else
0799: cbrCount = 0;
0800:
0801: next = peek(0);
0802: }
0803: }
0804:
0805: private final void read(char c) throws IOException,
0806: XmlPullParserException {
0807: int a = read();
0808: if (a != c)
0809: error("expected: '" + c + "' actual: '" + ((char) a) + "'");
0810: }
0811:
0812: private final int read() throws IOException {
0813: int result;
0814:
0815: if (peekCount == 0)
0816: result = peek(0);
0817: else {
0818: result = peek[0];
0819: peek[0] = peek[1];
0820: }
0821: // else {
0822: // result = peek[0];
0823: // System.arraycopy (peek, 1, peek, 0, peekCount-1);
0824: // }
0825: peekCount--;
0826:
0827: column++;
0828:
0829: if (result == '\n') {
0830:
0831: line++;
0832: column = 1;
0833: }
0834:
0835: return result;
0836: }
0837:
0838: /** Does never read more than needed */
0839:
0840: private final int peek(int pos) throws IOException {
0841:
0842: while (pos >= peekCount) {
0843:
0844: int nw;
0845:
0846: if (srcBuf.length <= 1)
0847: nw = reader.read();
0848: else if (srcPos < srcCount)
0849: nw = srcBuf[srcPos++];
0850: else {
0851: srcCount = reader.read(srcBuf, 0, srcBuf.length);
0852: if (srcCount <= 0)
0853: nw = -1;
0854: else
0855: nw = srcBuf[0];
0856:
0857: srcPos = 1;
0858: }
0859:
0860: if (nw == '\r') {
0861: wasCR = true;
0862: peek[peekCount++] = '\n';
0863: } else {
0864: if (nw == '\n') {
0865: if (!wasCR)
0866: peek[peekCount++] = '\n';
0867: } else
0868: peek[peekCount++] = nw;
0869:
0870: wasCR = false;
0871: }
0872: }
0873:
0874: return peek[pos];
0875: }
0876:
0877: private final String readName() throws IOException,
0878: XmlPullParserException {
0879:
0880: int pos = txtPos;
0881: int c = peek(0);
0882: if ((c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && c != '_'
0883: && c != ':' && c < 0x0c0 && !relaxed)
0884: error("name expected");
0885:
0886: do {
0887: push(read());
0888: c = peek(0);
0889: } while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
0890: || (c >= '0' && c <= '9') || c == '_' || c == '-'
0891: || c == ':' || c == '.' || c >= 0x0b7);
0892:
0893: String result = get(pos);
0894: txtPos = pos;
0895: return result;
0896: }
0897:
0898: private final void skip() throws IOException {
0899:
0900: while (true) {
0901: int c = peek(0);
0902: if (c > ' ' || c == -1)
0903: break;
0904: read();
0905: }
0906: }
0907:
0908: // public part starts here...
0909:
0910: public void setInput(Reader reader) throws XmlPullParserException {
0911: this .reader = reader;
0912:
0913: line = 1;
0914: column = 0;
0915: type = START_DOCUMENT;
0916: name = null;
0917: namespace = null;
0918: degenerated = false;
0919: attributeCount = -1;
0920: encoding = null;
0921: version = null;
0922: standalone = null;
0923:
0924: if (reader == null)
0925: return;
0926:
0927: srcPos = 0;
0928: srcCount = 0;
0929: peekCount = 0;
0930: depth = 0;
0931:
0932: entityMap = new Hashtable();
0933: entityMap.put("amp", "&");
0934: entityMap.put("apos", "'");
0935: entityMap.put("gt", ">");
0936: entityMap.put("lt", "<");
0937: entityMap.put("quot", "\"");
0938: }
0939:
0940: public void setInput(InputStream is, String _enc)
0941: throws XmlPullParserException {
0942:
0943: srcPos = 0;
0944: srcCount = 0;
0945: String enc = _enc;
0946:
0947: if (is == null)
0948: throw new IllegalArgumentException();
0949:
0950: try {
0951:
0952: if (enc == null) {
0953: // read four bytes
0954:
0955: int chk = 0;
0956:
0957: while (srcCount < 4) {
0958: int i = is.read();
0959: if (i == -1)
0960: break;
0961: chk = (chk << 8) | i;
0962: srcBuf[srcCount++] = (char) i;
0963: }
0964:
0965: if (srcCount == 4) {
0966: switch (chk) {
0967: case 0x00000FEFF:
0968: enc = "UTF-32BE";
0969: srcCount = 0;
0970: break;
0971:
0972: case 0x0FFFE0000:
0973: enc = "UTF-32LE";
0974: srcCount = 0;
0975: break;
0976:
0977: case 0x03c:
0978: enc = "UTF-32BE";
0979: srcBuf[0] = '<';
0980: srcCount = 1;
0981: break;
0982:
0983: case 0x03c000000:
0984: enc = "UTF-32LE";
0985: srcBuf[0] = '<';
0986: srcCount = 1;
0987: break;
0988:
0989: case 0x0003c003f:
0990: enc = "UTF-16BE";
0991: srcBuf[0] = '<';
0992: srcBuf[1] = '?';
0993: srcCount = 2;
0994: break;
0995:
0996: case 0x03c003f00:
0997: enc = "UTF-16LE";
0998: srcBuf[0] = '<';
0999: srcBuf[1] = '?';
1000: srcCount = 2;
1001: break;
1002:
1003: case 0x03c3f786d:
1004: while (true) {
1005: int i = is.read();
1006: if (i == -1)
1007: break;
1008: srcBuf[srcCount++] = (char) i;
1009: if (i == '>') {
1010: String s = new String(srcBuf, 0,
1011: srcCount);
1012: int i0 = s.indexOf("encoding");
1013: if (i0 != -1) {
1014: while (s.charAt(i0) != '"'
1015: && s.charAt(i0) != '\'')
1016: i0++;
1017: char deli = s.charAt(i0++);
1018: int i1 = s.indexOf(deli, i0);
1019: enc = s.substring(i0, i1);
1020: }
1021: break;
1022: }
1023: }
1024:
1025: default:
1026: if ((chk & 0x0ffff0000) == 0x0FEFF0000) {
1027: enc = "UTF-16BE";
1028: srcBuf[0] = (char) ((srcBuf[2] << 8) | srcBuf[3]);
1029: srcCount = 1;
1030: } else if ((chk & 0x0ffff0000) == 0x0fffe0000) {
1031: enc = "UTF-16LE";
1032: srcBuf[0] = (char) ((srcBuf[3] << 8) | srcBuf[2]);
1033: srcCount = 1;
1034: } else if ((chk & 0x0ffffff00) == 0x0EFBBBF00) {
1035: enc = "UTF-8";
1036: srcBuf[0] = srcBuf[3];
1037: srcCount = 1;
1038: }
1039: }
1040: }
1041: }
1042:
1043: if (enc == null)
1044: enc = "UTF-8";
1045:
1046: int sc = srcCount;
1047: setInput(new InputStreamReader(is, enc));
1048: encoding = _enc;
1049: srcCount = sc;
1050: } catch (Exception e) {
1051: throw new XmlPullParserException(
1052: "Invalid stream or encoding: " + e.toString(),
1053: this , e);
1054: }
1055: }
1056:
1057: public boolean getFeature(String feature) {
1058: if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature))
1059: return processNsp;
1060: else if (isProp(feature, false, "relaxed"))
1061: return relaxed;
1062: else
1063: return false;
1064: }
1065:
1066: public String getInputEncoding() {
1067: return encoding;
1068: }
1069:
1070: public void defineEntityReplacementText(String entity, String value)
1071: throws XmlPullParserException {
1072: if (entityMap == null)
1073: throw new RuntimeException(
1074: "entity replacement text must be defined after setInput!");
1075: entityMap.put(entity, value);
1076: }
1077:
1078: public Object getProperty(String property) {
1079: if (isProp(property, true, "xmldecl-version"))
1080: return version;
1081: if (isProp(property, true, "xmldecl-standalone"))
1082: return standalone;
1083: if (isProp(property, true, "location"))
1084: return location != null ? location : reader.toString();
1085: return null;
1086: }
1087:
1088: public int getNamespaceCount(int depth) {
1089: if (depth > this .depth)
1090: throw new IndexOutOfBoundsException();
1091: return nspCounts[depth];
1092: }
1093:
1094: public String getNamespacePrefix(int pos) {
1095: return nspStack[pos << 1];
1096: }
1097:
1098: public String getNamespaceUri(int pos) {
1099: return nspStack[(pos << 1) + 1];
1100: }
1101:
1102: public String getNamespace(String prefix) {
1103:
1104: if ("xml".equals(prefix))
1105: return "http://www.w3.org/XML/1998/namespace";
1106: if ("xmlns".equals(prefix))
1107: return "http://www.w3.org/2000/xmlns/";
1108:
1109: for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) {
1110: if (prefix == null) {
1111: if (nspStack[i] == null)
1112: return nspStack[i + 1];
1113: } else if (prefix.equals(nspStack[i]))
1114: return nspStack[i + 1];
1115: }
1116: return null;
1117: }
1118:
1119: public int getDepth() {
1120: return depth;
1121: }
1122:
1123: public String getPositionDescription() {
1124:
1125: StringBuffer buf = new StringBuffer(
1126: type < TYPES.length ? TYPES[type] : "unknown");
1127: buf.append(' ');
1128:
1129: if (type == START_TAG || type == END_TAG) {
1130: if (degenerated)
1131: buf.append("(empty) ");
1132: buf.append('<');
1133: if (type == END_TAG)
1134: buf.append('/');
1135:
1136: if (prefix != null)
1137: buf.append("{" + namespace + "}" + prefix + ":");
1138: buf.append(name);
1139:
1140: int cnt = attributeCount << 2;
1141: for (int i = 0; i < cnt; i += 4) {
1142: buf.append(' ');
1143: if (attributes[i + 1] != null)
1144: buf.append("{" + attributes[i] + "}"
1145: + attributes[i + 1] + ":");
1146: buf.append(attributes[i + 2] + "='" + attributes[i + 3]
1147: + "'");
1148: }
1149:
1150: buf.append('>');
1151: } else if (type == IGNORABLE_WHITESPACE)
1152: ;
1153: else if (type != TEXT)
1154: buf.append(getText());
1155: else if (isWhitespace)
1156: buf.append("(whitespace)");
1157: else {
1158: String text = getText();
1159: if (text.length() > 16)
1160: text = text.substring(0, 16) + "...";
1161: buf.append(text);
1162: }
1163:
1164: buf.append("@" + line + ":" + column);
1165: if (location != null) {
1166: buf.append(" in ");
1167: buf.append(location);
1168: } else if (reader != null) {
1169: buf.append(" in ");
1170: buf.append(reader.toString());
1171: }
1172: return buf.toString();
1173: }
1174:
1175: public int getLineNumber() {
1176: return line;
1177: }
1178:
1179: public int getColumnNumber() {
1180: return column;
1181: }
1182:
1183: public boolean isWhitespace() throws XmlPullParserException {
1184: if (type != TEXT && type != IGNORABLE_WHITESPACE
1185: && type != CDSECT)
1186: exception(ILLEGAL_TYPE);
1187: return isWhitespace;
1188: }
1189:
1190: public String getText() {
1191: return type < TEXT || (type == ENTITY_REF && unresolved) ? null
1192: : get(0);
1193: }
1194:
1195: public char[] getTextCharacters(int[] poslen) {
1196: if (type >= TEXT) {
1197: if (type == ENTITY_REF) {
1198: poslen[0] = 0;
1199: poslen[1] = name.length();
1200: return name.toCharArray();
1201: }
1202: poslen[0] = 0;
1203: poslen[1] = txtPos;
1204: return txtBuf;
1205: }
1206:
1207: poslen[0] = -1;
1208: poslen[1] = -1;
1209: return null;
1210: }
1211:
1212: public String getNamespace() {
1213: return namespace;
1214: }
1215:
1216: public String getName() {
1217: return name;
1218: }
1219:
1220: public String getPrefix() {
1221: return prefix;
1222: }
1223:
1224: public boolean isEmptyElementTag() throws XmlPullParserException {
1225: if (type != START_TAG)
1226: exception(ILLEGAL_TYPE);
1227: return degenerated;
1228: }
1229:
1230: public int getAttributeCount() {
1231: return attributeCount;
1232: }
1233:
1234: public String getAttributeType(int index) {
1235: return "CDATA";
1236: }
1237:
1238: public boolean isAttributeDefault(int index) {
1239: return false;
1240: }
1241:
1242: public String getAttributeNamespace(int index) {
1243: if (index >= attributeCount)
1244: throw new IndexOutOfBoundsException();
1245: return attributes[index << 2];
1246: }
1247:
1248: public String getAttributeName(int index) {
1249: if (index >= attributeCount)
1250: throw new IndexOutOfBoundsException();
1251: return attributes[(index << 2) + 2];
1252: }
1253:
1254: public String getAttributePrefix(int index) {
1255: if (index >= attributeCount)
1256: throw new IndexOutOfBoundsException();
1257: return attributes[(index << 2) + 1];
1258: }
1259:
1260: public String getAttributeValue(int index) {
1261: if (index >= attributeCount)
1262: throw new IndexOutOfBoundsException();
1263: return attributes[(index << 2) + 3];
1264: }
1265:
1266: public String getAttributeValue(String namespace, String name) {
1267:
1268: for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {
1269: if (attributes[i + 2].equals(name)
1270: && (namespace == null || attributes[i]
1271: .equals(namespace)))
1272: return attributes[i + 3];
1273: }
1274:
1275: return null;
1276: }
1277:
1278: public int getEventType() throws XmlPullParserException {
1279: return type;
1280: }
1281:
1282: public int next() throws XmlPullParserException, IOException {
1283:
1284: txtPos = 0;
1285: isWhitespace = true;
1286: int minType = 9999;
1287: token = false;
1288:
1289: do {
1290: nextImpl();
1291: if (type < minType)
1292: minType = type;
1293: // if (curr <= TEXT) type = curr;
1294: } while (minType > ENTITY_REF // ignorable
1295: || (minType >= TEXT && peekType() >= TEXT));
1296:
1297: type = minType;
1298: if (type > TEXT)
1299: type = TEXT;
1300:
1301: return type;
1302: }
1303:
1304: public int nextToken() throws XmlPullParserException, IOException {
1305:
1306: isWhitespace = true;
1307: txtPos = 0;
1308:
1309: token = true;
1310: nextImpl();
1311: return type;
1312: }
1313:
1314: //
1315: // utility methods to make XML parsing easier ...
1316:
1317: public int nextTag() throws XmlPullParserException, IOException {
1318:
1319: next();
1320: if (type == TEXT && isWhitespace)
1321: next();
1322:
1323: if (type != END_TAG && type != START_TAG)
1324: exception("unexpected type");
1325:
1326: return type;
1327: }
1328:
1329: public void require(int type, String namespace, String name)
1330: throws XmlPullParserException, IOException {
1331:
1332: if (type != this .type
1333: || (namespace != null && !namespace
1334: .equals(getNamespace()))
1335: || (name != null && !name.equals(getName())))
1336: exception("expected: " + TYPES[type] + " {" + namespace
1337: + "}" + name);
1338: }
1339:
1340: public String nextText() throws XmlPullParserException, IOException {
1341: if (type != START_TAG)
1342: exception("precondition: START_TAG");
1343:
1344: next();
1345:
1346: String result;
1347:
1348: if (type == TEXT) {
1349: result = getText();
1350: next();
1351: } else
1352: result = "";
1353:
1354: if (type != END_TAG)
1355: exception("END_TAG expected");
1356:
1357: return result;
1358: }
1359:
1360: public void setFeature(String feature, boolean value)
1361: throws XmlPullParserException {
1362: if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature))
1363: processNsp = value;
1364: else if (isProp(feature, false, "relaxed"))
1365: relaxed = value;
1366: else
1367: exception("unsupported feature: " + feature);
1368: }
1369:
1370: public void setProperty(String property, Object value)
1371: throws XmlPullParserException {
1372: if (isProp(property, true, "location"))
1373: location = value;
1374: else
1375: throw new XmlPullParserException("unsupported property: "
1376: + property);
1377: }
1378:
1379: /**
1380: * Skip sub tree that is currently porser positioned on.
1381: * <br>NOTE: parser must be on START_TAG and when funtion returns
1382: * parser will be positioned on corresponding END_TAG.
1383: */
1384:
1385: // Implementation copied from Alek's mail...
1386: public void skipSubTree() throws XmlPullParserException,
1387: IOException {
1388: require(START_TAG, null, null);
1389: int level = 1;
1390: while (level > 0) {
1391: int eventType = next();
1392: if (eventType == END_TAG) {
1393: --level;
1394: } else if (eventType == START_TAG) {
1395: ++level;
1396: }
1397: }
1398: }
1399: }
|