001: package com.jclark.xml.tok;
002:
003: /**
004: * An XML TextDecl.
005: * @version $Revision: 1.4 $ $Date: 1998/12/30 02:42:01 $
006: */
007: public class TextDecl {
008: private String version;
009: private String encoding;
010:
011: /**
012: * Creates a <code>TextDecl</code> from the specified byte subarray.
013: * The specified encoding is used to convert bytes to characters.
014: * The byte subarray should be a <code>TOK_XML_DECL</code> token
015: * returned from Encoding.tokenizeProlog or Encoding.tokenizeContent,
016: * starting with <code><?</code> and ending with <code>?></code>.
017: * @exception InvalidTokenException if the specified byte subarray
018: * is not a legal XML TextDecl.
019: */
020: public TextDecl(Encoding enc, byte[] buf, int off, int end)
021: throws InvalidTokenException {
022: init(false, enc, buf, off, end);
023: }
024:
025: /**
026: * Return the encoding specified in the declaration, or null
027: * if no encoding was specified.
028: */
029: public String getEncoding() {
030: return encoding;
031: }
032:
033: /**
034: * Return the version specified in the declaration, or null
035: * if no version was specified.
036: */
037: public String getVersion() {
038: return version;
039: }
040:
041: TextDecl() {
042: }
043:
044: boolean init(boolean isXmlDecl, Encoding enc, byte[] buf, int off,
045: int end) throws InvalidTokenException {
046: // Skip <?xml
047: off += enc.getMinBytesPerChar() * 5;
048: // Skip ?>
049: end -= enc.getMinBytesPerChar() * 2;
050: ContentToken ct = new ContentToken();
051: int firstErrorIndex = -1;
052: try {
053: parsePseudoAttributes(enc, buf, off, end, ct);
054: } catch (InvalidTokenException e) {
055: firstErrorIndex = e.getOffset();
056: }
057: int nAtts = ct.getAttributeSpecifiedCount();
058: if (nAtts == 0) {
059: if (firstErrorIndex == -1)
060: firstErrorIndex = end;
061: throw new InvalidTokenException(firstErrorIndex);
062: }
063: String[] names = new String[nAtts];
064: String[] values = new String[nAtts];
065: char[] cbuf = new char[32];
066: for (int i = 0; i < nAtts; i++) {
067: int s = ct.getAttributeNameStart(i);
068: int e = ct.getAttributeNameEnd(i);
069: if (e - s > cbuf.length)
070: cbuf = new char[e - s];
071: names[i] = new String(cbuf, 0, enc.convert(buf, s, e, cbuf,
072: 0));
073: s = ct.getAttributeValueStart(i);
074: e = ct.getAttributeValueEnd(i);
075: if (e - s > cbuf.length)
076: cbuf = new char[e - s];
077: values[i] = new String(cbuf, 0, enc.convert(buf, s, e,
078: cbuf, 0));
079: }
080: int att = 0;
081: if (names[0].equals("version")) {
082: version = values[0];
083: att++;
084: }
085: if ((att == 1 || !isXmlDecl) && att < nAtts
086: && names[att].equals("encoding")) {
087: encoding = values[att];
088: if (values[att].length() == 0
089: || !Character.isLetter(values[att].charAt(0))
090: || values[att].indexOf(':') >= 0) {
091: int k = ct.getAttributeValueStart(att);
092: if (firstErrorIndex == -1 || k < firstErrorIndex)
093: firstErrorIndex = k;
094: }
095: att++;
096: } else if (!isXmlDecl)
097: firstErrorIndex = 0; // encoding is required in a TextDecl
098: boolean standalone = false;
099: if (isXmlDecl && att > 0 && att < nAtts
100: && names[att].equals("standalone")) {
101: if (values[att].equals("yes"))
102: standalone = true;
103: else if (!values[att].equals("no")) {
104: int k = ct.getAttributeValueStart(att);
105: if (firstErrorIndex == -1 || k < firstErrorIndex)
106: firstErrorIndex = k;
107: }
108: att++;
109: }
110: if (att < nAtts) {
111: int k = ct.getAttributeNameStart(att);
112: if (firstErrorIndex == -1 || k < firstErrorIndex)
113: firstErrorIndex = k;
114: }
115: if (firstErrorIndex != -1)
116: throw new InvalidTokenException(firstErrorIndex);
117: return standalone;
118: }
119:
120: private final void parsePseudoAttributes(Encoding enc, byte[] buf,
121: int off, int end, ContentToken ct)
122: throws InvalidTokenException {
123: final int minBPC = enc.getMinBytesPerChar();
124: for (;;) {
125: off = skipWS(enc, buf, off, end);
126: if (off == end)
127: break;
128: int nameStart = off;
129: int nameEnd;
130: nameLoop: for (;;) {
131: switch (enc.byteType(buf, off)) {
132: case Encoding.BT_NMSTRT:
133: break;
134: case Encoding.BT_EQUALS:
135: nameEnd = off;
136: break nameLoop;
137: case Encoding.BT_S:
138: case Encoding.BT_LF:
139: case Encoding.BT_CR:
140: nameEnd = off;
141: off += minBPC;
142: off = skipWS(enc, buf, off, end);
143: if (off == end || !enc.charMatches(buf, off, '='))
144: throw new InvalidTokenException(off);
145: break nameLoop;
146: default:
147: throw new InvalidTokenException(off);
148: }
149: off += minBPC;
150: if (off == end)
151: throw new InvalidTokenException(off);
152: }
153: off += minBPC;
154: off = skipWS(enc, buf, off, end);
155: if (off == end
156: || !(enc.charMatches(buf, off, '\'') || enc
157: .charMatches(buf, off, '"')))
158: throw new InvalidTokenException(off);
159: off += minBPC;
160: int valueStart = off;
161: valueLoop: for (;;) {
162: if (off == end)
163: throw new InvalidTokenException(off);
164: switch (enc.byteType(buf, off)) {
165: case Encoding.BT_NMSTRT:
166: case Encoding.BT_NAME:
167: case Encoding.BT_MINUS:
168: if ((enc.byteToAscii(buf, off) & ~0x7F) != 0)
169: throw new InvalidTokenException(off);
170: off += minBPC;
171: break;
172: case Encoding.BT_QUOT:
173: case Encoding.BT_APOS:
174: if (enc.byteType(buf, off) != enc.byteType(buf,
175: valueStart - minBPC))
176: throw new InvalidTokenException(off);
177: break valueLoop;
178: default:
179: throw new InvalidTokenException(off);
180: }
181: }
182: ct.appendAttribute(nameStart, nameEnd, valueStart, off,
183: true);
184: off += minBPC;
185: if (off == end)
186: break;
187: switch (enc.byteType(buf, off)) {
188: case Encoding.BT_S:
189: case Encoding.BT_LF:
190: case Encoding.BT_CR:
191: off += minBPC;
192: break;
193: default:
194: throw new InvalidTokenException(off);
195: }
196: }
197: }
198:
199: private int skipWS(Encoding enc, byte[] buf, int off, int end) {
200: loop: while (off != end) {
201: switch (enc.byteType(buf, off)) {
202: case Encoding.BT_S:
203: case Encoding.BT_LF:
204: case Encoding.BT_CR:
205: off += enc.getMinBytesPerChar();
206: break;
207: default:
208: break loop;
209: }
210: }
211: return off;
212: }
213: }
|