001: package com.jclark.xml.tok;
002:
003: /**
004: * Parses the prolog of an XML document.
005: * A <code>PrologParser</code> object represents the state of a parse
006: * of the prolog.
007: * It operates on the tokens returned
008: * by <code>Encoding.tokenizeProlog</code>.
009: * It does not build any data structures to represent the information
010: * in the prolog; instead it tells the caller the action needed
011: * for each token.
012: * The state of the parse can be saved by using the <code>clone</code>
013: * method.
014: * @version $Revision: 1.8 $ $Date: 1998/10/30 02:25:20 $
015: */
016: public class PrologParser implements Cloneable {
017: public static final int ACTION_NONE = 0;
018: public static final int ACTION_XML_DECL = ACTION_NONE + 1;
019: public static final int ACTION_TEXT_DECL = ACTION_XML_DECL + 1;
020: public static final int ACTION_PI = ACTION_TEXT_DECL + 1;
021: public static final int ACTION_COMMENT = ACTION_PI + 1;
022: public static final int ACTION_DOCTYPE_NAME = ACTION_COMMENT + 1;
023: public static final int ACTION_DOCTYPE_SYSTEM_ID = ACTION_DOCTYPE_NAME + 1;
024: public static final int ACTION_DOCTYPE_PUBLIC_ID = ACTION_DOCTYPE_SYSTEM_ID + 1;
025: public static final int ACTION_DOCTYPE_SUBSET = ACTION_DOCTYPE_PUBLIC_ID + 1;
026: public static final int ACTION_DOCTYPE_CLOSE = ACTION_DOCTYPE_SUBSET + 1;
027: public static final int ACTION_GENERAL_ENTITY_NAME = ACTION_DOCTYPE_CLOSE + 1;
028: public static final int ACTION_PARAM_ENTITY_NAME = ACTION_GENERAL_ENTITY_NAME + 1;
029: public static final int ACTION_ENTITY_VALUE_WITH_PEREFS = ACTION_PARAM_ENTITY_NAME + 1;
030: public static final int ACTION_ENTITY_VALUE_NO_PEREFS = ACTION_ENTITY_VALUE_WITH_PEREFS + 1;
031: public static final int ACTION_ENTITY_SYSTEM_ID = ACTION_ENTITY_VALUE_NO_PEREFS + 1;
032: public static final int ACTION_ENTITY_PUBLIC_ID = ACTION_ENTITY_SYSTEM_ID + 1;
033: public static final int ACTION_ENTITY_NOTATION_NAME = ACTION_ENTITY_PUBLIC_ID + 1;
034: public static final int ACTION_NOTATION_NAME = ACTION_ENTITY_NOTATION_NAME + 1;
035: public static final int ACTION_NOTATION_SYSTEM_ID = ACTION_NOTATION_NAME + 1;
036: public static final int ACTION_NOTATION_PUBLIC_ID = ACTION_NOTATION_SYSTEM_ID + 1;
037: public static final int ACTION_ATTRIBUTE_NAME = ACTION_NOTATION_PUBLIC_ID + 1;
038: public static final int ACTION_ATTRIBUTE_TYPE_CDATA = ACTION_ATTRIBUTE_NAME + 1;
039: public static final int ACTION_ATTRIBUTE_TYPE_ID = ACTION_ATTRIBUTE_TYPE_CDATA + 1;
040: public static final int ACTION_ATTRIBUTE_TYPE_IDREF = ACTION_ATTRIBUTE_TYPE_ID + 1;
041: public static final int ACTION_ATTRIBUTE_TYPE_IDREFS = ACTION_ATTRIBUTE_TYPE_IDREF + 1;
042: public static final int ACTION_ATTRIBUTE_TYPE_ENTITY = ACTION_ATTRIBUTE_TYPE_IDREFS + 1;
043: public static final int ACTION_ATTRIBUTE_TYPE_ENTITIES = ACTION_ATTRIBUTE_TYPE_ENTITY + 1;
044: public static final int ACTION_ATTRIBUTE_TYPE_NMTOKEN = ACTION_ATTRIBUTE_TYPE_ENTITIES + 1;
045: public static final int ACTION_ATTRIBUTE_TYPE_NMTOKENS = ACTION_ATTRIBUTE_TYPE_NMTOKEN + 1;
046: public static final int ACTION_ATTRIBUTE_ENUM_VALUE = ACTION_ATTRIBUTE_TYPE_NMTOKENS + 1;
047: public static final int ACTION_ATTRIBUTE_NOTATION_VALUE = ACTION_ATTRIBUTE_ENUM_VALUE + 1;
048: public static final int ACTION_ATTLIST_ELEMENT_NAME = ACTION_ATTRIBUTE_NOTATION_VALUE + 1;
049: public static final int ACTION_IMPLIED_ATTRIBUTE_VALUE = ACTION_ATTLIST_ELEMENT_NAME + 1;
050: public static final int ACTION_REQUIRED_ATTRIBUTE_VALUE = ACTION_IMPLIED_ATTRIBUTE_VALUE + 1;
051: public static final int ACTION_DEFAULT_ATTRIBUTE_VALUE = ACTION_REQUIRED_ATTRIBUTE_VALUE + 1;
052: public static final int ACTION_FIXED_ATTRIBUTE_VALUE = ACTION_DEFAULT_ATTRIBUTE_VALUE + 1;
053: public static final int ACTION_ELEMENT_NAME = ACTION_FIXED_ATTRIBUTE_VALUE + 1;
054: public static final int ACTION_CONTENT_ANY = ACTION_ELEMENT_NAME + 1;
055: public static final int ACTION_CONTENT_EMPTY = ACTION_CONTENT_ANY + 1;
056: public static final int ACTION_CONTENT_PCDATA = ACTION_CONTENT_EMPTY + 1;
057: public static final int ACTION_GROUP_OPEN = ACTION_CONTENT_PCDATA + 1;
058: public static final int ACTION_GROUP_CLOSE = ACTION_GROUP_OPEN + 1;
059: public static final int ACTION_GROUP_CLOSE_REP = ACTION_GROUP_CLOSE + 1;
060: public static final int ACTION_GROUP_CLOSE_OPT = ACTION_GROUP_CLOSE_REP + 1;
061: public static final int ACTION_GROUP_CLOSE_PLUS = ACTION_GROUP_CLOSE_OPT + 1;
062: public static final int ACTION_GROUP_CHOICE = ACTION_GROUP_CLOSE_PLUS + 1;
063: public static final int ACTION_GROUP_SEQUENCE = ACTION_GROUP_CHOICE + 1;
064: public static final int ACTION_CONTENT_ELEMENT = ACTION_GROUP_SEQUENCE + 1;
065: public static final int ACTION_CONTENT_ELEMENT_REP = ACTION_CONTENT_ELEMENT + 1;
066: public static final int ACTION_CONTENT_ELEMENT_OPT = ACTION_CONTENT_ELEMENT_REP + 1;
067: public static final int ACTION_CONTENT_ELEMENT_PLUS = ACTION_CONTENT_ELEMENT_OPT + 1;
068: public static final int ACTION_OUTER_PARAM_ENTITY_REF = ACTION_CONTENT_ELEMENT_PLUS + 1;
069: public static final int ACTION_INNER_PARAM_ENTITY_REF = ACTION_OUTER_PARAM_ENTITY_REF + 1;
070: public static final int ACTION_IGNORE_SECT = ACTION_INNER_PARAM_ENTITY_REF + 1;
071: public static final int ACTION_DECL_CLOSE = ACTION_IGNORE_SECT + 1;
072:
073: private static final byte prolog0 = 0;
074: private static final byte prolog1 = prolog0 + 1;
075: private static final byte prolog2 = prolog1 + 1;
076: private static final byte doctype0 = prolog2 + 1;
077: private static final byte doctype1 = doctype0 + 1;
078: private static final byte doctype2 = doctype1 + 1;
079: private static final byte doctype3 = doctype2 + 1;
080: private static final byte doctype4 = doctype3 + 1;
081: private static final byte doctype5 = doctype4 + 1;
082: private static final byte internalSubset = doctype5 + 1;
083: private static final byte entity0 = internalSubset + 1;
084: private static final byte entity1 = entity0 + 1;
085: private static final byte entity2 = entity1 + 1;
086: private static final byte entity3 = entity2 + 1;
087: private static final byte entity4 = entity3 + 1;
088: private static final byte entity5 = entity4 + 1;
089: private static final byte entity6 = entity5 + 1;
090: private static final byte entity7 = entity6 + 1;
091: private static final byte entity8 = entity7 + 1;
092: private static final byte entity9 = entity8 + 1;
093: private static final byte notation0 = entity9 + 1;
094: private static final byte notation1 = notation0 + 1;
095: private static final byte notation2 = notation1 + 1;
096: private static final byte notation3 = notation2 + 1;
097: private static final byte notation4 = notation3 + 1;
098: private static final byte attlist0 = notation4 + 1;
099: private static final byte attlist1 = attlist0 + 1;
100: private static final byte attlist2 = attlist1 + 1;
101: private static final byte attlist3 = attlist2 + 1;
102: private static final byte attlist4 = attlist3 + 1;
103: private static final byte attlist5 = attlist4 + 1;
104: private static final byte attlist6 = attlist5 + 1;
105: private static final byte attlist7 = attlist6 + 1;
106: private static final byte attlist8 = attlist7 + 1;
107: private static final byte attlist9 = attlist8 + 1;
108: private static final byte element0 = attlist9 + 1;
109: private static final byte element1 = element0 + 1;
110: private static final byte element2 = element1 + 1;
111: private static final byte element3 = element2 + 1;
112: private static final byte element4 = element3 + 1;
113: private static final byte element5 = element4 + 1;
114: private static final byte element6 = element5 + 1;
115: private static final byte element7 = element6 + 1;
116: private static final byte declClose = element7 + 1;
117: private static final byte externalSubset0 = declClose + 1;
118: private static final byte externalSubset1 = externalSubset0 + 1;
119: private static final byte condSect0 = externalSubset1 + 1;
120: private static final byte condSect1 = condSect0 + 1;
121: private static final byte condSect2 = condSect1 + 1;
122:
123: private byte state;
124: private int groupLevel;
125: private int includeLevel;
126: private byte connector[] = new byte[2];
127: private boolean documentEntity;
128:
129: public static final byte PROLOG = 0;
130: public static final byte EXTERNAL_ENTITY = 1;
131: public static final byte INTERNAL_ENTITY = 2;
132:
133: public PrologParser(byte type) {
134: switch (type) {
135: case PROLOG:
136: documentEntity = true;
137: state = prolog0;
138: break;
139: case EXTERNAL_ENTITY:
140: documentEntity = false;
141: state = externalSubset0;
142: break;
143: case INTERNAL_ENTITY:
144: documentEntity = false;
145: state = externalSubset1;
146: break;
147: default:
148: throw new IllegalArgumentException();
149: }
150: }
151:
152: public final void end() throws PrologSyntaxException {
153: switch (state) {
154: case prolog0:
155: case prolog1:
156: case prolog2:
157: break;
158: case externalSubset0:
159: case externalSubset1:
160: if (includeLevel == 0)
161: break;
162: /* fall through */
163: default:
164: throw new PrologSyntaxException();
165: }
166: }
167:
168: public int action(int tok, byte[] buf, int start, int end,
169: Encoding enc) throws PrologSyntaxException {
170: switch (state) {
171: case prolog0:
172: state = prolog1;
173: if (tok == Encoding.TOK_XML_DECL)
174: return ACTION_XML_DECL;
175: /* fall through */
176: case prolog1:
177: if (tok == Encoding.TOK_DECL_OPEN
178: && enc.matchesXMLString(buf, start + 2
179: * enc.getMinBytesPerChar(), end, "DOCTYPE")) {
180: state = doctype0;
181: return ACTION_NONE;
182: }
183: /* fall through */
184: case prolog2:
185: switch (tok) {
186: case Encoding.TOK_PI:
187: return ACTION_PI;
188: case Encoding.TOK_COMMENT:
189: return ACTION_COMMENT;
190: }
191: break;
192: case doctype0:
193: if (tok == Encoding.TOK_NAME) {
194: state = doctype1;
195: return ACTION_DOCTYPE_NAME;
196: }
197: break;
198: case doctype1:
199: switch (tok) {
200: case Encoding.TOK_OPEN_BRACKET:
201: state = internalSubset;
202: return ACTION_DOCTYPE_SUBSET;
203: case Encoding.TOK_DECL_CLOSE:
204: state = prolog2;
205: return ACTION_DOCTYPE_CLOSE;
206: case Encoding.TOK_NAME:
207: if (enc.matchesXMLString(buf, start, end, "SYSTEM")) {
208: state = doctype3;
209: return ACTION_NONE;
210: }
211: if (enc.matchesXMLString(buf, start, end, "PUBLIC")) {
212: state = doctype2;
213: return ACTION_NONE;
214: }
215: break;
216: }
217: break;
218: case doctype2:
219: if (tok == Encoding.TOK_LITERAL) {
220: state = doctype3;
221: return ACTION_DOCTYPE_PUBLIC_ID;
222: }
223: break;
224: case doctype3:
225: if (tok == Encoding.TOK_LITERAL) {
226: state = doctype4;
227: return ACTION_DOCTYPE_SYSTEM_ID;
228: }
229: break;
230: case doctype4:
231: switch (tok) {
232: case Encoding.TOK_OPEN_BRACKET:
233: state = internalSubset;
234: return ACTION_DOCTYPE_SUBSET;
235: case Encoding.TOK_DECL_CLOSE:
236: state = prolog2;
237: return ACTION_DOCTYPE_CLOSE;
238: }
239: break;
240: case doctype5:
241: if (tok == Encoding.TOK_DECL_CLOSE) {
242: state = prolog2;
243: return ACTION_DOCTYPE_CLOSE;
244: }
245: break;
246: case externalSubset0:
247: state = externalSubset1;
248: if (tok == Encoding.TOK_XML_DECL)
249: return ACTION_TEXT_DECL;
250: /* fall through */
251: case externalSubset1:
252: switch (tok) {
253: case Encoding.TOK_COND_SECT_OPEN:
254: state = condSect0;
255: return ACTION_NONE;
256: case Encoding.TOK_COND_SECT_CLOSE:
257: if (includeLevel == 0)
258: break;
259: --includeLevel;
260: return ACTION_NONE;
261: case Encoding.TOK_CLOSE_BRACKET:
262: throw new PrologSyntaxException();
263: }
264: /* fall through */
265: case internalSubset:
266: switch (tok) {
267: case Encoding.TOK_DECL_OPEN:
268: if (enc.matchesXMLString(buf, start + 2
269: * enc.getMinBytesPerChar(), end, "ENTITY")) {
270: state = entity0;
271: return ACTION_NONE;
272: }
273: if (enc.matchesXMLString(buf, start + 2
274: * enc.getMinBytesPerChar(), end, "ATTLIST")) {
275: state = attlist0;
276: return ACTION_NONE;
277: }
278: if (enc.matchesXMLString(buf, start + 2
279: * enc.getMinBytesPerChar(), end, "ELEMENT")) {
280: state = element0;
281: return ACTION_NONE;
282: }
283: if (enc.matchesXMLString(buf, start + 2
284: * enc.getMinBytesPerChar(), end, "NOTATION")) {
285: state = notation0;
286: return ACTION_NONE;
287: }
288: break;
289: case Encoding.TOK_PI:
290: return ACTION_PI;
291: case Encoding.TOK_COMMENT:
292: return ACTION_COMMENT;
293: case Encoding.TOK_PARAM_ENTITY_REF:
294: return ACTION_OUTER_PARAM_ENTITY_REF;
295: case Encoding.TOK_CLOSE_BRACKET:
296: state = doctype5;
297: return ACTION_NONE;
298: }
299: break;
300: case entity0:
301: switch (tok) {
302: case Encoding.TOK_PERCENT:
303: state = entity1;
304: return ACTION_NONE;
305: case Encoding.TOK_NAME:
306: state = entity2;
307: return ACTION_GENERAL_ENTITY_NAME;
308: }
309: break;
310: case entity1:
311: if (tok == Encoding.TOK_NAME) {
312: state = entity7;
313: return ACTION_PARAM_ENTITY_NAME;
314: }
315: break;
316: case entity2:
317: switch (tok) {
318: case Encoding.TOK_NAME:
319: if (enc.matchesXMLString(buf, start, end, "SYSTEM")) {
320: state = entity4;
321: return ACTION_NONE;
322: }
323: if (enc.matchesXMLString(buf, start, end, "PUBLIC")) {
324: state = entity3;
325: return ACTION_NONE;
326: }
327: break;
328: case Encoding.TOK_LITERAL:
329: state = declClose;
330: return documentEntity ? ACTION_ENTITY_VALUE_NO_PEREFS
331: : ACTION_ENTITY_VALUE_WITH_PEREFS;
332: }
333: break;
334: case entity3:
335: if (tok == Encoding.TOK_LITERAL) {
336: state = entity4;
337: return ACTION_ENTITY_PUBLIC_ID;
338: }
339: break;
340: case entity4:
341: if (tok == Encoding.TOK_LITERAL) {
342: state = entity5;
343: return ACTION_ENTITY_SYSTEM_ID;
344: }
345: break;
346: case entity5:
347: switch (tok) {
348: case Encoding.TOK_DECL_CLOSE:
349: state = documentEntity ? internalSubset
350: : externalSubset1;
351: return ACTION_DECL_CLOSE;
352: case Encoding.TOK_NAME:
353: if (enc.matchesXMLString(buf, start, end, "NDATA")) {
354: state = entity6;
355: return ACTION_NONE;
356: }
357: break;
358: }
359: break;
360: case entity6:
361: switch (tok) {
362: case Encoding.TOK_NAME:
363: state = declClose;
364: return ACTION_ENTITY_NOTATION_NAME;
365: }
366: break;
367: case entity7:
368: switch (tok) {
369: case Encoding.TOK_NAME:
370: if (enc.matchesXMLString(buf, start, end, "SYSTEM")) {
371: state = entity9;
372: return ACTION_NONE;
373: }
374: if (enc.matchesXMLString(buf, start, end, "PUBLIC")) {
375: state = entity8;
376: return ACTION_NONE;
377: }
378: break;
379: case Encoding.TOK_LITERAL:
380: state = declClose;
381: return documentEntity ? ACTION_ENTITY_VALUE_NO_PEREFS
382: : ACTION_ENTITY_VALUE_WITH_PEREFS;
383: }
384: break;
385: case entity8:
386: if (tok == Encoding.TOK_LITERAL) {
387: state = entity9;
388: return ACTION_ENTITY_PUBLIC_ID;
389: }
390: break;
391: case entity9:
392: if (tok == Encoding.TOK_LITERAL) {
393: state = declClose;
394: return ACTION_ENTITY_SYSTEM_ID;
395: }
396: break;
397: case notation0:
398: if (tok == Encoding.TOK_NAME) {
399: state = notation1;
400: return ACTION_NOTATION_NAME;
401: }
402: break;
403: case notation1:
404: switch (tok) {
405: case Encoding.TOK_NAME:
406: if (enc.matchesXMLString(buf, start, end, "SYSTEM")) {
407: state = notation3;
408: return ACTION_NONE;
409: }
410: if (enc.matchesXMLString(buf, start, end, "PUBLIC")) {
411: state = notation2;
412: return ACTION_NONE;
413: }
414: break;
415: }
416: break;
417: case notation2:
418: if (tok == Encoding.TOK_LITERAL) {
419: state = notation4;
420: return ACTION_NOTATION_PUBLIC_ID;
421: }
422: break;
423: case notation3:
424: if (tok == Encoding.TOK_LITERAL) {
425: state = declClose;
426: return ACTION_NOTATION_SYSTEM_ID;
427: }
428: break;
429: case notation4:
430: switch (tok) {
431: case Encoding.TOK_LITERAL:
432: state = declClose;
433: return ACTION_NOTATION_SYSTEM_ID;
434: case Encoding.TOK_DECL_CLOSE:
435: state = documentEntity ? internalSubset
436: : externalSubset1;
437: return ACTION_DECL_CLOSE;
438: }
439: break;
440: case attlist0:
441: if (tok == Encoding.TOK_NAME) {
442: state = attlist1;
443: return ACTION_ATTLIST_ELEMENT_NAME;
444: }
445: break;
446: case attlist1:
447: switch (tok) {
448: case Encoding.TOK_DECL_CLOSE:
449: state = documentEntity ? internalSubset
450: : externalSubset1;
451: return ACTION_NONE;
452: case Encoding.TOK_NAME:
453: state = attlist2;
454: return ACTION_ATTRIBUTE_NAME;
455: }
456: break;
457: case attlist2:
458: switch (tok) {
459: case Encoding.TOK_NAME:
460: for (int i = 0; i < attributeTypes.length; i++)
461: if (enc.matchesXMLString(buf, start, end,
462: attributeTypes[i])) {
463: state = attlist8;
464: return ACTION_ATTRIBUTE_TYPE_CDATA + i;
465: }
466: if (enc.matchesXMLString(buf, start, end, "NOTATION")) {
467: state = attlist5;
468: return ACTION_NONE;
469: }
470: break;
471: case Encoding.TOK_OPEN_PAREN:
472: groupLevel = 1;
473: state = attlist3;
474: return ACTION_NONE;
475: }
476: break;
477: case attlist3:
478: switch (tok) {
479: case Encoding.TOK_NMTOKEN:
480: case Encoding.TOK_NAME:
481: state = attlist4;
482: return ACTION_ATTRIBUTE_ENUM_VALUE;
483: }
484: break;
485: case attlist4:
486: switch (tok) {
487: case Encoding.TOK_CLOSE_PAREN:
488: state = attlist8;
489: groupLevel = 0;
490: return ACTION_NONE;
491: case Encoding.TOK_OR:
492: state = attlist3;
493: return ACTION_NONE;
494: }
495: break;
496: case attlist5:
497: if (tok == Encoding.TOK_OPEN_PAREN) {
498: state = attlist6;
499: groupLevel = 1;
500: return ACTION_NONE;
501: }
502: break;
503: case attlist6:
504: if (tok == Encoding.TOK_NAME) {
505: state = attlist7;
506: return ACTION_ATTRIBUTE_NOTATION_VALUE;
507: }
508: break;
509: case attlist7:
510: switch (tok) {
511: case Encoding.TOK_CLOSE_PAREN:
512: groupLevel = 0;
513: state = attlist8;
514: return ACTION_NONE;
515: case Encoding.TOK_OR:
516: state = attlist6;
517: return ACTION_NONE;
518: }
519: break;
520: /* default value */
521: case attlist8:
522: switch (tok) {
523: case Encoding.TOK_POUND_NAME:
524: if (enc.matchesXMLString(buf, start
525: + enc.getMinBytesPerChar(), end, "IMPLIED")) {
526: state = attlist1;
527: return ACTION_IMPLIED_ATTRIBUTE_VALUE;
528: }
529: if (enc.matchesXMLString(buf, start
530: + enc.getMinBytesPerChar(), end, "REQUIRED")) {
531: state = attlist1;
532: return ACTION_REQUIRED_ATTRIBUTE_VALUE;
533: }
534: if (enc.matchesXMLString(buf, start
535: + enc.getMinBytesPerChar(), end, "FIXED")) {
536: state = attlist9;
537: return ACTION_NONE;
538: }
539: break;
540: case Encoding.TOK_LITERAL:
541: state = attlist1;
542: return ACTION_DEFAULT_ATTRIBUTE_VALUE;
543: }
544: break;
545: case attlist9:
546: if (tok == Encoding.TOK_LITERAL) {
547: state = attlist1;
548: return ACTION_FIXED_ATTRIBUTE_VALUE;
549: }
550: break;
551: case element0:
552: if (tok == Encoding.TOK_NAME) {
553: state = element1;
554: return ACTION_ELEMENT_NAME;
555: }
556: break;
557: case element1:
558: switch (tok) {
559: case Encoding.TOK_NAME:
560: if (enc.matchesXMLString(buf, start, end, "EMPTY")) {
561: state = declClose;
562: return ACTION_CONTENT_EMPTY;
563: }
564: if (enc.matchesXMLString(buf, start, end, "ANY")) {
565: state = declClose;
566: return ACTION_CONTENT_ANY;
567: }
568: break;
569: case Encoding.TOK_OPEN_PAREN:
570: state = element2;
571: groupLevel = 1;
572: connector[0] = (byte) 0;
573: return ACTION_GROUP_OPEN;
574: }
575: break;
576: case element2:
577: switch (tok) {
578: case Encoding.TOK_POUND_NAME:
579: if (enc.matchesXMLString(buf, start
580: + enc.getMinBytesPerChar(), end, "PCDATA")) {
581: state = element3;
582: return ACTION_CONTENT_PCDATA;
583: }
584: break;
585: case Encoding.TOK_OPEN_PAREN:
586: groupLevel = 2;
587: connector[1] = (byte) 0;
588: state = element6;
589: return ACTION_GROUP_OPEN;
590: case Encoding.TOK_NAME:
591: state = element7;
592: return ACTION_CONTENT_ELEMENT;
593: case Encoding.TOK_NAME_QUESTION:
594: state = element7;
595: return ACTION_CONTENT_ELEMENT_OPT;
596: case Encoding.TOK_NAME_ASTERISK:
597: state = element7;
598: return ACTION_CONTENT_ELEMENT_REP;
599: case Encoding.TOK_NAME_PLUS:
600: state = element7;
601: return ACTION_CONTENT_ELEMENT_PLUS;
602: }
603: break;
604: case element3:
605: switch (tok) {
606: case Encoding.TOK_CLOSE_PAREN:
607: case Encoding.TOK_CLOSE_PAREN_ASTERISK:
608: groupLevel = 0;
609: state = declClose;
610: return ACTION_GROUP_CLOSE_REP;
611: case Encoding.TOK_OR:
612: state = element4;
613: return ACTION_GROUP_CHOICE;
614: }
615: break;
616: case element4:
617: if (tok == Encoding.TOK_NAME) {
618: state = element5;
619: return ACTION_CONTENT_ELEMENT;
620: }
621: break;
622: case element5:
623: switch (tok) {
624: case Encoding.TOK_CLOSE_PAREN_ASTERISK:
625: groupLevel = 0;
626: state = declClose;
627: return ACTION_GROUP_CLOSE_REP;
628: case Encoding.TOK_OR:
629: state = element4;
630: return ACTION_GROUP_CHOICE;
631: }
632: break;
633: case element6:
634: switch (tok) {
635: case Encoding.TOK_OPEN_PAREN:
636: if (groupLevel >= connector.length) {
637: byte[] tem = new byte[connector.length << 1];
638: System.arraycopy(connector, 0, tem, 0,
639: connector.length);
640: connector = tem;
641: }
642: connector[groupLevel] = (byte) 0;
643: groupLevel += 1;
644: return ACTION_GROUP_OPEN;
645: case Encoding.TOK_NAME:
646: state = element7;
647: return ACTION_CONTENT_ELEMENT;
648: case Encoding.TOK_NAME_QUESTION:
649: state = element7;
650: return ACTION_CONTENT_ELEMENT_OPT;
651: case Encoding.TOK_NAME_ASTERISK:
652: state = element7;
653: return ACTION_CONTENT_ELEMENT_REP;
654: case Encoding.TOK_NAME_PLUS:
655: state = element7;
656: return ACTION_CONTENT_ELEMENT_PLUS;
657: }
658: break;
659: case element7:
660: switch (tok) {
661: case Encoding.TOK_CLOSE_PAREN:
662: groupLevel -= 1;
663: if (groupLevel == 0)
664: state = declClose;
665: return ACTION_GROUP_CLOSE;
666: case Encoding.TOK_CLOSE_PAREN_ASTERISK:
667: groupLevel -= 1;
668: if (groupLevel == 0)
669: state = declClose;
670: return ACTION_GROUP_CLOSE_REP;
671: case Encoding.TOK_CLOSE_PAREN_QUESTION:
672: groupLevel -= 1;
673: if (groupLevel == 0)
674: state = declClose;
675: return ACTION_GROUP_CLOSE_OPT;
676: case Encoding.TOK_CLOSE_PAREN_PLUS:
677: groupLevel -= 1;
678: if (groupLevel == 0)
679: state = declClose;
680: return ACTION_GROUP_CLOSE_PLUS;
681: case Encoding.TOK_COMMA:
682: state = element6;
683: if (connector[groupLevel - 1] == (byte) '|')
684: break;
685: connector[groupLevel - 1] = (byte) ',';
686: return ACTION_GROUP_SEQUENCE;
687: case Encoding.TOK_OR:
688: state = element6;
689: if (connector[groupLevel - 1] == (byte) ',')
690: break;
691: connector[groupLevel - 1] = (byte) '|';
692: return ACTION_GROUP_CHOICE;
693: }
694: break;
695: case declClose:
696: if (tok == Encoding.TOK_DECL_CLOSE) {
697: state = documentEntity ? internalSubset
698: : externalSubset1;
699: return ACTION_DECL_CLOSE;
700: }
701: break;
702: case condSect0:
703: if (tok == Encoding.TOK_NAME) {
704: if (enc.matchesXMLString(buf, start, end, "INCLUDE")) {
705: state = condSect1;
706: return ACTION_NONE;
707: }
708: if (enc.matchesXMLString(buf, start, end, "IGNORE")) {
709: state = condSect2;
710: return ACTION_NONE;
711: }
712: }
713: break;
714: case condSect1:
715: if (tok == Encoding.TOK_OPEN_BRACKET) {
716: state = externalSubset1;
717: includeLevel++;
718: return ACTION_NONE;
719: }
720: break;
721: case condSect2:
722: if (tok == Encoding.TOK_OPEN_BRACKET) {
723: state = externalSubset1;
724: return ACTION_IGNORE_SECT;
725: }
726: break;
727: }
728: if (tok == Encoding.TOK_PROLOG_S)
729: return ACTION_NONE;
730: if (tok == Encoding.TOK_PARAM_ENTITY_REF && !documentEntity)
731: return ACTION_INNER_PARAM_ENTITY_REF;
732: throw new PrologSyntaxException();
733: }
734:
735: public Object clone() {
736: try {
737: PrologParser copy = (PrologParser) super .clone();
738: copy.connector = new byte[connector.length];
739: System.arraycopy(connector, 0, copy.connector, 0,
740: groupLevel);
741: return copy;
742: } catch (CloneNotSupportedException e) {
743: throw new InternalError();
744: }
745: }
746:
747: public final int getGroupLevel() {
748: return groupLevel;
749: }
750:
751: private static final String[] attributeTypes = { "CDATA", "ID",
752: "IDREF", "IDREFS", "ENTITY", "ENTITIES", "NMTOKEN",
753: "NMTOKENS", };
754: }
|