0001: package com.jclark.xml.parse;
0002:
0003: import java.io.IOException;
0004: import java.io.InputStream;
0005: import java.io.Writer;
0006: import java.net.URL;
0007: import java.util.Enumeration;
0008: import java.util.Vector;
0009: import java.util.ResourceBundle;
0010: import java.util.MissingResourceException;
0011: import java.util.Locale;
0012: import java.text.MessageFormat;
0013:
0014: import com.jclark.util.Hashtable;
0015:
0016: import com.jclark.xml.parse.base.Application;
0017: import com.jclark.xml.tok.*;
0018:
0019: /**
0020: * Parses a single entity.
0021: * @version $Revision: 1.31 $ $Date: 1998/12/28 08:12:30 $
0022: */
0023: class EntityParser extends ContentToken implements StartElementEvent,
0024: EndElementEvent, CharacterDataEvent,
0025: ProcessingInstructionEvent, EndPrologEvent, CommentEvent,
0026: StartCdataSectionEvent, EndCdataSectionEvent,
0027: StartEntityReferenceEvent, EndEntityReferenceEvent,
0028: ParseLocation {
0029:
0030: private static final class EntityImpl implements Entity {
0031: byte[] text;
0032: String systemId;
0033: String publicId;
0034: URL baseURL;
0035: String notationName;
0036: boolean open;
0037:
0038: public String getSystemId() {
0039: return systemId;
0040: }
0041:
0042: public URL getBase() {
0043: return baseURL;
0044: }
0045:
0046: public String getPublicId() {
0047: return publicId;
0048: }
0049:
0050: public String getNotationName() {
0051: return notationName;
0052: }
0053:
0054: public String getReplacementText() {
0055: if (text == null)
0056: return null;
0057: StringBuffer buf = new StringBuffer(text.length >> 1);
0058: for (int i = 0; i < text.length; i += 2)
0059: buf
0060: .append((char) (((text[i] & 0xFF) << 8) | (text[i + 1] & 0xFF)));
0061: return buf.toString();
0062: }
0063: }
0064:
0065: private static final class ElementTypeImpl implements ElementType {
0066: /* defaultIndex is equal to ID_DEFAULT_INDEX if the attribute
0067: is an ID attribute; this relies on the fact that ID attributes
0068: can't have defaults */
0069: static final int ID_DEFAULT_INDEX = -2;
0070:
0071: private static class Attribute implements AttributeDefinition {
0072: int defaultIndex = -1;
0073: boolean required = false;
0074: byte type = UNDECLARED;
0075: Vector values = null;
0076: String name = null;
0077: String value = null;
0078: String unnormalizedValue = null;
0079:
0080: public String getDefaultValue() {
0081: return value;
0082: }
0083:
0084: public String getDefaultUnnormalizedValue() {
0085: return unnormalizedValue;
0086: }
0087:
0088: public boolean isRequired() {
0089: return required;
0090: }
0091:
0092: public byte getType() {
0093: return type;
0094: }
0095:
0096: public Enumeration allowedValues() {
0097: if (values == null)
0098: return null;
0099: return values.elements();
0100: }
0101: }
0102:
0103: ElementTypeImpl() {
0104: }
0105:
0106: void setContentType(byte n) {
0107: contentType = n;
0108: }
0109:
0110: public byte getContentType() {
0111: return contentType;
0112: }
0113:
0114: void setContentSpec(String s) {
0115: contentSpec = s;
0116: }
0117:
0118: public String getContentSpec() {
0119: return contentSpec;
0120: }
0121:
0122: int getDefaultAttributeCount() {
0123: return nDefaultAtts;
0124: }
0125:
0126: int getAttributeDefaultIndex(String name) {
0127: Attribute att = (Attribute) attTable.get(name);
0128: return att == null ? -1 : att.defaultIndex;
0129: }
0130:
0131: String getDefaultAttributeValue(int i) {
0132: if (i >= nDefaultAtts)
0133: throw new IndexOutOfBoundsException();
0134: return defaultAtts[i].value;
0135: }
0136:
0137: String getDefaultAttributeUnnormalizedValue(int i) {
0138: if (i >= nDefaultAtts)
0139: throw new IndexOutOfBoundsException();
0140: return defaultAtts[i].unnormalizedValue;
0141: }
0142:
0143: String getDefaultAttributeName(int i) {
0144: if (i >= nDefaultAtts)
0145: throw new IndexOutOfBoundsException();
0146: return defaultAtts[i].name;
0147: }
0148:
0149: boolean isAttributeCdata(String name) {
0150: Attribute att = (Attribute) attTable.get(name);
0151: return att == null || att.type <= 0;
0152: }
0153:
0154: public Enumeration attributeNames() {
0155: return attTable.keys();
0156: }
0157:
0158: public AttributeDefinition getAttributeDefinition(String name) {
0159: return (AttributeDefinition) attTable.get(name);
0160: }
0161:
0162: /* Value may be null if the default is #IMPLIED or #REQUIRED. */
0163: boolean appendAttribute(String name, String value,
0164: String unnormalizedValue, boolean required,
0165: byte attributeType, Vector allowedValues) {
0166: if (attTable.get(name) != null)
0167: return false;
0168: Attribute att = new Attribute();
0169: attTable.put(name, att);
0170: att.name = name;
0171: att.value = value;
0172: att.unnormalizedValue = unnormalizedValue;
0173: if (value == null)
0174: att.defaultIndex = -1;
0175: else {
0176: if (nDefaultAtts == defaultAtts.length) {
0177: Attribute[] tem = defaultAtts;
0178: defaultAtts = new Attribute[tem.length << 1];
0179: System
0180: .arraycopy(tem, 0, defaultAtts, 0,
0181: tem.length);
0182: }
0183: defaultAtts[nDefaultAtts] = att;
0184: att.defaultIndex = nDefaultAtts++;
0185: }
0186: att.required = required;
0187: att.type = attributeType;
0188: if (attributeType == AttributeDefinition.ID
0189: && value == null)
0190: att.defaultIndex = ElementTypeImpl.ID_DEFAULT_INDEX;
0191: att.values = allowedValues;
0192: return true;
0193: }
0194:
0195: private final Hashtable attTable = new Hashtable();
0196: private int nDefaultAtts = 0;
0197: private Attribute[] defaultAtts = new Attribute[4];
0198: private byte contentType = ElementType.UNDECLARED_CONTENT;
0199: private String contentSpec = null;
0200: }
0201:
0202: private static class DTDImpl implements DTD {
0203: String name = null;
0204: EntityImpl externalSubset = null;
0205: URL baseURL = null;
0206: Hashtable elementTypeTable = new Hashtable();
0207: Hashtable generalEntityTable = new Hashtable();
0208: Hashtable paramEntityTable = new Hashtable();
0209: Hashtable notationTable = new Hashtable();
0210: boolean complete = true;
0211: boolean standalone = false;
0212: boolean hasInternalSubset = false;
0213:
0214: DTDImpl(URL baseURL) {
0215: this .baseURL = baseURL;
0216: }
0217:
0218: public String getDocumentTypeName() {
0219: return name;
0220: }
0221:
0222: public boolean isComplete() {
0223: return complete;
0224: }
0225:
0226: public boolean isStandalone() {
0227: return standalone;
0228: }
0229:
0230: public Enumeration entityNames(byte entityType) {
0231: switch (entityType) {
0232: case GENERAL_ENTITY:
0233: return generalEntityTable.keys();
0234: case PARAMETER_ENTITY:
0235: return paramEntityTable.keys();
0236: case NOTATION:
0237: return notationTable.keys();
0238: }
0239: throw new IllegalArgumentException();
0240: }
0241:
0242: public Entity getEntity(byte entityType, String entityName) {
0243: switch (entityType) {
0244: case GENERAL_ENTITY:
0245: return (Entity) generalEntityTable.get(entityName);
0246: case PARAMETER_ENTITY:
0247: if (entityName.equals(DTD.EXTERNAL_SUBSET_NAME)) {
0248: if (externalSubset.systemId == null)
0249: return null;
0250: else
0251: return externalSubset;
0252: }
0253: return (Entity) paramEntityTable.get(entityName);
0254: case NOTATION:
0255: return (Entity) notationTable.get(entityName);
0256:
0257: }
0258: throw new IllegalArgumentException();
0259: }
0260:
0261: public Enumeration elementTypeNames() {
0262: return elementTypeTable.keys();
0263: }
0264:
0265: public ElementType getElementType(String name) {
0266: return (ElementType) elementTypeTable.get(name);
0267: }
0268: }
0269:
// Compile-time switch: when true, prologAction skips the external DTD
// subset and parameter entity references just as it does for a document
// declared standalone.
0270: private static final boolean forceStandalone = false;
0271:
// Sizing unit for the input buffer; the stream-reading constructor
// allocates READSIZE * 2 bytes.
0272: private static final int READSIZE = 1024 * 8;
0273:
0274: private static class StartExternalSubsetEvent implements
0275: StartEntityReferenceEvent {
0276: public String getName() {
0277: return DTD.EXTERNAL_SUBSET_NAME;
0278: }
0279: }
0280:
0281: private static final StartEntityReferenceEvent startExternalSubsetEvent = new StartExternalSubsetEvent();
0282:
// Parser of the referencing entity; assigned only by the internal-entity
// constructor.
0283: private EntityParser parent;
// Name of the internal entity being parsed and whether it is a parameter
// entity; assigned only by the internal-entity constructor.
0284: private String internalEntityName;
0285: private boolean isParameterEntity;
// Bytes being tokenized; the tokenizers are handed the window
// [bufStart, bufEnd).
0286: private byte[] buf;
0287: private int bufStart;
0288: private int bufEnd;
// Offset in buf where the token currently being processed starts; kept up
// to date so that locations can be reported.
0289: private int currentTokenStart;
// Source stream for external entities; closed and set to null once the
// entity has been parsed.
0290: private InputStream in;
// Base URL and location string as supplied by the OpenEntity.
0291: private URL baseURL;
0292: private String location;
0293: private Position pos = new Position();
0294: // The offset in buffer corresponding to pos.
0295: private int posOff = 0;
// NOTE(review): presumably the stream offset of the byte at bufEnd; it is
// not written anywhere in this part of the file - confirm against fill().
0296: private long bufEndStreamOffset = 0;
// Encoding currently used to tokenize buf.
0297: private Encoding enc;
0298: // True if the encoding in the XML declaration should be ignored.
0299: private boolean ignoreDeclEnc;
// Cached enc.getMinBytesPerChar() and enc.getFixedBytesPerChar().
0300: private/* final */int minBPC;
0301: private int fixBPC;
// Converts byte ranges of buf (in encoding enc) to strings.
0302: private StringConversionCache stringCache;
// Encoding from Encoding.getInternalEncoding() and its conversion cache;
// parsers created for internal entities use these as enc / stringCache.
0303: private Encoding internalEnc;
0304: private StringConversionCache internalStringCache;
// Receiver of the parse events.
0305: private Application app;
// DTD being built; shared with the parent parser when there is one.
0306: private DTDImpl dtd;
0307: private EntityManager entityManager;
0308: private Locale locale;
0309:
// Offset in buf of the name inside the current token (element, entity or
// PI target); set before the corresponding event is reported.
0310: private int nameStart;
0311: // Some temporary buffers
0312: private Buffer valueBuf;
// Character data for the current CharacterDataEvent: null when the data
// comes straight from buf, otherwise dataBuf holding dataLength chars.
0313: private char[] data;
0314: private static final int INIT_DATA_BUF_SIZE = 65;
0315: private int dataLength;
0316: private char[] dataBuf;
// True only while character data produced by a reference is being
// reported.
0317: private boolean dataIsRef = false;
// Attribute state for the current start tag. nAttributes is reset to -1
// before startElement is called.
// NOTE(review): -1 presumably means "not computed yet" - confirm.
0318: private String[] attValues;
0319: private String[] attNames;
0320: private int nAttributes;
0321: private int idAttributeIndex;
0322: private boolean[] defaultSpecified;
0323:
/**
 * Creates a parser that reads an external entity from a stream.
 * The first bytes are read immediately to detect the initial encoding;
 * an encoding supplied by the entity itself overrides any encoding
 * declaration found later in the text.
 *
 * @param entity        the opened entity supplying the stream, base URL,
 *                      location and optional encoding name
 * @param entityManager used to open further external entities
 * @param app           receiver of parse events
 * @param locale        locale stored for later use by the parser
 * @param parent        parser whose DTD is shared, or null to start a
 *                      fresh DTD
 * @throws IOException if reading fails or the entity is rejected
 */
EntityParser(OpenEntity entity, EntityManager entityManager,
        Application app, Locale locale, EntityParser parent)
        throws IOException {
    this.in = entity.getInputStream();
    this.app = app;
    this.locale = locale;
    this.baseURL = entity.getBase();
    this.location = entity.getLocation();
    this.entityManager = entityManager;

    buf = new byte[READSIZE * 2];
    bufStart = 0;
    bufEnd = 0;
    currentTokenStart = 0;
    // Encoding detection looks at up to four bytes; keep reading until
    // we have them or the input is exhausted.
    while (bufEnd - bufStart < 4) {
        if (!fill()) {
            break;
        }
    }
    enc = Encoding.getInitialEncoding(buf, bufStart, bufEnd, this);
    bufStart = getTokenEnd();
    currentTokenStart = bufStart;
    posOff = bufStart; // ignore the byte order mark in computing columns
    if (enc == null) {
        fatal(MessageId.BAD_INITIAL_BYTES);
    }

    String encName = entity.getEncoding();
    if (encName != null) {
        // Externally specified encoding wins over the declaration.
        ignoreDeclEnc = true;
        enc = enc.getEncoding(encName);
        if (enc == null) {
            fatal(MessageId.UNSUPPORTED_ENCODING);
        }
    }
    minBPC = enc.getMinBytesPerChar();
    fixBPC = enc.getFixedBytesPerChar();
    stringCache = new StringConversionCache(enc);
    valueBuf = new Buffer();
    dataBuf = new char[INIT_DATA_BUF_SIZE];
    internalEnc = Encoding.getInternalEncoding();
    internalStringCache = new StringConversionCache(internalEnc);

    dtd = (parent != null) ? parent.dtd : new DTDImpl(baseURL);
}
0361:
/**
 * Creates a parser for the replacement text of an internal entity.
 * The text is already in the internal encoding, so the whole byte array
 * is the buffer; all other settings and the temporary buffers are taken
 * from the parent parser.
 *
 * @param buf               replacement text in the internal encoding
 * @param entityName        name of the entity being expanded
 * @param isParameterEntity true for a parameter entity
 * @param parent            the parser that met the reference
 */
private EntityParser(byte[] buf, String entityName,
        boolean isParameterEntity, EntityParser parent) {
    this.parent = parent;
    this.internalEntityName = entityName;
    this.isParameterEntity = isParameterEntity;

    // The complete replacement text is available up front.
    this.buf = buf;
    bufStart = 0;
    currentTokenStart = 0;
    bufEnd = buf.length;

    // Shared context inherited from the referencing parser.
    baseURL = parent.baseURL;
    entityManager = parent.entityManager;
    app = parent.app;
    locale = parent.locale;
    dtd = parent.dtd;
    valueBuf = parent.valueBuf;
    dataBuf = parent.dataBuf;

    // Internal entities are always tokenized with the internal encoding.
    internalEnc = parent.internalEnc;
    enc = internalEnc;
    internalStringCache = parent.internalStringCache;
    stringCache = internalStringCache;
    minBPC = enc.getMinBytesPerChar();
    fixBPC = enc.getFixedBytesPerChar();
}
0382:
0383: void parseDocumentEntity() throws IOException, ApplicationException {
0384: try {
0385:
0386: try {
0387: app.startDocument();
0388: } catch (RuntimeException e) {
0389: throw e;
0390: } catch (Exception e) {
0391: throw new ApplicationException(e);
0392: }
0393: parseDecls(PrologParser.PROLOG);
0394: parseContent(true);
0395: parseMisc();
0396:
0397: try {
0398: app.endDocument();
0399: } catch (RuntimeException e) {
0400: throw e;
0401: } catch (Exception e) {
0402: throw new ApplicationException(e);
0403: }
0404: } finally {
0405: if (in != null) {
0406: in.close();
0407: in = null;
0408: }
0409: }
0410: }
0411:
0412: private void parseExternalTextEntity() throws IOException,
0413: ApplicationException {
0414: try {
0415: for (;;) {
0416: try {
0417: if (enc
0418: .tokenizeContent(buf, bufStart, bufEnd,
0419: this ) == Encoding.TOK_XML_DECL) {
0420: currentTokenStart = bufStart;
0421: bufStart = getTokenEnd();
0422: handleXmlDecl(true);
0423: }
0424: break;
0425: } catch (InvalidTokenException e) {
0426: break;
0427: } catch (TokenException e) {
0428: if (!fill())
0429: break;
0430: }
0431: }
0432: parseContent(false);
0433: } finally {
0434: if (in != null) {
0435: in.close();
0436: in = null;
0437: }
0438: }
0439: }
0440:
0441: private void handleXmlDecl(boolean notDocumentEntity)
0442: throws NotWellFormedException {
0443: try {
0444: TextDecl textDecl;
0445: if (notDocumentEntity)
0446: textDecl = new TextDecl(enc, buf, currentTokenStart,
0447: bufStart);
0448: else {
0449: XmlDecl xmlDecl = new XmlDecl(enc, buf,
0450: currentTokenStart, bufStart);
0451: dtd.standalone = xmlDecl.isStandalone();
0452: textDecl = xmlDecl;
0453: }
0454: if (!ignoreDeclEnc) {
0455: enc = enc.getEncoding(textDecl.getEncoding());
0456: if (enc == null)
0457: fatal(MessageId.UNSUPPORTED_ENCODING);
0458: if (enc.getMinBytesPerChar() != minBPC)
0459: fatal(MessageId.BAD_DECL_ENCODING);
0460: stringCache.setEncoding(enc);
0461: fixBPC = enc.getFixedBytesPerChar();
0462: }
0463: } catch (InvalidTokenException e) {
0464: currentTokenStart = e.getOffset();
0465: fatal(MessageId.INVALID_XML_DECLARATION);
0466: }
0467: }
0468:
0469: static class DeclState implements MarkupDeclarationEvent,
0470: StartDocumentTypeDeclarationEvent,
0471: EndDocumentTypeDeclarationEvent {
0472: DeclState(byte type, DTD dtd) {
0473: this .type = type;
0474: this .dtd = dtd;
0475: }
0476:
0477: public DTD getDTD() {
0478: return dtd;
0479: }
0480:
0481: public int getType() {
0482: return declType;
0483: }
0484:
0485: public String getName() {
0486: return declName;
0487: }
0488:
0489: public String getAttributeName() {
0490: if (declType != ATTRIBUTE)
0491: return null;
0492: return attributeName;
0493: }
0494:
0495: final byte type;
0496: final DTD dtd;
0497: EntityImpl entity;
0498: ElementTypeImpl elementType;
0499: String attributeName;
0500: byte attributeType;
0501: StringBuffer contentSpec = new StringBuffer();
0502: Vector allowedValues;
0503: String declName;
0504: int declType = -1;
0505: }
0506:
0507: private void parseDecls(byte type) throws IOException,
0508: ApplicationException {
0509: PrologParser pp = new PrologParser(type);
0510: DeclState declState = new DeclState(type, dtd);
0511: try {
0512: for (;;) {
0513: int tok;
0514: try {
0515: tok = tokenizeProlog();
0516: } catch (EndOfPrologException e) {
0517: if (type != PrologParser.PROLOG)
0518: fatal(MessageId.SYNTAX_ERROR);
0519: pp.end();
0520: break;
0521: } catch (EmptyTokenException e) {
0522: if (type == PrologParser.PROLOG) {
0523: currentTokenStart = bufStart;
0524: fatal(MessageId.NO_DOCUMENT_ELEMENT);
0525: }
0526: pp.end();
0527: break;
0528: }
0529: prologAction(pp.action(tok, buf, currentTokenStart,
0530: bufStart, enc), pp, declState);
0531: }
0532: } catch (PrologSyntaxException e) {
0533: fatal(MessageId.SYNTAX_ERROR);
0534: } finally {
0535: if (type == PrologParser.EXTERNAL_ENTITY && in != null) {
0536: in.close();
0537: in = null;
0538: }
0539: }
0540: if (type == PrologParser.PROLOG) {
0541: try {
0542: app.endProlog(this );
0543: } catch (RuntimeException e) {
0544: throw e;
0545: } catch (Exception e) {
0546: throw new ApplicationException(e);
0547: }
0548: }
0549: }
0550:
0551: void parseInnerParamEntity(PrologParser pp, DeclState declState)
0552: throws IOException, ApplicationException {
0553: int groupLevel = pp.getGroupLevel();
0554: try {
0555: for (;;) {
0556: int tok = tokenizeProlog();
0557: prologAction(pp.action(tok, buf, currentTokenStart,
0558: bufStart, enc), pp, declState);
0559: if (tok == Encoding.TOK_DECL_CLOSE)
0560: fatal(MessageId.PE_DECL_NESTING);
0561: }
0562: } catch (EndOfPrologException e) {
0563: fatal(MessageId.SYNTAX_ERROR);
0564: } catch (PrologSyntaxException e) {
0565: fatal(MessageId.SYNTAX_ERROR);
0566: } catch (EmptyTokenException e) {
0567: }
0568: if (pp.getGroupLevel() != groupLevel)
0569: fatal(MessageId.PE_GROUP_NESTING);
0570: }
0571:
0572: void prologAction(int action, PrologParser pp, DeclState declState)
0573: throws IOException, ApplicationException {
0574: String name;
0575: switch (action) {
0576: case PrologParser.ACTION_XML_DECL:
0577: handleXmlDecl(false);
0578: break;
0579: case PrologParser.ACTION_TEXT_DECL:
0580: handleXmlDecl(true);
0581: break;
0582: case PrologParser.ACTION_ENTITY_PUBLIC_ID:
0583: case PrologParser.ACTION_DOCTYPE_PUBLIC_ID:
0584: case PrologParser.ACTION_NOTATION_PUBLIC_ID:
0585: try {
0586: String id = enc.getPublicId(buf, currentTokenStart,
0587: bufStart);
0588: declState.entity.publicId = id;
0589: } catch (InvalidTokenException e) {
0590: currentTokenStart = e.getOffset();
0591: fatal(MessageId.PUBID_CHAR);
0592: }
0593: break;
0594: case PrologParser.ACTION_DOCTYPE_NAME:
0595: dtd.name = stringCache.convert(buf, currentTokenStart,
0596: bufStart, true);
0597: dtd.externalSubset = new EntityImpl();
0598: declState.entity = dtd.externalSubset;
0599: break;
0600: case PrologParser.ACTION_NOTATION_NAME:
0601: declState.declType = MarkupDeclarationEvent.NOTATION;
0602: startEntityDecl(dtd.notationTable, declState);
0603: break;
0604: case PrologParser.ACTION_GENERAL_ENTITY_NAME:
0605: declState.declType = MarkupDeclarationEvent.GENERAL_ENTITY;
0606: startEntityDecl(dtd.generalEntityTable, declState);
0607: break;
0608: case PrologParser.ACTION_PARAM_ENTITY_NAME:
0609: declState.declType = MarkupDeclarationEvent.PARAMETER_ENTITY;
0610: startEntityDecl(dtd.paramEntityTable, declState);
0611: break;
0612: case PrologParser.ACTION_ENTITY_VALUE_NO_PEREFS:
0613: case PrologParser.ACTION_ENTITY_VALUE_WITH_PEREFS:
0614: byte[] text = makeReplacementText(action == PrologParser.ACTION_ENTITY_VALUE_WITH_PEREFS);
0615: if (declState.entity != null)
0616: declState.entity.text = text;
0617: break;
0618: case PrologParser.ACTION_NOTATION_SYSTEM_ID:
0619: case PrologParser.ACTION_ENTITY_SYSTEM_ID:
0620: case PrologParser.ACTION_DOCTYPE_SYSTEM_ID:
0621: if (declState.entity != null) {
0622: declState.entity.systemId = stringCache.convert(buf,
0623: currentTokenStart + minBPC, bufStart - minBPC,
0624: false);
0625: declState.entity.baseURL = baseURL;
0626: }
0627: break;
0628: case PrologParser.ACTION_ENTITY_NOTATION_NAME:
0629: if (declState.entity != null)
0630: declState.entity.notationName = stringCache.convert(
0631: buf, currentTokenStart, bufStart, true);
0632: break;
0633: case PrologParser.ACTION_DOCTYPE_SUBSET:
0634: dtd.hasInternalSubset = true;
0635: reportStartDocumentTypeDeclaration(declState);
0636: break;
0637: case PrologParser.ACTION_DOCTYPE_CLOSE:
0638: if (!dtd.hasInternalSubset)
0639: reportStartDocumentTypeDeclaration(declState);
0640: if (dtd.externalSubset != null
0641: && dtd.externalSubset.systemId != null) {
0642: if (!dtd.standalone && !forceStandalone) {
0643: OpenEntity openEntity = entityManager.open(
0644: dtd.externalSubset.systemId, baseURL,
0645: dtd.externalSubset.publicId);
0646: if (openEntity != null) {
0647: try {
0648: app
0649: .startEntityReference(startExternalSubsetEvent);
0650: } catch (RuntimeException e) {
0651: throw e;
0652: } catch (Exception e) {
0653: throw new ApplicationException(e);
0654: }
0655: new EntityParser(openEntity, entityManager,
0656: app, locale, this )
0657: .parseDecls(PrologParser.EXTERNAL_ENTITY);
0658: reportEndEntityReference();
0659: reportEndDocumentTypeDeclaration(declState);
0660: return;
0661: }
0662: }
0663: dtd.complete = false;
0664: }
0665: reportEndDocumentTypeDeclaration(declState);
0666: break;
0667: case PrologParser.ACTION_INNER_PARAM_ENTITY_REF:
0668: case PrologParser.ACTION_OUTER_PARAM_ENTITY_REF:
0669: nameStart = currentTokenStart + minBPC;
0670: name = stringCache.convert(buf, nameStart, getNameEnd(),
0671: true);
0672: EntityImpl entity = (EntityImpl) dtd.paramEntityTable
0673: .get(name);
0674: if (entity == null) {
0675: if (dtd.complete)
0676: fatal(MessageId.UNDEF_PEREF, name);
0677: break;
0678: }
0679: EntityParser parser = makeParserForEntity(entity, name,
0680: true);
0681: if (parser == null || dtd.standalone || forceStandalone) {
0682: dtd.complete = false;
0683: break;
0684: }
0685: entity.open = true;
0686: if (action == PrologParser.ACTION_OUTER_PARAM_ENTITY_REF) {
0687: reportStartEntityReference();
0688: parser
0689: .parseDecls(entity.text != null ? PrologParser.INTERNAL_ENTITY
0690: : PrologParser.EXTERNAL_ENTITY);
0691: reportEndEntityReference();
0692: } else
0693: parser.parseInnerParamEntity(pp, declState);
0694: entity.open = false;
0695: break;
0696: /* Default attribute processing. */
0697: case PrologParser.ACTION_ATTLIST_ELEMENT_NAME:
0698: String gi = stringCache.convert(buf, currentTokenStart,
0699: bufStart, true);
0700: declState.declType = MarkupDeclarationEvent.ATTRIBUTE;
0701: declState.declName = gi;
0702: declState.elementType = (ElementTypeImpl) dtd.elementTypeTable
0703: .get(gi);
0704: if (declState.elementType == null) {
0705: declState.elementType = new ElementTypeImpl();
0706: dtd.elementTypeTable.put(gi, declState.elementType);
0707: }
0708: break;
0709: case PrologParser.ACTION_ATTRIBUTE_NAME:
0710: declState.attributeName = stringCache.convert(buf,
0711: currentTokenStart, bufStart, true);
0712: declState.allowedValues = null;
0713: break;
0714: case PrologParser.ACTION_ATTRIBUTE_TYPE_CDATA:
0715: declState.attributeType = AttributeDefinition.CDATA;
0716: break;
0717: case PrologParser.ACTION_ATTRIBUTE_TYPE_ID:
0718: declState.attributeType = AttributeDefinition.ID;
0719: break;
0720: case PrologParser.ACTION_ATTRIBUTE_TYPE_IDREF:
0721: declState.attributeType = AttributeDefinition.IDREF;
0722: break;
0723: case PrologParser.ACTION_ATTRIBUTE_TYPE_IDREFS:
0724: declState.attributeType = AttributeDefinition.IDREFS;
0725: break;
0726: case PrologParser.ACTION_ATTRIBUTE_TYPE_ENTITY:
0727: declState.attributeType = AttributeDefinition.ENTITY;
0728: break;
0729: case PrologParser.ACTION_ATTRIBUTE_TYPE_ENTITIES:
0730: declState.attributeType = AttributeDefinition.ENTITIES;
0731: break;
0732: case PrologParser.ACTION_ATTRIBUTE_TYPE_NMTOKEN:
0733: declState.attributeType = AttributeDefinition.NMTOKEN;
0734: break;
0735: case PrologParser.ACTION_ATTRIBUTE_TYPE_NMTOKENS:
0736: declState.attributeType = AttributeDefinition.NMTOKENS;
0737: break;
0738: case PrologParser.ACTION_IMPLIED_ATTRIBUTE_VALUE:
0739: case PrologParser.ACTION_REQUIRED_ATTRIBUTE_VALUE:
0740: if (declState.elementType
0741: .appendAttribute(
0742: declState.attributeName,
0743: null,
0744: null,
0745: action == PrologParser.ACTION_REQUIRED_ATTRIBUTE_VALUE,
0746: declState.attributeType,
0747: declState.allowedValues))
0748: reportMarkupDeclaration(declState);
0749: break;
0750: case PrologParser.ACTION_DEFAULT_ATTRIBUTE_VALUE:
0751: case PrologParser.ACTION_FIXED_ATTRIBUTE_VALUE:
0752: if (declState.elementType
0753: .appendAttribute(
0754: declState.attributeName,
0755: makeAttributeValue(
0756: declState.attributeType == AttributeDefinition.CDATA,
0757: buf, currentTokenStart + minBPC,
0758: bufStart - minBPC),
0759: normalizeNewlines(stringCache.convert(buf,
0760: currentTokenStart + minBPC,
0761: bufStart - minBPC, false)),
0762: action == PrologParser.ACTION_FIXED_ATTRIBUTE_VALUE,
0763: declState.attributeType,
0764: declState.allowedValues))
0765: reportMarkupDeclaration(declState);
0766: break;
0767: case PrologParser.ACTION_ATTRIBUTE_ENUM_VALUE:
0768: case PrologParser.ACTION_ATTRIBUTE_NOTATION_VALUE:
0769: if (action == PrologParser.ACTION_ATTRIBUTE_NOTATION_VALUE)
0770: declState.attributeType = AttributeDefinition.NOTATION;
0771: else
0772: declState.attributeType = AttributeDefinition.ENUM;
0773: if (declState.allowedValues == null)
0774: declState.allowedValues = new Vector();
0775: declState.allowedValues.addElement(stringCache.convert(buf,
0776: currentTokenStart, bufStart, true));
0777: break;
0778: case PrologParser.ACTION_PI:
0779: nameStart = currentTokenStart + minBPC + minBPC;
0780: reportProcessingInstruction();
0781: break;
0782: case PrologParser.ACTION_COMMENT:
0783: reportComment();
0784: break;
0785: case PrologParser.ACTION_IGNORE_SECT:
0786: skipIgnoreSect();
0787: break;
0788: case PrologParser.ACTION_ELEMENT_NAME:
0789: name = stringCache.convert(buf, currentTokenStart,
0790: bufStart, true);
0791: declState.elementType = (ElementTypeImpl) dtd.elementTypeTable
0792: .get(name);
0793: if (declState.elementType == null) {
0794: declState.elementType = new ElementTypeImpl();
0795: dtd.elementTypeTable.put(name, declState.elementType);
0796: }
0797: declState.declName = name;
0798: declState.declType = MarkupDeclarationEvent.ELEMENT;
0799: declState.contentSpec.setLength(0);
0800: declState.elementType
0801: .setContentType(ElementType.ELEMENT_CONTENT);
0802: break;
0803: case PrologParser.ACTION_CONTENT_ANY:
0804: declState.elementType
0805: .setContentType(ElementType.ANY_CONTENT);
0806: declState.elementType.setContentSpec("ANY");
0807: break;
0808: case PrologParser.ACTION_CONTENT_EMPTY:
0809: declState.elementType
0810: .setContentType(ElementType.EMPTY_CONTENT);
0811: declState.elementType.setContentSpec("EMPTY");
0812: break;
0813: case PrologParser.ACTION_CONTENT_PCDATA:
0814: declState.elementType
0815: .setContentType(ElementType.MIXED_CONTENT);
0816: declState.contentSpec.append("#PCDATA");
0817: break;
0818: case PrologParser.ACTION_GROUP_OPEN:
0819: case PrologParser.ACTION_GROUP_CHOICE:
0820: case PrologParser.ACTION_GROUP_SEQUENCE:
0821: case PrologParser.ACTION_CONTENT_ELEMENT:
0822: case PrologParser.ACTION_CONTENT_ELEMENT_REP:
0823: case PrologParser.ACTION_CONTENT_ELEMENT_OPT:
0824: case PrologParser.ACTION_CONTENT_ELEMENT_PLUS:
0825: declState.contentSpec.append(stringCache.convert(buf,
0826: currentTokenStart, bufStart, false));
0827: break;
0828: case PrologParser.ACTION_GROUP_CLOSE:
0829: case PrologParser.ACTION_GROUP_CLOSE_REP:
0830: case PrologParser.ACTION_GROUP_CLOSE_OPT:
0831: case PrologParser.ACTION_GROUP_CLOSE_PLUS:
0832: declState.contentSpec.append(stringCache.convert(buf,
0833: currentTokenStart, bufStart, false));
0834: if (pp.getGroupLevel() == 0)
0835: declState.elementType
0836: .setContentSpec(declState.contentSpec
0837: .toString());
0838: break;
0839: case PrologParser.ACTION_DECL_CLOSE:
0840: if (declState.declType >= 0
0841: && declState.declType != MarkupDeclarationEvent.ATTRIBUTE)
0842: reportMarkupDeclaration(declState);
0843: declState.declType = -1;
0844: break;
0845: }
0846: }
0847:
0848: private final void startEntityDecl(Hashtable table,
0849: DeclState declState) {
0850: String name = stringCache.convert(buf, currentTokenStart,
0851: bufStart, true);
0852: declState.entity = (EntityImpl) table.get(name);
0853: if (declState.entity == null) {
0854: declState.entity = new EntityImpl();
0855: table.put(name, declState.entity);
0856: declState.declName = name;
0857: } else {
0858: declState.entity = null;
0859: declState.declType = -1;
0860: }
0861: }
0862:
0863: private final void skipIgnoreSect() throws IOException {
0864: for (;;) {
0865: try {
0866: bufStart = enc.skipIgnoreSect(buf, bufStart, bufEnd);
0867: return;
0868: } catch (PartialTokenException e) {
0869: if (!fill()) {
0870: currentTokenStart = bufStart;
0871: fatal(MessageId.UNCLOSED_CONDITIONAL_SECTION);
0872: }
0873: } catch (InvalidTokenException e) {
0874: currentTokenStart = e.getOffset();
0875: fatal(MessageId.IGNORE_SECT_CHAR);
0876: }
0877: }
0878: }
0879:
0880: private final void parseContent(boolean oneElement)
0881: throws IOException, ApplicationException {
0882: byte[] buf = this .buf;
0883: int bufEnd = this .bufEnd;
0884: int bufStart = this .bufStart;
0885: Encoding enc = this .enc;
0886: int nOpenElements = 0;
0887: byte[] openElementNameBuf = new byte[64];
0888: // Indexed by nOpenElements
0889: int[] openElementNameStart = new int[8];
0890: openElementNameStart[0] = 0;
0891: for (;;) {
0892: try {
0893: switch (enc
0894: .tokenizeContent(buf, bufStart, bufEnd, this )) {
0895: case Encoding.TOK_START_TAG_WITH_ATTS:
0896: storeAtts();
0897: /* fall through */
0898: case Encoding.TOK_START_TAG_NO_ATTS:
0899: if (nOpenElements + 1 >= openElementNameStart.length)
0900: openElementNameStart = grow(openElementNameStart);
0901: nameStart = bufStart + minBPC;
0902: nAttributes = -1;
0903: /* Update currentTokenStart so that getLocation works. */
0904: currentTokenStart = bufStart;
0905: try {
0906: app.startElement(this );
0907: } catch (RuntimeException e) {
0908: throw e;
0909: } catch (Exception e) {
0910: throw new ApplicationException(e);
0911: }
0912: int nameLength = getNameEnd() - nameStart;
0913: int nameBufEnd = openElementNameStart[nOpenElements];
0914: if ((openElementNameStart[nOpenElements + 1] = nameBufEnd
0915: + nameLength) > openElementNameBuf.length) {
0916: byte[] tem = new byte[(openElementNameBuf.length << 1)
0917: + nameLength];
0918: System.arraycopy(openElementNameBuf, 0, tem, 0,
0919: openElementNameStart[nOpenElements]);
0920: openElementNameBuf = tem;
0921: }
0922: copyBytes(buf, nameStart, openElementNameBuf,
0923: nameBufEnd, nameLength);
0924: nOpenElements++;
0925: break;
0926: case Encoding.TOK_EMPTY_ELEMENT_WITH_ATTS:
0927: storeAtts();
0928: /* fall through */
0929: case Encoding.TOK_EMPTY_ELEMENT_NO_ATTS:
0930: nameStart = bufStart + minBPC;
0931: nAttributes = -1;
0932:
0933: /* Update currentTokenStart so that getLocation works. */
0934: currentTokenStart = bufStart;
0935: try {
0936: app.startElement(this );
0937: app.endElement(this );
0938: } catch (RuntimeException e) {
0939: throw e;
0940: } catch (Exception e) {
0941: throw new ApplicationException(e);
0942: }
0943: if (oneElement && nOpenElements == 0) {
0944: this .bufStart = getTokenEnd();
0945: return;
0946: }
0947: break;
0948: case Encoding.TOK_END_TAG:
0949: if (nOpenElements == 0) {
0950: currentTokenStart = bufStart;
0951: fatal(MessageId.INVALID_END_TAG);
0952: }
0953: --nOpenElements;
0954: nameStart = bufStart + 2 * minBPC;
0955: if (!bytesEqual(openElementNameBuf,
0956: openElementNameStart[nOpenElements],
0957: openElementNameStart[nOpenElements + 1],
0958: buf, nameStart, getNameEnd())) {
0959: String expected = stringCache
0960: .convert(
0961: openElementNameBuf,
0962: openElementNameStart[nOpenElements],
0963: openElementNameStart[nOpenElements + 1],
0964: false);
0965: String got = stringCache.convert(buf,
0966: nameStart, getNameEnd(), false);
0967: currentTokenStart = bufStart;
0968: fatal(MessageId.MISMATCHED_END_TAG, got,
0969: expected);
0970: }
0971: try {
0972: app.endElement(this );
0973: } catch (RuntimeException e) {
0974: throw e;
0975: } catch (Exception e) {
0976: throw new ApplicationException(e);
0977: }
0978: if (oneElement && nOpenElements == 0) {
0979: this .bufStart = getTokenEnd();
0980: return;
0981: }
0982: break;
0983: case Encoding.TOK_DATA_CHARS:
0984: data = null;
0985: this .bufStart = bufStart;
0986: reportCharacterData();
0987: break;
0988: case Encoding.TOK_DATA_NEWLINE:
0989: dataBuf[0] = '\n';
0990: dataLength = 1;
0991: data = dataBuf;
0992: reportCharacterData();
0993: break;
0994: case Encoding.TOK_MAGIC_ENTITY_REF:
0995: case Encoding.TOK_CHAR_REF:
0996: dataBuf[0] = getRefChar();
0997: dataLength = 1;
0998: data = dataBuf;
0999: dataIsRef = true;
1000: reportCharacterData();
1001: dataIsRef = false;
1002: break;
1003: case Encoding.TOK_CHAR_PAIR_REF:
1004: getRefCharPair(dataBuf, 0);
1005: data = dataBuf;
1006: dataLength = 2;
1007: dataIsRef = true;
1008: reportCharacterData();
1009: dataIsRef = false;
1010: break;
1011: case Encoding.TOK_CDATA_SECT_OPEN:
1012: currentTokenStart = bufStart;
1013: try {
1014: app.startCdataSection(this );
1015: } catch (RuntimeException e) {
1016: throw e;
1017: } catch (Exception e) {
1018: throw new ApplicationException(e);
1019: }
1020: this .bufStart = getTokenEnd();
1021: parseCdataSection();
1022: buf = this .buf;
1023: bufStart = this .bufStart;
1024: bufEnd = this .bufEnd;
1025: break;
1026: case Encoding.TOK_ENTITY_REF: {
1027: nameStart = bufStart + minBPC;
1028: String name = stringCache.convert(buf, nameStart,
1029: getNameEnd(), true);
1030: EntityImpl entity = (EntityImpl) dtd.generalEntityTable
1031: .get(name);
1032: if (entity == null) {
1033: if (dtd.complete || dtd.standalone) {
1034: currentTokenStart = bufStart;
1035: fatal(MessageId.UNDEF_REF, name);
1036: }
1037: break;
1038: }
1039: EntityParser parser = makeParserForEntity(entity,
1040: name, false);
1041: if (parser == null)
1042: break;
1043: reportStartEntityReference();
1044: entity.open = true;
1045: if (entity.text != null) {
1046: currentTokenStart = this .bufStart = bufStart;
1047: parser.parseContent(false);
1048: } else
1049: parser.parseExternalTextEntity();
1050: reportEndEntityReference();
1051: entity.open = false;
1052: break;
1053: }
1054: case Encoding.TOK_PI:
1055: nameStart = bufStart + minBPC * 2;
1056: currentTokenStart = bufStart;
1057: reportProcessingInstruction();
1058: break;
1059: case Encoding.TOK_COMMENT:
1060: currentTokenStart = bufStart;
1061: reportComment();
1062: break;
1063: case Encoding.TOK_XML_DECL:
1064: currentTokenStart = bufStart;
1065: fatal(MessageId.MISPLACED_XML_DECL);
1066: }
1067: bufStart = getTokenEnd();
1068: } catch (EmptyTokenException e) {
1069: this .bufStart = bufStart;
1070: if (!fill()) {
1071: if (oneElement || nOpenElements > 0) {
1072: currentTokenStart = this .bufStart;
1073: fatal(MessageId.MISSING_END_TAG);
1074: }
1075: return;
1076: }
1077: buf = this .buf;
1078: bufStart = this .bufStart;
1079: bufEnd = this .bufEnd;
1080: } catch (PartialTokenException e) {
1081: this .bufStart = bufStart;
1082: if (!fill()) {
1083: currentTokenStart = this .bufStart;
1084: fatal(MessageId.UNCLOSED_TOKEN);
1085: }
1086: buf = this .buf;
1087: bufStart = this .bufStart;
1088: bufEnd = this .bufEnd;
1089: } catch (ExtensibleTokenException e) {
1090: this .bufStart = bufStart;
1091: if (!fill()) {
1092: if (oneElement || nOpenElements > 0) {
1093: currentTokenStart = this .bufStart;
1094: fatal(MessageId.MISSING_END_TAG);
1095: }
1096: switch (e.getTokenType()) {
1097: case Encoding.TOK_DATA_NEWLINE:
1098: dataBuf[0] = '\n';
1099: dataLength = 1;
1100: data = dataBuf;
1101: reportCharacterData();
1102: break;
1103: case Encoding.TOK_DATA_CHARS:
1104: data = null;
1105: setTokenEnd(this .bufEnd);
1106: reportCharacterData();
1107: break;
1108: default:
1109: throw new Error("extensible token botch");
1110: }
1111: return;
1112: }
1113: buf = this .buf;
1114: bufStart = this .bufStart;
1115: bufEnd = this .bufEnd;
1116: } catch (InvalidTokenException e) {
1117: currentTokenStart = e.getOffset();
1118: reportInvalidToken(e);
1119: }
1120: }
1121: }
1122:
1123: private final void parseCdataSection() throws IOException,
1124: InvalidTokenException, ApplicationException {
1125: for (;;) {
1126: try {
1127: switch (enc.tokenizeCdataSection(buf, bufStart, bufEnd,
1128: this )) {
1129: case Encoding.TOK_DATA_CHARS:
1130: data = null;
1131: reportCharacterData();
1132: break;
1133: case Encoding.TOK_DATA_NEWLINE:
1134: dataBuf[0] = '\n';
1135: dataLength = 1;
1136: data = dataBuf;
1137: reportCharacterData();
1138: break;
1139: case Encoding.TOK_CDATA_SECT_CLOSE:
1140: currentTokenStart = bufStart;
1141: try {
1142: app.endCdataSection(this );
1143: } catch (RuntimeException e) {
1144: throw e;
1145: } catch (Exception e) {
1146: throw new ApplicationException(e);
1147: }
1148: return;
1149: }
1150: bufStart = getTokenEnd();
1151: } catch (InvalidTokenException e) {
1152: throw e;
1153: } catch (TokenException e) {
1154: if (!fill()) {
1155: currentTokenStart = this .bufStart;
1156: fatal(MessageId.UNCLOSED_CDATA_SECTION);
1157: }
1158: }
1159: }
1160: }
1161:
1162: private EntityParser makeParserForEntity(EntityImpl entity,
1163: String name, boolean isParameter) throws IOException {
1164: if (entity.open)
1165: fatal(MessageId.RECURSION);
1166: if (entity.notationName != null)
1167: fatal(MessageId.UNPARSED_REF);
1168: if (entity.text != null)
1169: return new EntityParser(entity.text, name, isParameter,
1170: this );
1171: OpenEntity openEntity = entityManager.open(entity.systemId,
1172: entity.baseURL, entity.publicId);
1173: if (openEntity == null)
1174: return null;
1175: return new EntityParser(openEntity, entityManager, app, locale,
1176: this );
1177: }
1178:
1179: private final void storeAtts() throws NotWellFormedException {
1180: int i = getAttributeSpecifiedCount();
1181: ElementTypeImpl elementType = null;
1182: boolean gotElementType = false;
1183: while (i != 0) {
1184: --i;
1185: if (!isAttributeNormalized(i)) {
1186: valueBuf.clear();
1187: if (!gotElementType)
1188: elementType = (ElementTypeImpl) dtd.elementTypeTable
1189: .get(stringCache.convert(buf, bufStart
1190: + minBPC, getNameEnd(), true));
1191: boolean isCdata;
1192: if (elementType != null) {
1193: String attName = stringCache.convert(buf,
1194: getAttributeNameStart(i),
1195: getAttributeNameEnd(i), true);
1196: isCdata = elementType.isAttributeCdata(attName);
1197: } else
1198: isCdata = true;
1199: String val = makeAttributeValue(isCdata, buf,
1200: getAttributeValueStart(i),
1201: getAttributeValueEnd(i));
1202: setAttributeValue(i, val);
1203: }
1204: }
1205: }
1206:
1207: private String makeAttributeValue(boolean isCdata, byte[] buf,
1208: int start, int end) throws NotWellFormedException {
1209:
1210: /* appendAttributeValue will trash currentTokenStart. */
1211: int saveCurrentTokenStart = currentTokenStart;
1212: int saveNameEnd = getNameEnd();
1213: valueBuf.clear();
1214: appendAttributeValue(isCdata, start, end, valueBuf);
1215: if (!isCdata && valueBuf.length() > 0
1216: && valueBuf.charAt(valueBuf.length() - 1) == ' ')
1217: valueBuf.chop();
1218: currentTokenStart = saveCurrentTokenStart;
1219: setNameEnd(saveNameEnd);
1220: return valueBuf.toString();
1221: }
1222:
1223: private void appendAttributeValue(boolean isCdata, int start,
1224: int end, Buffer valueBuf) throws NotWellFormedException {
1225: Token t = new Token();
1226: try {
1227: for (;;) {
1228: int tok;
1229: int nextStart;
1230: try {
1231: tok = enc
1232: .tokenizeAttributeValue(buf, start, end, t);
1233: nextStart = t.getTokenEnd();
1234: } catch (ExtensibleTokenException e) {
1235: tok = e.getTokenType();
1236: nextStart = end;
1237: }
1238: currentTokenStart = start;
1239: switch (tok) {
1240: case Encoding.TOK_DATA_CHARS:
1241: valueBuf.append(enc, buf, start, t.getTokenEnd());
1242: break;
1243: case Encoding.TOK_MAGIC_ENTITY_REF:
1244: case Encoding.TOK_CHAR_REF:
1245: if (isCdata
1246: || t.getRefChar() != ' '
1247: || (valueBuf.length() > 0 && valueBuf
1248: .charAt(valueBuf.length() - 1) != ' '))
1249: valueBuf.append(t.getRefChar());
1250: break;
1251: case Encoding.TOK_CHAR_PAIR_REF:
1252: valueBuf.appendRefCharPair(t);
1253: break;
1254: case Encoding.TOK_ATTRIBUTE_VALUE_S:
1255: case Encoding.TOK_DATA_NEWLINE:
1256: if (isCdata
1257: || (valueBuf.length() > 0 && valueBuf
1258: .charAt(valueBuf.length() - 1) != ' '))
1259: valueBuf.append(' ');
1260: break;
1261: case Encoding.TOK_ENTITY_REF:
1262: String name = stringCache.convert(buf, start
1263: + minBPC, t.getTokenEnd() - minBPC, true);
1264: EntityImpl entity = (EntityImpl) dtd.generalEntityTable
1265: .get(name);
1266: if (entity == null) {
1267: if (dtd.complete || dtd.standalone)
1268: fatal(MessageId.UNDEF_REF, name);
1269: break;
1270: }
1271: if (entity.systemId != null)
1272: fatal(MessageId.EXTERN_REF_ATTVAL);
1273: try {
1274: EntityParser parser = makeParserForEntity(
1275: entity, name, false);
1276: entity.open = true;
1277: parser.appendAttributeValue(isCdata, 0,
1278: parser.bufEnd, valueBuf);
1279: entity.open = false;
1280: } catch (NotWellFormedException e) {
1281: throw e;
1282: } catch (IOException e) {
1283: // Shouldn't happen since the entity is internal.
1284: throw new Error("unexpected IOException");
1285: }
1286: break;
1287: default:
1288: throw new Error("attribute value botch");
1289: }
1290: start = nextStart;
1291: }
1292: } catch (PartialTokenException e) {
1293: currentTokenStart = end;
1294: fatal(MessageId.NOT_WELL_FORMED);
1295: } catch (InvalidTokenException e) {
1296: currentTokenStart = e.getOffset();
1297: reportInvalidToken(e);
1298: } catch (EmptyTokenException e) {
1299: }
1300: }
1301:
1302: /*
1303: * Make the replacement text for an entity out of the literal in the
1304: * current token.
1305: */
1306: private byte[] makeReplacementText(boolean allowPerefs)
1307: throws IOException {
1308: valueBuf.clear();
1309: Token t = new Token();
1310: int start = currentTokenStart + minBPC;
1311: final int end = bufStart - minBPC;
1312: try {
1313: for (;;) {
1314: int tok;
1315: int nextStart;
1316: try {
1317: tok = enc.tokenizeEntityValue(buf, start, end, t);
1318: nextStart = t.getTokenEnd();
1319: } catch (ExtensibleTokenException e) {
1320: tok = e.getTokenType();
1321: nextStart = end;
1322: }
1323: if (tok == Encoding.TOK_PARAM_ENTITY_REF
1324: && !allowPerefs) {
1325: currentTokenStart = start;
1326: fatal(MessageId.INTERNAL_PEREF_ENTVAL);
1327: }
1328: handleEntityValueToken(valueBuf, tok, start, nextStart,
1329: t);
1330: start = nextStart;
1331: }
1332: } catch (PartialTokenException e) {
1333: currentTokenStart = end;
1334: fatal(MessageId.NOT_WELL_FORMED);
1335: } catch (InvalidTokenException e) {
1336: currentTokenStart = e.getOffset();
1337: reportInvalidToken(e);
1338: } catch (EmptyTokenException e) {
1339: }
1340:
1341: return valueBuf.getBytes();
1342: }
1343:
1344: private void parseEntityValue(Buffer value) throws IOException {
1345: final Token t = new Token();
1346: for (;;) {
1347: int tok;
1348: for (;;) {
1349: try {
1350: tok = enc.tokenizeEntityValue(buf, bufStart,
1351: bufEnd, t);
1352: currentTokenStart = bufStart;
1353: bufStart = t.getTokenEnd();
1354: break;
1355: } catch (EmptyTokenException e) {
1356: if (!fill())
1357: return;
1358: } catch (PartialTokenException e) {
1359: if (!fill()) {
1360: currentTokenStart = bufStart;
1361: bufStart = bufEnd;
1362: fatal(MessageId.UNCLOSED_TOKEN);
1363: }
1364: } catch (ExtensibleTokenException e) {
1365: if (!fill()) {
1366: currentTokenStart = bufStart;
1367: bufStart = bufEnd;
1368: tok = e.getTokenType();
1369: break;
1370: }
1371: } catch (InvalidTokenException e) {
1372: currentTokenStart = e.getOffset();
1373: reportInvalidToken(e);
1374: }
1375: }
1376: handleEntityValueToken(value, tok, currentTokenStart,
1377: bufStart, t);
1378: }
1379: }
1380:
1381: private void handleEntityValueToken(Buffer value, int tok,
1382: int start, int end, Token t) throws IOException {
1383: switch (tok) {
1384: case Encoding.TOK_DATA_CHARS:
1385: case Encoding.TOK_ENTITY_REF:
1386: case Encoding.TOK_MAGIC_ENTITY_REF:
1387: value.append(enc, buf, start, end);
1388: break;
1389: case Encoding.TOK_CHAR_REF:
1390: value.append(t.getRefChar());
1391: break;
1392: case Encoding.TOK_CHAR_PAIR_REF:
1393: value.appendRefCharPair(t);
1394: break;
1395: case Encoding.TOK_DATA_NEWLINE:
1396: value.append('\n');
1397: break;
1398: case Encoding.TOK_PARAM_ENTITY_REF:
1399: String name = stringCache.convert(buf, start + minBPC, end
1400: - minBPC, true);
1401: EntityImpl entity = (EntityImpl) dtd.paramEntityTable
1402: .get(name);
1403: if (entity == null) {
1404: if (dtd.complete)
1405: fatal(MessageId.UNDEF_PEREF, name);
1406: break;
1407: }
1408: EntityParser parser = makeParserForEntity(entity, name,
1409: true);
1410: if (parser != null) {
1411: entity.open = true;
1412: parser.parseEntityValue(value);
1413: entity.open = false;
1414: }
1415: break;
1416: default:
1417: throw new Error("replacement text botch");
1418: }
1419: }
1420:
1421: private void parseMisc() throws IOException, ApplicationException {
1422: try {
1423: for (;;) {
1424: switch (tokenizeProlog()) {
1425: case Encoding.TOK_PI:
1426: nameStart = currentTokenStart + minBPC + minBPC;
1427: reportProcessingInstruction();
1428: break;
1429: case Encoding.TOK_COMMENT:
1430: reportComment();
1431: break;
1432: case Encoding.TOK_PROLOG_S:
1433: break;
1434: default:
1435: fatal(MessageId.EPILOG_JUNK);
1436: }
1437: }
1438: } catch (EndOfPrologException e) {
1439: currentTokenStart = bufStart;
1440: fatal(MessageId.ELEMENT_AFTER_DOCUMENT_ELEMENT);
1441: } catch (EmptyTokenException e) {
1442: }
1443: }
1444:
1445: private final int tokenizeProlog() throws IOException,
1446: EmptyTokenException, EndOfPrologException {
1447: for (;;) {
1448: try {
1449: int tok = enc.tokenizeProlog(buf, bufStart, bufEnd,
1450: this );
1451: currentTokenStart = bufStart;
1452: bufStart = getTokenEnd();
1453: return tok;
1454: } catch (EmptyTokenException e) {
1455: if (!fill())
1456: throw e;
1457: } catch (PartialTokenException e) {
1458: if (!fill()) {
1459: currentTokenStart = bufStart;
1460: bufStart = bufEnd;
1461: fatal(MessageId.UNCLOSED_TOKEN);
1462: }
1463: } catch (ExtensibleTokenException e) {
1464: if (!fill()) {
1465: currentTokenStart = bufStart;
1466: bufStart = bufEnd;
1467: return e.getTokenType();
1468: }
1469: } catch (InvalidTokenException e) {
1470: bufStart = currentTokenStart = e.getOffset();
1471: reportInvalidToken(e);
1472: }
1473: }
1474: }
1475:
1476: private static final int[] grow(int[] v) {
1477: int[] tem = v;
1478: v = new int[tem.length << 1];
1479: System.arraycopy(tem, 0, v, 0, tem.length);
1480: return v;
1481: }
1482:
1483: private long getEntityByteIndex(int off) {
1484: return bufEndStreamOffset - (bufEnd - off);
1485: }
1486:
1487: /* The size of the buffer is always a multiple of READSIZE.
1488: We do reads so that a complete read would end at the
1489: end of the buffer. Unless there has been an incomplete
1490: read, we always read in multiples of READSIZE. */
1491: private boolean fill() throws IOException {
1492: if (in == null)
1493: return false;
1494: if (bufEnd == buf.length) {
1495: enc.movePosition(buf, posOff, bufStart, pos);
1496: /* The last read was complete. */
1497: int keep = bufEnd - bufStart;
1498: if (keep == 0)
1499: bufEnd = 0;
1500: else if (keep + READSIZE <= buf.length) {
1501: /*
1502: * There is space in the buffer for at least READSIZE bytes.
1503: * Choose bufEnd so that it is the least non-negative integer
1504: * greater than or equal to <code>keep</code>, such
1505: * <code>bufLength - keep</code> is a multiple of READSIZE.
1506: */
1507: bufEnd = buf.length
1508: - (((buf.length - keep) / READSIZE) * READSIZE);
1509: for (int i = 0; i < keep; i++)
1510: buf[bufEnd - keep + i] = buf[bufStart + i];
1511: } else {
1512: byte newBuf[] = new byte[buf.length << 1];
1513: bufEnd = buf.length;
1514: System.arraycopy(buf, bufStart, newBuf, bufEnd - keep,
1515: keep);
1516: buf = newBuf;
1517: }
1518: bufStart = bufEnd - keep;
1519: posOff = bufStart;
1520: }
1521: int nBytes = in.read(buf, bufEnd, buf.length - bufEnd);
1522: if (nBytes < 0) {
1523: in.close();
1524: in = null;
1525: return false;
1526: }
1527: bufEnd += nBytes;
1528: bufEndStreamOffset += nBytes;
1529: return true;
1530: }
1531:
1532: private void reportInvalidToken(InvalidTokenException e)
1533: throws NotWellFormedException {
1534: switch (e.getType()) {
1535: case InvalidTokenException.DUPLICATE_ATTRIBUTE:
1536: fatal(MessageId.DUPLICATE_ATTRIBUTE);
1537: case InvalidTokenException.XML_TARGET:
1538: fatal(MessageId.XML_TARGET);
1539: }
1540: fatal(MessageId.ILLEGAL_CHAR);
1541: }
1542:
1543: private void fatal(String message) throws NotWellFormedException {
1544: doFatal(message, null);
1545: }
1546:
1547: private void fatal(String message, Object arg)
1548: throws NotWellFormedException {
1549: doFatal(message, new Object[] { arg });
1550: }
1551:
1552: private void fatal(String message, Object arg1, Object arg2)
1553: throws NotWellFormedException {
1554: doFatal(message, new Object[] { arg1, arg2 });
1555: }
1556:
1557: private void doFatal(String id, Object[] args)
1558: throws NotWellFormedException {
1559: if (parent != null)
1560: parent.doFatal(id, args);
1561: if (posOff > currentTokenStart)
1562: throw new Error("positioning botch");
1563: if (enc != null)
1564: enc.movePosition(buf, posOff, currentTokenStart, pos);
1565: posOff = currentTokenStart;
1566: String desc = id;
1567: String message = null;
1568: try {
1569: ResourceBundle resources = ResourceBundle.getBundle(
1570: "com.jclark.xml.parse.Messages", locale);
1571: desc = resources.getString(id);
1572: if (args != null)
1573: desc = MessageFormat.format(desc, args);
1574: Object[] msgArgs = new Object[] { desc, location,
1575: new Integer(pos.getLineNumber()),
1576: new Integer(pos.getColumnNumber()),
1577: new Long(getEntityByteIndex(currentTokenStart)) };
1578: message = MessageFormat.format(resources
1579: .getString(MessageId.MESSAGE_FORMAT), msgArgs);
1580: } catch (MissingResourceException e) {
1581: message = desc;
1582: } catch (IllegalArgumentException e) {
1583: message = desc;
1584: }
1585: throw new NotWellFormedException(message, desc, location,
1586: baseURL, pos.getLineNumber(), pos.getColumnNumber(),
1587: getEntityByteIndex(currentTokenStart));
1588: }
1589:
1590: private static final boolean bytesEqual(byte[] buf1, int start1,
1591: int end1, byte[] buf2, int start2, int end2) {
1592: int len = end1 - start1;
1593: if (end2 - start2 != len)
1594: return false;
1595: for (; len > 0; --len)
1596: if (buf1[start1++] != buf2[start2++])
1597: return false;
1598: return true;
1599: }
1600:
1601: private final static void copyBytes(byte[] from, int fromOff,
1602: byte[] to, int toOff, int len) {
1603: while (--len >= 0) {
1604: to[toOff++] = from[fromOff++];
1605: }
1606: }
1607:
1608: public ParseLocation getLocation() {
1609: if (parent != null)
1610: return parent.getLocation();
1611: if (posOff > currentTokenStart)
1612: throw new Error("positioning botch");
1613: if (enc != null)
1614: enc.movePosition(buf, posOff, currentTokenStart, pos);
1615: posOff = currentTokenStart;
1616: return this ;
1617: }
1618:
1619: public String getEntityLocation() {
1620: return location;
1621: }
1622:
1623: public URL getEntityBase() {
1624: return baseURL;
1625: }
1626:
1627: public long getByteIndex() {
1628: return getEntityByteIndex(currentTokenStart);
1629: }
1630:
1631: public int getLineNumber() {
1632: return pos.getLineNumber();
1633: }
1634:
1635: public int getColumnNumber() {
1636: return pos.getColumnNumber();
1637: }
1638:
1639: public String getName() {
1640: return stringCache.convert(buf, nameStart, getNameEnd(), true);
1641: }
1642:
1643: public DTD getDTD() {
1644: return dtd;
1645: }
1646:
1647: public int getLength() {
1648: if (data == null) {
1649: if (fixBPC != 0)
1650: return (getTokenEnd() - bufStart) / fixBPC;
1651: convertData(bufStart, getTokenEnd());
1652: }
1653: return dataLength;
1654: }
1655:
1656: public boolean isReference() {
1657: return dataIsRef;
1658: }
1659:
1660: public int getLengthMax() {
1661: if (data != null)
1662: return dataLength;
1663: else
1664: return (getTokenEnd() - bufStart) / minBPC;
1665: }
1666:
1667: public int copyChars(char[] cbuf, int off) {
1668: if (data != null) {
1669: System.arraycopy(data, 0, cbuf, off, dataLength);
1670: return dataLength;
1671: } else
1672: return enc.convert(buf, bufStart, getTokenEnd(), cbuf, off);
1673: }
1674:
1675: public void writeChars(Writer writer) throws IOException {
1676: if (data == null)
1677: convertData(bufStart, getTokenEnd());
1678: writer.write(data, 0, dataLength);
1679: }
1680:
1681: private void convertData(int start, int end) {
1682: if (dataBuf == null || dataBuf.length * minBPC < end - start)
1683: dataBuf = new char[(end - start) / minBPC];
1684: dataLength = enc.convert(buf, start, end, dataBuf, 0);
1685: data = dataBuf;
1686: }
1687:
1688: private final void setAttributeValue(int index, String value) {
1689: if (attValues == null)
1690: attValues = new String[index + 10];
1691: else if (index >= attValues.length) {
1692: String[] tem = new String[index << 1];
1693: System.arraycopy(attValues, 0, tem, 0, attValues.length);
1694: attValues = tem;
1695: }
1696: attValues[index] = value;
1697: }
1698:
1699: public final int getAttributeCount() {
1700: if (nAttributes < 0)
1701: buildAttributes();
1702: return nAttributes;
1703: }
1704:
1705: public final int getIdAttributeIndex() {
1706: if (nAttributes < 0)
1707: buildAttributes();
1708: return idAttributeIndex;
1709: }
1710:
1711: public final String getAttributeName(int i) {
1712: if (nAttributes < 0)
1713: buildAttributes();
1714: if (i >= nAttributes)
1715: throw new IndexOutOfBoundsException();
1716: return attNames[i];
1717: }
1718:
1719: public final String getAttributeValue(int i) {
1720: if (nAttributes < 0)
1721: buildAttributes();
1722: if (i < getAttributeSpecifiedCount()) {
1723: if (isAttributeNormalized(i))
1724: return stringCache.convert(buf,
1725: getAttributeValueStart(i),
1726: getAttributeValueEnd(i), false);
1727: } else if (i >= nAttributes)
1728: throw new IndexOutOfBoundsException();
1729: return attValues[i];
1730: }
1731:
1732: public final String getAttributeUnnormalizedValue(int i) {
1733: if (i >= getAttributeSpecifiedCount() || i < 0)
1734: throw new IndexOutOfBoundsException();
1735: return normalizeNewlines(stringCache.convert(buf,
1736: getAttributeValueStart(i), getAttributeValueEnd(i),
1737: false));
1738: }
1739:
1740: public final String getAttributeValue(String name) {
1741: if (nAttributes < 0)
1742: buildAttributes();
1743: for (int i = 0; i < nAttributes; i++) {
1744: if (attNames[i].equals(name)) {
1745: if (i < getAttributeSpecifiedCount()
1746: && isAttributeNormalized(i))
1747: return stringCache.convert(buf,
1748: getAttributeValueStart(i),
1749: getAttributeValueEnd(i), false);
1750: else
1751: return attValues[i];
1752: }
1753: }
1754: return null;
1755: }
1756:
1757: private void buildAttributes() {
1758: ElementTypeImpl elementType = (ElementTypeImpl) dtd.elementTypeTable
1759: .get(getName());
1760: int nSpecAtts = getAttributeSpecifiedCount();
1761:
1762: {
1763: int totalAtts = nSpecAtts;
1764: if (elementType != null)
1765: totalAtts += elementType.getDefaultAttributeCount();
1766: if (attNames == null || totalAtts > attNames.length)
1767: attNames = new String[totalAtts];
1768: }
1769: for (int i = nSpecAtts; --i >= 0;)
1770: attNames[i] = stringCache.convert(buf,
1771: getAttributeNameStart(i), getAttributeNameEnd(i),
1772: true);
1773: nAttributes = nSpecAtts;
1774: idAttributeIndex = -1;
1775: if (elementType != null) {
1776: int nDefaults = elementType.getDefaultAttributeCount();
1777: if (defaultSpecified == null
1778: || nDefaults > defaultSpecified.length)
1779: defaultSpecified = new boolean[nDefaults];
1780: else {
1781: for (int i = 0; i < nDefaults; i++)
1782: defaultSpecified[i] = false;
1783: }
1784: for (int i = nSpecAtts; --i >= 0;) {
1785: int di = elementType
1786: .getAttributeDefaultIndex(attNames[i]);
1787: if (di >= 0)
1788: defaultSpecified[di] = true;
1789: else if (di == ElementTypeImpl.ID_DEFAULT_INDEX)
1790: idAttributeIndex = i;
1791: }
1792: for (int i = 0; i < nDefaults; i++) {
1793: if (!defaultSpecified[i]) {
1794: setAttributeValue(nAttributes, elementType
1795: .getDefaultAttributeValue(i));
1796: attNames[nAttributes] = elementType
1797: .getDefaultAttributeName(i);
1798: ++nAttributes;
1799: }
1800: }
1801: }
1802: }
1803:
1804: public final String getComment() {
1805: return normalizeNewlines(stringCache.convert(buf,
1806: currentTokenStart + 4 * minBPC, getTokenEnd() - 3
1807: * minBPC, false));
1808: }
1809:
1810: public final String getInstruction() {
1811: return normalizeNewlines(stringCache.convert(buf, enc.skipS(
1812: buf, getNameEnd(), getTokenEnd()), getTokenEnd() - 2
1813: * minBPC, false));
1814: }
1815:
1816: private final String normalizeNewlines(String str) {
1817: int i = str.indexOf('\r');
1818: if (i < 0)
1819: return str;
1820: StringBuffer buf = new StringBuffer();
1821: for (i = 0; i < str.length(); i++) {
1822: char c = str.charAt(i);
1823: if (c == '\r') {
1824: buf.append('\n');
1825: if (i + 1 < str.length() && str.charAt(i + 1) == '\n')
1826: i++;
1827: } else
1828: buf.append(c);
1829: }
1830: return buf.toString();
1831: }
1832:
1833: private final void reportCharacterData()
1834: throws ApplicationException {
1835: try {
1836: app.characterData(this );
1837: } catch (RuntimeException e) {
1838: throw e;
1839: } catch (Exception e) {
1840: throw new ApplicationException(e);
1841: }
1842: }
1843:
1844: private final void reportProcessingInstruction()
1845: throws ApplicationException {
1846: try {
1847: app.processingInstruction(this );
1848: } catch (RuntimeException e) {
1849: throw e;
1850: } catch (Exception e) {
1851: throw new ApplicationException(e);
1852: }
1853: }
1854:
1855: private final void reportComment() throws ApplicationException {
1856: try {
1857: app.comment(this );
1858: } catch (RuntimeException e) {
1859: throw e;
1860: } catch (Exception e) {
1861: throw new ApplicationException(e);
1862: }
1863: }
1864:
1865: private final void reportStartEntityReference()
1866: throws ApplicationException {
1867: try {
1868: app.startEntityReference(this );
1869: } catch (RuntimeException e) {
1870: throw e;
1871: } catch (Exception e) {
1872: throw new ApplicationException(e);
1873: }
1874: }
1875:
1876: private final void reportEndEntityReference()
1877: throws ApplicationException {
1878: try {
1879: app.endEntityReference(this );
1880: } catch (RuntimeException e) {
1881: throw e;
1882: } catch (Exception e) {
1883: throw new ApplicationException(e);
1884: }
1885: }
1886:
1887: private final void reportMarkupDeclaration(DeclState declState)
1888: throws ApplicationException {
1889: try {
1890: app.markupDeclaration(declState);
1891: } catch (RuntimeException e) {
1892: throw e;
1893: } catch (Exception e) {
1894: throw new ApplicationException(e);
1895: }
1896: }
1897:
1898: private final void reportStartDocumentTypeDeclaration(
1899: DeclState declState) throws ApplicationException {
1900: try {
1901: app.startDocumentTypeDeclaration(declState);
1902: } catch (RuntimeException e) {
1903: throw e;
1904: } catch (Exception e) {
1905: throw new ApplicationException(e);
1906: }
1907: }
1908:
1909: private final void reportEndDocumentTypeDeclaration(
1910: DeclState declState) throws ApplicationException {
1911: try {
1912: app.endDocumentTypeDeclaration(declState);
1913: } catch (RuntimeException e) {
1914: throw e;
1915: } catch (Exception e) {
1916: throw new ApplicationException(e);
1917: }
1918: }
1919: }
|