0001: /*
0002: * The Apache Software License, Version 1.1
0003: *
0004: *
0005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
0006: * reserved.
0007: *
0008: * Redistribution and use in source and binary forms, with or without
0009: * modification, are permitted provided that the following conditions
0010: * are met:
0011: *
0012: * 1. Redistributions of source code must retain the above copyright
0013: * notice, this list of conditions and the following disclaimer.
0014: *
0015: * 2. Redistributions in binary form must reproduce the above copyright
0016: * notice, this list of conditions and the following disclaimer in
0017: * the documentation and/or other materials provided with the
0018: * distribution.
0019: *
0020: * 3. The end-user documentation included with the redistribution,
0021: * if any, must include the following acknowledgment:
0022: * "This product includes software developed by the
0023: * Apache Software Foundation (http://www.apache.org/)."
0024: * Alternately, this acknowledgment may appear in the software itself,
0025: * if and wherever such third-party acknowledgments normally appear.
0026: *
0027: * 4. The names "Xerces" and "Apache Software Foundation" must
0028: * not be used to endorse or promote products derived from this
0029: * software without prior written permission. For written
0030: * permission, please contact apache@apache.org.
0031: *
0032: * 5. Products derived from this software may not be called "Apache",
0033: * nor may "Apache" appear in their name, without prior written
0034: * permission of the Apache Software Foundation.
0035: *
0036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: * SUCH DAMAGE.
0048: * ====================================================================
0049: *
0050: * This software consists of voluntary contributions made by many
0051: * individuals on behalf of the Apache Software Foundation and was
0052: * originally based on software copyright (c) 1999, International
0053: * Business Machines, Inc., http://www.apache.org. For more
0054: * information on the Apache Software Foundation, please see
0055: * <http://www.apache.org/>.
0056: */
0057:
0058: package org.apache.xerces.validators.common;
0059:
0060: import org.apache.xerces.framework.XMLContentSpec;
0061: import org.apache.xerces.utils.ImplementationMessages;
0062: import org.apache.xerces.utils.QName;
0063: import org.apache.xerces.validators.schema.SubstitutionGroupComparator;
0064: import org.apache.xerces.utils.StringPool;
0065: import org.apache.xerces.validators.schema.SchemaGrammar;
0066:
0067: /**
0068: * DFAContentModel is the derivative of ContentModel that does
0069: * all of the non-trivial element content validation. This class does
0070: * the conversion from the regular expression to the DFA that
0071: * it then uses in its validation algorithm.
0072: *
0073: * @version $Id: DFAContentModel.java,v 1.32 2001/06/20 20:54:37 neilg Exp $
0074: */
0075: public class DFAContentModel implements XMLContentModel {
0076:
0077: //
0078: // Constants
0079: //
0080: // special strings
0081:
0082: /** Epsilon string. */
0083: //private static final String fEpsilonString = "<<CMNODE_EPSILON>>";
0084: private static final int EPSILON = -2;
0085:
0086: /** End-of-content string. */
0087: //private static final String fEOCString = "<<CMNODE_EOC>>";
0088: private static final int EOC = -3;
0089:
0090: // debugging
0091:
0092: /** Set to true to debug content model validation. */
0093: private static final boolean DEBUG_VALIDATE_CONTENT = false;
0094:
0095: //
0096: // Data
0097: //
0098:
0099: /* this is the SubstitutionGroupComparator object */
0100: private SubstitutionGroupComparator comparator = null;
0101:
0102: /**
0103: * This is the map of unique input symbol elements to indices into
0104: * each state's per-input symbol transition table entry. This is part
0105: * of the built DFA information that must be kept around to do the
0106: * actual validation.
0107: */
0108: private QName fElemMap[] = null;
0109:
0110: /**
0111: * This is a map of whether the element map contains information
0112: * related to ANY models.
0113: */
0114: private int fElemMapType[] = null;
0115:
0116: /** The element map size. */
0117: private int fElemMapSize = 0;
0118:
0119: /** Boolean to allow DTDs to validate even with namespace support. */
0120: private boolean fDTD;
0121:
0122: /* Used to indicated mixed model */
0123: private boolean fMixed;
0124:
0125: /**
0126: * The string index for the 'end of content' string that we add to
0127: * the string pool. This is used as the special name of an element
0128: * that represents the end of the syntax tree.
0129: */
0130: private int fEOCIndex = 0;
0131:
0132: /**
0133: * The NFA position of the special EOC (end of content) node. This
0134: * is saved away since it's used during the DFA build.
0135: */
0136: private int fEOCPos = 0;
0137:
0138: /**
0139: * The string index for the 'epsilon' string that we add to the
0140: * string pool. This represents epsilon node transitions in the
0141: * syntax tree.
0142: */
0143: private int fEpsilonIndex = 0;
0144:
0145: /**
0146: * This is an array of booleans, one per state (there are
0147: * fTransTableSize states in the DFA) that indicates whether that
0148: * state is a final state.
0149: */
0150: private boolean fFinalStateFlags[] = null;
0151:
0152: /**
0153: * The list of follow positions for each NFA position (i.e. for each
0154: * non-epsilon leaf node.) This is only used during the building of
0155: * the DFA, and is let go afterwards.
0156: */
0157: private CMStateSet fFollowList[] = null;
0158:
0159: /**
0160: * This is the head node of our intermediate representation. It is
0161: * only non-null during the building of the DFA (just so that it
0162: * does not have to be passed all around.) Once the DFA is built,
0163: * this is no longer required so its nulled out.
0164: */
0165: private CMNode fHeadNode = null;
0166:
0167: /**
0168: * The count of leaf nodes. This is an important number that set some
0169: * limits on the sizes of data structures in the DFA process.
0170: */
0171: private int fLeafCount = 0;
0172:
0173: /**
0174: * An array of non-epsilon leaf nodes, which is used during the DFA
0175: * build operation, then dropped.
0176: */
0177: private CMLeaf fLeafList[] = null;
0178:
0179: /** Array mapping ANY types to the leaf list. */
0180: private int fLeafListType[] = null;
0181:
0182: private ContentLeafNameTypeVector fLeafNameTypeVector = null;
0183:
0184: /**
0185: * The string pool of our parser session. This is set during construction
0186: * and kept around.
0187: */
0188: //private StringPool fStringPool = null;
0189: /**
0190: * This is the transition table that is the main by product of all
0191: * of the effort here. It is an array of arrays of ints. The first
0192: * dimension is the number of states we end up with in the DFA. The
0193: * second dimensions is the number of unique elements in the content
0194: * model (fElemMapSize). Each entry in the second dimension indicates
0195: * the new state given that input for the first dimension's start
0196: * state.
0197: * <p>
0198: * The fElemMap array handles mapping from element indexes to
0199: * positions in the second dimension of the transition table.
0200: */
0201: private int fTransTable[][] = null;
0202:
0203: /**
0204: * The number of valid entries in the transition table, and in the other
0205: * related tables such as fFinalStateFlags.
0206: */
0207: private int fTransTableSize = 0;
0208:
0209: /**
0210: * Flag that indicates that even though we have a "complicated"
0211: * content model, it is valid to have no content. In other words,
0212: * all parts of the content model are optional. For example:
0213: * <pre>
0214: * <!ELEMENT AllOptional (Optional*,NotRequired?)>
0215: * </pre>
0216: */
0217: private boolean fEmptyContentIsValid = false;
0218:
0219: // temp variables
0220:
0221: /** Temporary qualified name. */
0222: private QName fQName = new QName();
0223:
0224: //
0225: // Constructors
0226: //
0227:
0228: /**
0229: * Constructs a DFA content model.
0230: *
0231: * @param stringPool The string pool.
0232: * @param syntaxTree The syntax tree of the content model.
0233: * @param leafCount The number of leaves.
0234: *
0235: * @exception CMException Thrown if DMA can't be built.
0236: */
0237:
0238: // public DFAContentModel(StringPool stringPool,
0239: public DFAContentModel(CMNode syntaxTree, int leafCount)
0240: throws CMException {
0241: this (syntaxTree, leafCount, false, false);
0242: }
0243:
0244: /**
0245: * Constructs a DFA content model.
0246: *
0247: * @param stringPool The string pool.
0248: * @param syntaxTree The syntax tree of the content model.
0249: * @param leafCount The number of leaves.
0250: *
0251: * @exception CMException Thrown if DMA can't be built.
0252: */
0253:
0254: // public DFAContentModel(StringPool stringPool,
0255: public DFAContentModel(CMNode syntaxTree, int leafCount,
0256: boolean dtd, boolean mixed) throws CMException {
0257:
0258: // Store away our index and pools in members
0259: //fStringPool = stringPool;
0260: fLeafCount = leafCount;
0261:
0262: //
0263: // Create some string pool indexes that represent the names of some
0264: // magical nodes in the syntax tree.
0265: //
0266: /*** Defect 945 ***
0267: if (fEpsilonString == null)
0268: {
0269: fEpsilonString = new String("<<CMNODE_EPSILON>>");
0270: fEpsilonString.intern();
0271: fEOCString = new String("<<CMNODE_EOC>>");
0272: fEOCString.intern();
0273: }
0274: /***/
0275:
0276: // fEpsilonIndex = fStringPool.addSymbol(fEpsilonString);
0277: // fEOCIndex = fStringPool.addSymbol(fEOCString);
0278: fEpsilonIndex = EPSILON;
0279: fEOCIndex = EOC;
0280:
0281: fDTD = dtd;
0282: fMixed = mixed;
0283:
0284: //
0285: // Ok, so lets grind through the building of the DFA. This method
0286: // handles the high level logic of the algorithm, but it uses a
0287: // number of helper classes to do its thing.
0288: //
0289: // In order to avoid having hundreds of references to the error and
0290: // string handlers around, this guy and all of his helper classes
0291: // just throw a simple exception and we then pass it along.
0292: //
0293:
0294: DFAContentModel.time -= System.currentTimeMillis();
0295:
0296: buildDFA(syntaxTree);
0297:
0298: DFAContentModel.time += System.currentTimeMillis();
0299:
0300: if (DEBUG_VALIDATE_CONTENT)
0301: System.out.println("DFA build: " + DFAContentModel.time
0302: + "ms");
0303: }
0304:
0305: private static long time = 0;
0306:
0307: //
0308: // XMLContentModel methods
0309: //
0310:
0311: /**
0312: * Check that the specified content is valid according to this
0313: * content model. This method can also be called to do 'what if'
0314: * testing of content models just to see if they would be valid.
0315: * <p>
0316: * A value of -1 in the children array indicates a PCDATA node. All other
0317: * indexes will be positive and represent child elements. The count can be
0318: * zero, since some elements have the EMPTY content model and that must be
0319: * confirmed.
0320: *
0321: * @param children The children of this element. Each integer is an index within
0322: * the <code>StringPool</code> of the child element name. An index
0323: * of -1 is used to indicate an occurrence of non-whitespace character
0324: * data.
0325: * @param offset Offset into the array where the children starts.
0326: * @param length The number of entries in the <code>children</code> array.
0327: *
0328: * @return The value -1 if fully valid, else the 0 based index of the child
0329: * that first failed. If the value returned is equal to the number
0330: * of children, then the specified children are valid but additional
0331: * content is required to reach a valid ending state.
0332: *
0333: * @exception CMException Thrown on error.
0334: */
0335: public int validateContent(QName children[], int offset, int length)
0336: throws CMException {
0337:
0338: if (DEBUG_VALIDATE_CONTENT)
0339: System.out.println("DFAContentModel#validateContent");
0340:
0341: //
0342: // A DFA content model must *always* have at least 1 child
0343: // so a failure is given if no children present.
0344: //
0345: // Defect 782: This is an incorrect statement because a DFA
0346: // content model is also used for constructions such as:
0347: //
0348: // (Optional*,NotRequired?)
0349: //
0350: // where a perfectly valid content would be NO CHILDREN.
0351: // Therefore, if there are no children, we must check to
0352: // see if the CMNODE_EOC marker is a valid start state! -Ac
0353: //
0354: if (length == 0) {
0355: if (DEBUG_VALIDATE_CONTENT) {
0356: System.out.println("!!! no children");
0357: System.out.println("elemMap=" + fElemMap);
0358: for (int i = 0; i < fElemMap.length; i++) {
0359: int uriIndex = fElemMap[i].uri;
0360: int localpartIndex = fElemMap[i].localpart;
0361: /*
0362: System.out.println("fElemMap["+i+"]="+uriIndex+","+
0363: localpartIndex+" ("+
0364: fStringPool.toString(uriIndex)+", "+
0365: fStringPool.toString(localpartIndex)+
0366: ')');
0367: */
0368: }
0369: System.out.println("EOCIndex=" + fEOCIndex);
0370: }
0371:
0372: return fEmptyContentIsValid ? -1 : 0;
0373:
0374: } // if child count == 0
0375:
0376: //
0377: // Lets loop through the children in the array and move our way
0378: // through the states. Note that we use the fElemMap array to map
0379: // an element index to a state index.
0380: //
0381: int curState = 0;
0382: int nextState = 0;
0383: for (int childIndex = 0; childIndex < length; childIndex++) {
0384: // Get the current element index out
0385: final QName curElem = children[offset + childIndex];
0386: //System.out.println("children["+(offset+childIndex)+"]: "+curElem);
0387:
0388: // If this is text in a Schema mixed content model, skip it.
0389: if (fMixed && (curElem.localpart == -1)) {
0390: continue;
0391: }
0392:
0393: // Look up this child in our element map
0394: int elemIndex = 0;
0395: for (; elemIndex < fElemMapSize; elemIndex++) {
0396: if (fDTD) {
0397: if (fElemMap[elemIndex].rawname == curElem.rawname) {
0398: nextState = fTransTable[curState][elemIndex];
0399: if (nextState != -1)
0400: break;
0401: }
0402: } else {
0403: int type = fElemMapType[elemIndex] & 0x0f;
0404: if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
0405: if (fElemMap[elemIndex].uri == curElem.uri
0406: && fElemMap[elemIndex].localpart == curElem.localpart) {
0407: nextState = fTransTable[curState][elemIndex];
0408: if (nextState != -1)
0409: break;
0410: }
0411: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
0412: nextState = fTransTable[curState][elemIndex];
0413: if (nextState != -1)
0414: break;
0415: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_NS) {
0416: if (curElem.uri == fElemMap[elemIndex].uri) {
0417: nextState = fTransTable[curState][elemIndex];
0418: if (nextState != -1)
0419: break;
0420: }
0421: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
0422: if (fElemMap[elemIndex].uri != curElem.uri) {
0423: nextState = fTransTable[curState][elemIndex];
0424: if (nextState != -1)
0425: break;
0426: }
0427: }
0428: }
0429: }
0430:
0431: // If "nextState" is -1, we found a match, but the transition is invalid
0432: if (nextState == -1) {
0433: if (DEBUG_VALIDATE_CONTENT)
0434: System.out.println("!!! not a legal transition");
0435: return childIndex;
0436: }
0437:
0438: // If we didn't find it, then obviously not valid
0439: if (elemIndex == fElemMapSize) {
0440: if (DEBUG_VALIDATE_CONTENT) {
0441: System.out.println("!!! didn't find it");
0442:
0443: System.out.println("curElem : " + curElem);
0444: for (int i = 0; i < fElemMapSize; i++) {
0445: System.out.println("fElemMap[" + i + "] = "
0446: + fElemMap[i]);
0447: System.out.println("fElemMapType[" + i + "] = "
0448: + fElemMapType[i]);
0449: }
0450: }
0451:
0452: return childIndex;
0453: }
0454:
0455: curState = nextState;
0456: nextState = 0;
0457:
0458: }
0459:
0460: //
0461: // We transitioned all the way through the input list. However, that
0462: // does not mean that we ended in a final state. So check whether
0463: // our ending state is a final state.
0464: //
0465: if (DEBUG_VALIDATE_CONTENT)
0466: System.out.println("curState=" + curState + ", childCount="
0467: + length);
0468: if (!fFinalStateFlags[curState])
0469: return length;
0470:
0471: // success!
0472: return -1;
0473: }
0474:
0475: private boolean isEqual(QName name1, QName name2) {
0476: return name1.localpart == name2.localpart
0477: && name1.uri == name2.uri;
0478: }
0479:
0480: public int validateContentSpecial(QName children[], int offset,
0481: int length) throws Exception {
0482: if (DEBUG_VALIDATE_CONTENT)
0483: System.out
0484: .println("DFAContentModel#validateContentSpecial");
0485:
0486: if (comparator == null) {
0487: return validateContent(children, offset, length);
0488: }
0489:
0490: if (length == 0) {
0491: if (DEBUG_VALIDATE_CONTENT) {
0492: System.out.println("!!! no children");
0493: System.out.println("elemMap=" + fElemMap);
0494: for (int i = 0; i < fElemMap.length; i++) {
0495: int uriIndex = fElemMap[i].uri;
0496: int localpartIndex = fElemMap[i].localpart;
0497: }
0498: System.out.println("EOCIndex=" + fEOCIndex);
0499: }
0500:
0501: return fEmptyContentIsValid ? -1 : 0;
0502:
0503: } // if child count == 0
0504:
0505: //
0506: // Lets loop through the children in the array and move our way
0507: // through the states. Note that we use the fElemMap array to map
0508: // an element index to a state index.
0509: //
0510: int curState = 0;
0511: int nextState = 0;
0512: for (int childIndex = 0; childIndex < length; childIndex++) {
0513: // Get the current element index out
0514: final QName curElem = children[offset + childIndex];
0515:
0516: // Look up this child in our element map
0517: int elemIndex = 0;
0518: for (; elemIndex < fElemMapSize; elemIndex++) {
0519: int type = fElemMapType[elemIndex] & 0x0f;
0520: if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
0521: if (comparator.isEquivalentTo(curElem,
0522: fElemMap[elemIndex])) {
0523: nextState = fTransTable[curState][elemIndex];
0524: if (nextState != -1)
0525: break;
0526: }
0527: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
0528: nextState = fTransTable[curState][elemIndex];
0529: if (nextState != -1)
0530: break;
0531: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_NS) {
0532: if (curElem.uri == fElemMap[elemIndex].uri) {
0533: nextState = fTransTable[curState][elemIndex];
0534: if (nextState != -1)
0535: break;
0536: }
0537: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
0538: if (fElemMap[elemIndex].uri != curElem.uri) {
0539: nextState = fTransTable[curState][elemIndex];
0540: if (nextState != -1)
0541: break;
0542: }
0543: }
0544:
0545: }
0546:
0547: // If "nextState" is -1, we found a match, but the transition is invalid
0548: if (nextState == -1) {
0549: if (DEBUG_VALIDATE_CONTENT)
0550: System.out.println("!!! not a legal transition");
0551: return childIndex;
0552: }
0553:
0554: // If we didn't find it, then obviously not valid
0555: if (elemIndex == fElemMapSize) {
0556: if (DEBUG_VALIDATE_CONTENT) {
0557: System.out.println("!!! didn't find it");
0558:
0559: System.out.println("curElem : " + curElem);
0560: for (int i = 0; i < fElemMapSize; i++) {
0561: System.out.println("fElemMap[" + i + "] = "
0562: + fElemMap[i]);
0563: System.out.println("fElemMapType[" + i + "] = "
0564: + fElemMapType[i]);
0565: }
0566: }
0567:
0568: return childIndex;
0569: }
0570:
0571: curState = nextState;
0572: nextState = 0;
0573:
0574: }
0575:
0576: //
0577: // We transitioned all the way through the input list. However, that
0578: // does not mean that we ended in a final state. So check whether
0579: // our ending state is a final state.
0580: //
0581: if (DEBUG_VALIDATE_CONTENT)
0582: System.out.println("curState=" + curState + ", childCount="
0583: + length);
0584: if (!fFinalStateFlags[curState])
0585: return length;
0586:
0587: // success!
0588: return -1;
0589: }
0590:
0591: /**
0592: * check whether the given state is one of the final states
0593: *
0594: * @param state the state to check
0595: *
0596: * @return whether it's a final state
0597: */
0598: public boolean isFinalState(int state) {
0599: if (state < 0)
0600: return false;
0601: return fFinalStateFlags[state];
0602: }
0603:
0604: /**
0605: * one transition only
0606: *
0607: * @param curElem The current element's QName
0608: * @param stateStack stack to store the previous state
0609: * @param curPos the current position of the stack
0610: *
0611: * @return The value -1 if not valid, otherwise the index of the matching leaf
0612: *
0613: * @exception CMException Thrown on error.
0614: */
0615: public int oneTransition(QName curElem, int[] stateStack, int curPos)
0616: throws Exception {
0617: int curState = stateStack[curPos];
0618:
0619: // if it's an illegal state, just return -1
0620: if (curState < 0) {
0621: return -1;
0622: }
0623:
0624: int nextState = 0;
0625: int elemIndex = 0;
0626:
0627: for (; elemIndex < fElemMapSize; elemIndex++) {
0628: int type = fElemMapType[elemIndex] & 0x0f;
0629: if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
0630: if (isEqual(curElem, fElemMap[elemIndex])) {
0631: nextState = fTransTable[curState][elemIndex];
0632: if (nextState != -1)
0633: break;
0634: }
0635: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
0636: nextState = fTransTable[curState][elemIndex];
0637: if (nextState != -1)
0638: break;
0639: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_NS) {
0640: if (curElem.uri == fElemMap[elemIndex].uri) {
0641: nextState = fTransTable[curState][elemIndex];
0642: if (nextState != -1)
0643: break;
0644: }
0645: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
0646: if (fElemMap[elemIndex].uri != curElem.uri) {
0647: nextState = fTransTable[curState][elemIndex];
0648: if (nextState != -1)
0649: break;
0650: }
0651: }
0652: }
0653:
0654: // if we can't find a match, try substitutionGroup
0655: if (elemIndex == fElemMapSize && comparator != null) {
0656: for (elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
0657: if (fElemMapType[elemIndex] == XMLContentSpec.CONTENTSPECNODE_LEAF) {
0658: if (comparator.isEquivalentTo(curElem,
0659: fElemMap[elemIndex])) {
0660: nextState = fTransTable[curState][elemIndex];
0661: if (nextState != -1)
0662: break;
0663: }
0664: }
0665: }
0666: }
0667:
0668: // if we still can't find a match, set the state to -1 (error)
0669: // and return -1
0670: if (elemIndex == fElemMapSize) {
0671: stateStack[curPos] = -1;
0672: return -1;
0673: }
0674:
0675: stateStack[curPos] = nextState;
0676: return elemIndex;
0677: }
0678:
0679: public void setSubstitutionGroupComparator(
0680: SubstitutionGroupComparator comparator) {
0681: this .comparator = comparator;
0682: }
0683:
0684: /**
0685: * Returns information about which elements can be placed at a particular point
0686: * in the passed element's content model.
0687: * <p>
0688: * Note that the incoming content model to test must be valid at least up to
0689: * the insertion point. If not, then -1 will be returned and the info object
0690: * will not have been filled in.
0691: * <p>
0692: * If, on return, the info.isValidEOC flag is set, then the 'insert after'
0693: * element is a valid end of content. In other words, nothing needs to be
0694: * inserted after it to make the parent element's content model valid.
0695: *
0696: * @param fullyValid Only return elements that can be inserted and still
0697: * maintain the validity of subsequent elements past the
0698: * insertion point (if any). If the insertion point is at
0699: * the end, and this is true, then only elements that can
0700: * be legal final states will be returned.
0701: * @param info An object that contains the required input data for the method,
0702: * and which will contain the output information if successful.
0703: *
0704: * @return The value -1 if fully valid, else the 0 based index of the child
0705: * that first failed before the insertion point. If the value
0706: * returned is equal to the number of children, then the specified
0707: * children are valid but additional content is required to reach a
0708: * valid ending state.
0709: *
0710: * @see InsertableElementsInfo
0711: */
0712: public int whatCanGoHere(boolean fullyValid,
0713: InsertableElementsInfo info) throws CMException {
0714:
0715: //
0716: // First, lets make sure that the passed in current content is valid
0717: // up to the insert point.
0718: //
0719: int curState = 0;
0720: for (int childIndex = 0; childIndex < info.insertAt; childIndex++) {
0721: // Get the current element index out
0722: final QName curElem = info.curChildren[childIndex];
0723:
0724: // Look up this child in our element map
0725: int elemIndex = 0;
0726: for (; elemIndex < fElemMapSize; elemIndex++) {
0727: if (fElemMap[elemIndex].uri == curElem.uri
0728: && fElemMap[elemIndex].localpart == curElem.localpart)
0729: break;
0730: }
0731:
0732: // If we didn't find it, then not valid so return failure index
0733: if (elemIndex == fElemMapSize)
0734: return childIndex;
0735:
0736: //
0737: // Look up the next state for this input symbol when in the
0738: // current state.
0739: //
0740: curState = fTransTable[curState][elemIndex];
0741:
0742: // If its not a legal transition, then invalid
0743: if (curState == -1)
0744: return childIndex;
0745: }
0746:
0747: //
0748: // If we got here, then curState is set to the state that would be
0749: // the transition before the insertion point. We let this sit until
0750: // below, where it will be needed.
0751: //
0752: final int insertState = curState;
0753:
0754: //
0755: // Set any stuff we can know right off the bat for all cases.
0756: // We can set the valid EOC flag at this point
0757: // since its just based on the state we ended in at the insert point.
0758: // The 'canHoldPCData" will be true if it's a mixed content model.
0759: //
0760: info.canHoldPCData = fMixed;
0761: info.isValidEOC = fFinalStateFlags[insertState];
0762:
0763: //
0764: // Set the results count member and then see if we need to reallocate
0765: // the outgoing arrays.
0766: //
0767: info.resultsCount = fElemMapSize;
0768:
0769: if ((info.results == null)
0770: || (info.results.length < info.resultsCount))
0771: info.results = new boolean[info.resultsCount];
0772:
0773: if ((info.possibleChildren == null)
0774: || (info.possibleChildren.length < info.resultsCount)) {
0775: info.possibleChildren = new QName[info.resultsCount];
0776: for (int i = 0; i < info.possibleChildren.length; i++) {
0777: info.possibleChildren[i] = new QName();
0778: }
0779: }
0780:
0781: //
0782: // Fill in the possible children array, from our array. For each one
0783: // of them, see if there is a valid transition from our insert at
0784: // state on that input. Mark the results index for that child according
0785: // to whether there is a transition or not.
0786: //
0787: for (int index = 0; index < fElemMapSize; index++) {
0788: info.possibleChildren[index].setValues(fElemMap[index]);
0789: info.results[index] = (fTransTable[insertState][index] != -1);
0790: }
0791:
0792: //
0793: // If the fully valid parameter is set, then we have to go through
0794: // the grunt work of plugging in each possible insertable element
0795: // and running the DFA from that point to see if it would create a
0796: // fully valid content model.
0797: //
0798: // <TBD> When/if the validator is changed to be stateful, then change
0799: // this stuff to start the exploratory validation at the insert state,
0800: // not from the start each time.
0801: //
0802: if (fullyValid) {
0803: for (int index = 0; index < info.resultsCount; index++) {
0804: // Don't need to consider this one since its not insertable
0805: if (!info.results[index])
0806: continue;
0807:
0808: // Stick this element into the insert at spot
0809: info.curChildren[info.insertAt] = info.possibleChildren[index];
0810:
0811: // And validate it. If it fails, then this one loses
0812: if (validateContent(info.curChildren, 0,
0813: info.childCount) != -1)
0814: info.results[index] = false;
0815: }
0816: }
0817:
0818: return -1;
0819: }
0820:
0821: public ContentLeafNameTypeVector getContentLeafNameTypeVector() {
0822: return fLeafNameTypeVector;
0823: }
0824:
0825: // Unique Particle Attribution
0826: // store the conflict results between any two elements in fElemMap
0827: // -1: not compared; 0: no conflict; 1: conflict
0828: private byte fConflictTable[][];
0829:
0830: // check UPA after build the DFA
0831: public void checkUniqueParticleAttribution(SchemaGrammar gram)
0832: throws Exception {
0833: // rename back
0834: for (int i = 0; i < fElemMapSize; i++)
0835: fElemMap[i].uri = gram
0836: .getContentSpecOrgUri(fElemMap[i].uri);
0837:
0838: // initialize the conflict table
0839: fConflictTable = new byte[fElemMapSize][fElemMapSize];
0840: for (int j = 0; j < fElemMapSize; j++) {
0841: for (int k = j + 1; k < fElemMapSize; k++)
0842: fConflictTable[j][k] = -1;
0843: }
0844:
0845: // for each state, check whether it has overlap transitions
0846: for (int i = 0; i < fTransTable.length
0847: && fTransTable[i] != null; i++) {
0848: for (int j = 0; j < fElemMapSize; j++) {
0849: for (int k = j + 1; k < fElemMapSize; k++) {
0850: if (fTransTable[i][j] != -1
0851: && fTransTable[i][k] != -1) {
0852: if (fConflictTable[j][k] == -1) {
0853: fConflictTable[j][k] = ElementWildcard
0854: .conflict(fElemMapType[j],
0855: fElemMap[j].localpart,
0856: fElemMap[j].uri,
0857: fElemMapType[k],
0858: fElemMap[k].localpart,
0859: fElemMap[k].uri, comparator) ? (byte) 1
0860: : (byte) 0;
0861: }
0862: }
0863: }
0864: }
0865: }
0866:
0867: fConflictTable = null;
0868: }
0869:
0870: // Unique Particle Attribution
0871:
0872: //
0873: // Private methods
0874: //
0875:
0876: /**
0877: * Builds the internal DFA transition table from the given syntax tree.
0878: *
0879: * @param syntaxTree The syntax tree.
0880: *
0881: * @exception CMException Thrown if DFA cannot be built.
0882: */
0883: private void buildDFA(CMNode syntaxTree) throws CMException {
0884: //
0885: // The first step we need to take is to rewrite the content model
0886: // using our CMNode objects, and in the process get rid of any
0887: // repetition short cuts, converting them into '*' style repetitions
0888: // or getting rid of repetitions altogether.
0889: //
0890: // The conversions done are:
0891: //
0892: // x+ -> (x|x*)
0893: // x? -> (x|epsilon)
0894: //
0895: // This is a relatively complex scenario. What is happening is that
0896: // we create a top level binary node of which the special EOC value
0897: // is set as the right side node. The the left side is set to the
0898: // rewritten syntax tree. The source is the original content model
0899: // info from the decl pool. The rewrite is done by buildSyntaxTree()
0900: // which recurses the decl pool's content of the element and builds
0901: // a new tree in the process.
0902: //
0903: // Note that, during this operation, we set each non-epsilon leaf
0904: // node's DFA state position and count the number of such leafs, which
0905: // is left in the fLeafCount member.
0906: //
0907: // The nodeTmp object is passed in just as a temp node to use during
0908: // the recursion. Otherwise, we'd have to create a new node on every
0909: // level of recursion, which would be piggy in Java (as is everything
0910: // for that matter.)
0911: //
0912:
0913: /* MODIFIED (Jan, 2001)
0914: *
0915: * Use following rules.
0916: * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
0917: * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x)
0918: *
0919: * The same computation of follow as x* is applied to x+
0920: *
0921: * The modification drastically reduces computation time of
0922: * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
0923: */
0924:
0925: fQName.setValues(-1, fEOCIndex, fEOCIndex);
0926: CMLeaf nodeEOC = new CMLeaf(fQName);
0927: fHeadNode = new CMBinOp(XMLContentSpec.CONTENTSPECNODE_SEQ,
0928: syntaxTree, nodeEOC);
0929:
0930: //
0931: // And handle specially the EOC node, which also must be numbered
0932: // and counted as a non-epsilon leaf node. It could not be handled
0933: // in the above tree build because it was created before all that
0934: // started. We save the EOC position since its used during the DFA
0935: // building loop.
0936: //
0937: fEOCPos = fLeafCount;
0938: nodeEOC.setPosition(fLeafCount++);
0939:
0940: //
0941: // Ok, so now we have to iterate the new tree and do a little more
0942: // work now that we know the leaf count. One thing we need to do is
0943: // to calculate the first and last position sets of each node. This
0944: // is cached away in each of the nodes.
0945: //
0946: // Along the way we also set the leaf count in each node as the
0947: // maximum state count. They must know this in order to create their
0948: // first/last pos sets.
0949: //
0950: // We also need to build an array of references to the non-epsilon
0951: // leaf nodes. Since we iterate it in the same way as before, this
0952: // will put them in the array according to their position values.
0953: //
0954: fLeafList = new CMLeaf[fLeafCount];
0955: fLeafListType = new int[fLeafCount];
0956: postTreeBuildInit(fHeadNode, 0);
0957:
0958: //
0959: // And, moving onward... We now need to build the follow position
0960: // sets for all the nodes. So we allocate an array of state sets,
0961: // one for each leaf node (i.e. each DFA position.)
0962: //
0963: fFollowList = new CMStateSet[fLeafCount];
0964: for (int index = 0; index < fLeafCount; index++)
0965: fFollowList[index] = new CMStateSet(fLeafCount);
0966: calcFollowList(fHeadNode);
0967: //
0968: // And finally the big push... Now we build the DFA using all the
0969: // states and the tree we've built up. First we set up the various
0970: // data structures we are going to use while we do this.
0971: //
0972: // First of all we need an array of unique element names in our
0973: // content model. For each transition table entry, we need a set of
0974: // contiguous indices to represent the transitions for a particular
0975: // input element. So we need to a zero based range of indexes that
0976: // map to element types. This element map provides that mapping.
0977: //
0978: fElemMap = new QName[fLeafCount];
0979: fElemMapType = new int[fLeafCount];
0980: fElemMapSize = 0;
0981: for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
0982: fElemMap[outIndex] = new QName();
0983:
0984: if ((fLeafListType[outIndex] & 0x0f) != 0) {
0985: if (fLeafNameTypeVector == null) {
0986: fLeafNameTypeVector = new ContentLeafNameTypeVector();
0987: }
0988: }
0989:
0990: // Get the current leaf's element index
0991: final QName element = fLeafList[outIndex].getElement();
0992:
0993: // See if the current leaf node's element index is in the list
0994: int inIndex = 0;
0995: for (; inIndex < fElemMapSize; inIndex++) {
0996: if (fDTD) {
0997: if (fElemMap[inIndex].rawname == element.rawname) {
0998: break;
0999: }
1000: } else {
1001: if (fElemMapType[inIndex] == fLeafListType[outIndex]
1002: && fElemMap[inIndex].uri == element.uri
1003: && fElemMap[inIndex].localpart == element.localpart)
1004: break;
1005: }
1006: }
1007:
1008: // If it was not in the list, then add it, if not the EOC node
1009: if (inIndex == fElemMapSize) {
1010: //if (fDTD) {
1011: // fElemMap[fElemMapSize].setValues(-1, element.rawname, element.rawname, -1);
1012: //}
1013: //else {
1014: fElemMap[fElemMapSize].setValues(element);
1015: //}
1016: fElemMapType[fElemMapSize] = fLeafListType[outIndex];
1017: fElemMapSize++;
1018: }
1019: }
1020: // set up the fLeafNameTypeVector object if there is one.
1021: if (fLeafNameTypeVector != null) {
1022: fLeafNameTypeVector.setValues(fElemMap, fElemMapType,
1023: fElemMapSize);
1024: }
1025:
1026: /***
1027: * Optimization(Jan, 2001); We sort fLeafList according to
1028: * elemIndex which is *uniquely* associated to each leaf.
1029: * We are *assuming* that each element appears in at least one leaf.
1030: **/
1031:
1032: int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
1033: int fSortCount = 0;
1034:
1035: for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
1036: for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
1037: final QName leaf = fLeafList[leafIndex].getElement();
1038: final int leafType = fLeafListType[leafIndex];
1039: final QName element = fElemMap[elemIndex];
1040: if (fDTD) {
1041: if (leaf.rawname == element.rawname) {
1042: fLeafSorter[fSortCount++] = leafIndex;
1043: }
1044: } else if (fElemMapType[elemIndex] == fLeafListType[leafIndex]
1045: && leaf.uri == element.uri
1046: && leaf.localpart == element.localpart) {
1047: fLeafSorter[fSortCount++] = leafIndex;
1048: }
1049: }
1050: fLeafSorter[fSortCount++] = -1;
1051: }
1052:
1053: /* Optimization(Jan, 2001) */
1054:
1055: //
1056: // Next lets create some arrays, some that that hold transient
1057: // information during the DFA build and some that are permament.
1058: // These are kind of sticky since we cannot know how big they will
1059: // get, but we don't want to use any Java collections because of
1060: // performance.
1061: //
1062: // Basically they will probably be about fLeafCount*2 on average,
1063: // but can be as large as 2^(fLeafCount*2), worst case. So we start
1064: // with fLeafCount*4 as a middle ground. This will be very unlikely
1065: // to ever have to expand, though it if does, the overhead will be
1066: // somewhat ugly.
1067: //
1068: int curArraySize = fLeafCount * 4;
1069: CMStateSet[] statesToDo = new CMStateSet[curArraySize];
1070: fFinalStateFlags = new boolean[curArraySize];
1071: fTransTable = new int[curArraySize][];
1072:
1073: //
1074: // Ok we start with the initial set as the first pos set of the
1075: // head node (which is the seq node that holds the content model
1076: // and the EOC node.)
1077: //
1078: CMStateSet setT = fHeadNode.firstPos();
1079:
1080: //
1081: // Init our two state flags. Basically the unmarked state counter
1082: // is always chasing the current state counter. When it catches up,
1083: // that means we made a pass through that did not add any new states
1084: // to the lists, at which time we are done. We could have used a
1085: // expanding array of flags which we used to mark off states as we
1086: // complete them, but this is easier though less readable maybe.
1087: //
1088: int unmarkedState = 0;
1089: int curState = 0;
1090:
1091: //
1092: // Init the first transition table entry, and put the initial state
1093: // into the states to do list, then bump the current state.
1094: //
1095: fTransTable[curState] = makeDefStateList();
1096: statesToDo[curState] = setT;
1097: curState++;
1098:
1099: /* Optimization(Jan, 2001); This is faster for
1100: * a large content model such as, "(t001+|t002+|.... |t500+)".
1101: */
1102:
1103: java.util.Hashtable stateTable = new java.util.Hashtable();
1104:
1105: /* Optimization(Jan, 2001) */
1106:
1107: //
1108: // Ok, almost done with the algorithm... We now enter the
1109: // loop where we go until the states done counter catches up with
1110: // the states to do counter.
1111: //
1112: while (unmarkedState < curState) {
1113: //
1114: // Get the first unmarked state out of the list of states to do.
1115: // And get the associated transition table entry.
1116: //
1117: setT = statesToDo[unmarkedState];
1118: int[] transEntry = fTransTable[unmarkedState];
1119:
1120: // Mark this one final if it contains the EOC state
1121: fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
1122:
1123: // Bump up the unmarked state count, marking this state done
1124: unmarkedState++;
1125:
1126: // Loop through each possible input symbol in the element map
1127: CMStateSet newSet = null;
1128: /* Optimization(Jan, 2001) */
1129: int sorterIndex = 0;
1130: /* Optimization(Jan, 2001) */
1131: for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
1132: //
1133: // Build up a set of states which is the union of all of
1134: // the follow sets of DFA positions that are in the current
1135: // state. If we gave away the new set last time through then
1136: // create a new one. Otherwise, zero out the existing one.
1137: //
1138: if (newSet == null)
1139: newSet = new CMStateSet(fLeafCount);
1140: else
1141: newSet.zeroBits();
1142:
1143: /* Optimization(Jan, 2001) */
1144: int leafIndex = fLeafSorter[sorterIndex++];
1145:
1146: while (leafIndex != -1) {
1147: // If this leaf index (DFA position) is in the current set...
1148: if (setT.getBit(leafIndex)) {
1149: //
1150: // If this leaf is the current input symbol, then we
1151: // want to add its follow list to the set of states to
1152: // transition to from the current state.
1153: //
1154: newSet.union(fFollowList[leafIndex]);
1155: }
1156:
1157: leafIndex = fLeafSorter[sorterIndex++];
1158: }
1159: /* Optimization(Jan, 2001) */
1160:
1161: //
1162: // If this new set is not empty, then see if its in the list
1163: // of states to do. If not, then add it.
1164: //
1165: if (!newSet.isEmpty()) {
1166: //
1167: // Search the 'states to do' list to see if this new
1168: // state set is already in there.
1169: //
1170:
1171: /* Optimization(Jan, 2001) */
1172: Integer stateObj = (Integer) stateTable.get(newSet);
1173: int stateIndex = (stateObj == null ? curState
1174: : stateObj.intValue());
1175: /* Optimization(Jan, 2001) */
1176:
1177: // If we did not find it, then add it
1178: if (stateIndex == curState) {
1179: //
1180: // Put this new state into the states to do and init
1181: // a new entry at the same index in the transition
1182: // table.
1183: //
1184: statesToDo[curState] = newSet;
1185: fTransTable[curState] = makeDefStateList();
1186:
1187: /* Optimization(Jan, 2001) */
1188: stateTable.put(newSet, new Integer(curState));
1189: /* Optimization(Jan, 2001) */
1190:
1191: // We now have a new state to do so bump the count
1192: curState++;
1193:
1194: //
1195: // Null out the new set to indicate we adopted it.
1196: // This will cause the creation of a new set on the
1197: // next time around the loop.
1198: //
1199: newSet = null;
1200: }
1201:
1202: //
1203: // Now set this state in the transition table's entry
1204: // for this element (using its index), with the DFA
1205: // state we will move to from the current state when we
1206: // see this input element.
1207: //
1208: transEntry[elemIndex] = stateIndex;
1209:
1210: // Expand the arrays if we're full
1211: if (curState == curArraySize) {
1212: //
1213: // Yikes, we overflowed the initial array size, so
1214: // we've got to expand all of these arrays. So adjust
1215: // up the size by 50% and allocate new arrays.
1216: //
1217: final int newSize = (int) (curArraySize * 1.5);
1218: CMStateSet[] newToDo = new CMStateSet[newSize];
1219: boolean[] newFinalFlags = new boolean[newSize];
1220: int[][] newTransTable = new int[newSize][];
1221:
1222: // Copy over all of the existing content
1223: for (int expIndex = 0; expIndex < curArraySize; expIndex++) {
1224: newToDo[expIndex] = statesToDo[expIndex];
1225: newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
1226: newTransTable[expIndex] = fTransTable[expIndex];
1227: }
1228:
1229: // Store the new array size
1230: curArraySize = newSize;
1231: statesToDo = newToDo;
1232: fFinalStateFlags = newFinalFlags;
1233: fTransTable = newTransTable;
1234: }
1235: }
1236: }
1237: }
1238:
1239: // Check to see if we can set the fEmptyContentIsValid flag.
1240: fEmptyContentIsValid = ((CMBinOp) fHeadNode).getLeft()
1241: .isNullable();
1242:
1243: //
1244: // And now we can say bye bye to the temp representation since we've
1245: // built the DFA.
1246: //
1247: if (DEBUG_VALIDATE_CONTENT)
1248: dumpTree(fHeadNode, 0);
1249: fHeadNode = null;
1250: fLeafList = null;
1251: fFollowList = null;
1252:
1253: }
1254:
1255: /**
1256: * Calculates the follow list of the current node.
1257: *
1258: * @param nodeCur The curent node.
1259: *
1260: * @exception CMException Thrown if follow list cannot be calculated.
1261: */
1262: private void calcFollowList(CMNode nodeCur) throws CMException {
1263: // Recurse as required
1264: if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE) {
1265: // Recurse only
1266: calcFollowList(((CMBinOp) nodeCur).getLeft());
1267: calcFollowList(((CMBinOp) nodeCur).getRight());
1268: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ) {
1269: // Recurse first
1270: calcFollowList(((CMBinOp) nodeCur).getLeft());
1271: calcFollowList(((CMBinOp) nodeCur).getRight());
1272:
1273: //
1274: // Now handle our level. We use our left child's last pos
1275: // set and our right child's first pos set, so go ahead and
1276: // get them ahead of time.
1277: //
1278: final CMStateSet last = ((CMBinOp) nodeCur).getLeft()
1279: .lastPos();
1280: final CMStateSet first = ((CMBinOp) nodeCur).getRight()
1281: .firstPos();
1282:
1283: //
1284: // Now, for every position which is in our left child's last set
1285: // add all of the states in our right child's first set to the
1286: // follow set for that position.
1287: //
1288: for (int index = 0; index < fLeafCount; index++) {
1289: if (last.getBit(index))
1290: fFollowList[index].union(first);
1291: }
1292: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
1293: || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE) {
1294: // Recurse first
1295: calcFollowList(((CMUniOp) nodeCur).getChild());
1296:
1297: //
1298: // Now handle our level. We use our own first and last position
1299: // sets, so get them up front.
1300: //
1301: final CMStateSet first = nodeCur.firstPos();
1302: final CMStateSet last = nodeCur.lastPos();
1303:
1304: //
1305: // For every position which is in our last position set, add all
1306: // of our first position states to the follow set for that
1307: // position.
1308: //
1309: for (int index = 0; index < fLeafCount; index++) {
1310: if (last.getBit(index))
1311: fFollowList[index].union(first);
1312: }
1313: }
1314:
1315: else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
1316: // Recurse only
1317: calcFollowList(((CMUniOp) nodeCur).getChild());
1318: }
1319:
1320: }
1321:
1322: /**
1323: * Dumps the tree of the current node to standard output.
1324: *
1325: * @param nodeCur The current node.
1326: * @param level The maximum levels to output.
1327: *
1328: * @exception CMException Thrown on error.
1329: */
1330: private void dumpTree(CMNode nodeCur, int level) throws CMException {
1331: for (int index = 0; index < level; index++)
1332: System.out.print(" ");
1333:
1334: int type = nodeCur.type();
1335:
1336: switch (type & 0x0f) {
1337:
1338: case XMLContentSpec.CONTENTSPECNODE_CHOICE:
1339: case XMLContentSpec.CONTENTSPECNODE_SEQ: {
1340: if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
1341: System.out.print("Choice Node ");
1342: else
1343: System.out.print("Seq Node ");
1344:
1345: if (nodeCur.isNullable())
1346: System.out.print("Nullable ");
1347:
1348: System.out.print("firstPos=");
1349: System.out.print(nodeCur.firstPos().toString());
1350: System.out.print(" lastPos=");
1351: System.out.println(nodeCur.lastPos().toString());
1352:
1353: dumpTree(((CMBinOp) nodeCur).getLeft(), level + 1);
1354: dumpTree(((CMBinOp) nodeCur).getRight(), level + 1);
1355: break;
1356: }
1357: case XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE:
1358: case XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE:
1359: case XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE: {
1360: System.out.print("Rep Node ");
1361:
1362: if (nodeCur.isNullable())
1363: System.out.print("Nullable ");
1364:
1365: System.out.print("firstPos=");
1366: System.out.print(nodeCur.firstPos().toString());
1367: System.out.print(" lastPos=");
1368: System.out.println(nodeCur.lastPos().toString());
1369:
1370: dumpTree(((CMUniOp) nodeCur).getChild(), level + 1);
1371: break;
1372: }
1373: case XMLContentSpec.CONTENTSPECNODE_LEAF: {
1374: System.out.print("Leaf: (pos="
1375: + ((CMLeaf) nodeCur).getPosition() + "), "
1376: + ((CMLeaf) nodeCur).getElement() + "(elemIndex="
1377: + ((CMLeaf) nodeCur).getElement() + ") ");
1378:
1379: if (nodeCur.isNullable())
1380: System.out.print(" Nullable ");
1381:
1382: System.out.print("firstPos=");
1383: System.out.print(nodeCur.firstPos().toString());
1384: System.out.print(" lastPos=");
1385: System.out.println(nodeCur.lastPos().toString());
1386: break;
1387: }
1388: case XMLContentSpec.CONTENTSPECNODE_ANY:
1389: case XMLContentSpec.CONTENTSPECNODE_ANY_OTHER:
1390: case XMLContentSpec.CONTENTSPECNODE_ANY_NS: {
1391: if (type == XMLContentSpec.CONTENTSPECNODE_ANY)
1392: System.out.print("Any Node: ");
1393: else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LAX)
1394: System.out.print("Any lax Node: ");
1395: else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_SKIP)
1396: System.out.print("Any skip Node: ");
1397: else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER)
1398: System.out.print("Any other Node: ");
1399: else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER_LAX)
1400: System.out.print("Any other lax Node: ");
1401: else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER_SKIP)
1402: System.out.print("Any other skip Node: ");
1403: else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_NS)
1404: System.out.print("Any namespace Node: ");
1405: else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_NS_LAX)
1406: System.out.print("Any namespace lax Node: ");
1407: else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_NS_SKIP)
1408: System.out.print("Any namespace skip Node: ");
1409:
1410: System.out.print("firstPos=");
1411: System.out.print(nodeCur.firstPos().toString());
1412: System.out.print(" lastPos=");
1413: System.out.println(nodeCur.lastPos().toString());
1414: break;
1415: }
1416: default: {
1417: throw new CMException(ImplementationMessages.VAL_NIICM);
1418: }
1419: }
1420:
1421: }
1422:
1423: /**
1424: * -1 is used to represent bad transitions in the transition table
1425: * entry for each state. So each entry is initialized to an all -1
1426: * array. This method creates a new entry and initializes it.
1427: */
1428: private int[] makeDefStateList() {
1429: int[] retArray = new int[fElemMapSize];
1430: for (int index = 0; index < fElemMapSize; index++)
1431: retArray[index] = -1;
1432: return retArray;
1433: }
1434:
1435: /** Post tree build initialization. */
1436: private int postTreeBuildInit(CMNode nodeCur, int curIndex)
1437: throws CMException {
1438: // Set the maximum states on this node
1439: nodeCur.setMaxStates(fLeafCount);
1440:
1441: // Recurse as required
1442: if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY
1443: || (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_NS
1444: || (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
1445: // REVISIT: Don't waste these structures.
1446: QName qname = new QName(-1, -1, -1, ((CMAny) nodeCur)
1447: .getURI());
1448: fLeafList[curIndex] = new CMLeaf(qname, ((CMAny) nodeCur)
1449: .getPosition());
1450: fLeafListType[curIndex] = nodeCur.type();
1451: curIndex++;
1452: } else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
1453: || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)) {
1454: curIndex = postTreeBuildInit(((CMBinOp) nodeCur).getLeft(),
1455: curIndex);
1456: curIndex = postTreeBuildInit(
1457: ((CMBinOp) nodeCur).getRight(), curIndex);
1458: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
1459: || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
1460: || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
1461: curIndex = postTreeBuildInit(
1462: ((CMUniOp) nodeCur).getChild(), curIndex);
1463: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) {
1464: //
1465: // Put this node in the leaf list at the current index if its
1466: // a non-epsilon leaf.
1467: //
1468: final QName node = ((CMLeaf) nodeCur).getElement();
1469: if (node.localpart != fEpsilonIndex) {
1470: fLeafList[curIndex] = (CMLeaf) nodeCur;
1471: fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
1472: curIndex++;
1473: }
1474: } else {
1475: throw new CMException(ImplementationMessages.VAL_NIICM);
1476: }
1477: return curIndex;
1478: }
1479:
1480: } // class DFAContentModel
|