0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: package org.apache.xerces.impl.xs.models;
0019:
0020: import java.util.HashMap;
0021: import java.util.Vector;
0022:
0023: import org.apache.xerces.impl.dtd.models.CMNode;
0024: import org.apache.xerces.impl.dtd.models.CMStateSet;
0025: import org.apache.xerces.impl.xs.SchemaSymbols;
0026: import org.apache.xerces.impl.xs.SubstitutionGroupHandler;
0027: import org.apache.xerces.impl.xs.XMLSchemaException;
0028: import org.apache.xerces.impl.xs.XSConstraints;
0029: import org.apache.xerces.impl.xs.XSElementDecl;
0030: import org.apache.xerces.impl.xs.XSModelGroupImpl;
0031: import org.apache.xerces.impl.xs.XSParticleDecl;
0032: import org.apache.xerces.impl.xs.XSWildcardDecl;
0033: import org.apache.xerces.xni.QName;
0034:
0035: /**
0036: * DFAContentModel is the implementation of XSCMValidator that does
0037: * all of the non-trivial element content validation. This class does
0038: * the conversion from the regular expression to the DFA that
0039: * it then uses in its validation algorithm.
0040: *
0041: * @xerces.internal
0042: *
0043: * @author Neil Graham, IBM
0044: * @version $Id: XSDFACM.java 573322 2007-09-06 16:48:47Z peterjm $
0045: */
0046: public class XSDFACM implements XSCMValidator {
0047:
0048: //
0049: // Constants
0050: //
0051: private static final boolean DEBUG = false;
0052:
0053: // special strings
0054:
0055: // debugging
0056:
0057: /** Set to true to debug content model validation. */
0058: private static final boolean DEBUG_VALIDATE_CONTENT = false;
0059:
0060: //
0061: // Data
0062: //
0063:
0064: /**
0065: * This is the map of unique input symbol elements to indices into
0066: * each state's per-input symbol transition table entry. This is part
0067: * of the built DFA information that must be kept around to do the
0068: * actual validation. Note tat since either XSElementDecl or XSParticleDecl object
0069: * can live here, we've got to use an Object.
0070: */
0071: private Object fElemMap[] = null;
0072:
0073: /**
0074: * This is a map of whether the element map contains information
0075: * related to ANY models.
0076: */
0077: private int fElemMapType[] = null;
0078:
0079: /**
0080: * id of the unique input symbol
0081: */
0082: private int fElemMapId[] = null;
0083:
0084: /** The element map size. */
0085: private int fElemMapSize = 0;
0086:
0087: /**
0088: * This is an array of booleans, one per state (there are
0089: * fTransTableSize states in the DFA) that indicates whether that
0090: * state is a final state.
0091: */
0092: private boolean fFinalStateFlags[] = null;
0093:
0094: /**
0095: * The list of follow positions for each NFA position (i.e. for each
0096: * non-epsilon leaf node.) This is only used during the building of
0097: * the DFA, and is let go afterwards.
0098: */
0099: private CMStateSet fFollowList[] = null;
0100:
0101: /**
0102: * This is the head node of our intermediate representation. It is
0103: * only non-null during the building of the DFA (just so that it
0104: * does not have to be passed all around.) Once the DFA is built,
0105: * this is no longer required so its nulled out.
0106: */
0107: private CMNode fHeadNode = null;
0108:
0109: /**
0110: * The count of leaf nodes. This is an important number that set some
0111: * limits on the sizes of data structures in the DFA process.
0112: */
0113: private int fLeafCount = 0;
0114:
0115: /**
0116: * An array of non-epsilon leaf nodes, which is used during the DFA
0117: * build operation, then dropped.
0118: */
0119: private XSCMLeaf fLeafList[] = null;
0120:
0121: /** Array mapping ANY types to the leaf list. */
0122: private int fLeafListType[] = null;
0123:
0124: /**
0125: * This is the transition table that is the main by product of all
0126: * of the effort here. It is an array of arrays of ints. The first
0127: * dimension is the number of states we end up with in the DFA. The
0128: * second dimensions is the number of unique elements in the content
0129: * model (fElemMapSize). Each entry in the second dimension indicates
0130: * the new state given that input for the first dimension's start
0131: * state.
0132: * <p>
0133: * The fElemMap array handles mapping from element indexes to
0134: * positions in the second dimension of the transition table.
0135: */
0136: private int fTransTable[][] = null;
0137:
0138: /**
0139: * Array containing occurence information for looping states
0140: * which use counters to check minOccurs/maxOccurs.
0141: */
0142: private Occurence[] fCountingStates = null;
0143:
0144: static final class Occurence {
0145: final int minOccurs;
0146: final int maxOccurs;
0147: final int elemIndex;
0148:
0149: public Occurence(XSCMRepeatingLeaf leaf, int elemIndex) {
0150: minOccurs = leaf.getMinOccurs();
0151: maxOccurs = leaf.getMaxOccurs();
0152: this .elemIndex = elemIndex;
0153: }
0154:
0155: public String toString() {
0156: return "minOccurs="
0157: + minOccurs
0158: + ";maxOccurs="
0159: + ((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) ? Integer
0160: .toString(maxOccurs)
0161: : "unbounded");
0162: }
0163: }
0164:
0165: /**
0166: * The number of valid entries in the transition table, and in the other
0167: * related tables such as fFinalStateFlags.
0168: */
0169: private int fTransTableSize = 0;
0170:
0171: private boolean fIsCompactedForUPA;
0172:
0173: // temp variables
0174:
0175: //
0176: // Constructors
0177: //
0178:
0179: /**
0180: * Constructs a DFA content model.
0181: *
0182: * @param syntaxTree The syntax tree of the content model.
0183: * @param leafCount The number of leaves.
0184: *
0185: * @exception RuntimeException Thrown if DFA can't be built.
0186: */
0187:
0188: public XSDFACM(CMNode syntaxTree, int leafCount) {
0189:
0190: // Store away our index and pools in members
0191: fLeafCount = leafCount;
0192: fIsCompactedForUPA = syntaxTree.isCompactedForUPA();
0193:
0194: //
0195: // Create some string pool indexes that represent the names of some
0196: // magical nodes in the syntax tree.
0197: // (already done in static initialization...
0198: //
0199:
0200: //
0201: // Ok, so lets grind through the building of the DFA. This method
0202: // handles the high level logic of the algorithm, but it uses a
0203: // number of helper classes to do its thing.
0204: //
0205: // In order to avoid having hundreds of references to the error and
0206: // string handlers around, this guy and all of his helper classes
0207: // just throw a simple exception and we then pass it along.
0208: //
0209:
0210: if (DEBUG_VALIDATE_CONTENT) {
0211: XSDFACM.time -= System.currentTimeMillis();
0212: }
0213:
0214: buildDFA(syntaxTree);
0215:
0216: if (DEBUG_VALIDATE_CONTENT) {
0217: XSDFACM.time += System.currentTimeMillis();
0218: System.out.println("DFA build: " + XSDFACM.time + "ms");
0219: }
0220: }
0221:
0222: private static long time = 0;
0223:
0224: //
0225: // XSCMValidator methods
0226: //
0227:
0228: /**
0229: * check whether the given state is one of the final states
0230: *
0231: * @param state the state to check
0232: *
0233: * @return whether it's a final state
0234: */
0235: public boolean isFinalState(int state) {
0236: return (state < 0) ? false : fFinalStateFlags[state];
0237: }
0238:
0239: /**
0240: * one transition only
0241: *
0242: * @param curElem The current element's QName
0243: * @param state stack to store the previous state
0244: * @param subGroupHandler the substitution group handler
0245: *
0246: * @return null if transition is invalid; otherwise the Object corresponding to the
0247: * XSElementDecl or XSWildcardDecl identified. Also, the
0248: * state array will be modified to include the new state; this so that the validator can
0249: * store it away.
0250: *
0251: * @exception RuntimeException thrown on error
0252: */
0253: public Object oneTransition(QName curElem, int[] state,
0254: SubstitutionGroupHandler subGroupHandler) {
0255: int curState = state[0];
0256:
0257: if (curState == XSCMValidator.FIRST_ERROR
0258: || curState == XSCMValidator.SUBSEQUENT_ERROR) {
0259: // there was an error last time; so just go find correct Object in fElemmMap.
0260: // ... after resetting state[0].
0261: if (curState == XSCMValidator.FIRST_ERROR)
0262: state[0] = XSCMValidator.SUBSEQUENT_ERROR;
0263:
0264: return findMatchingDecl(curElem, subGroupHandler);
0265: }
0266:
0267: int nextState = 0;
0268: int elemIndex = 0;
0269: Object matchingDecl = null;
0270:
0271: for (; elemIndex < fElemMapSize; elemIndex++) {
0272: nextState = fTransTable[curState][elemIndex];
0273: if (nextState == -1)
0274: continue;
0275: int type = fElemMapType[elemIndex];
0276: if (type == XSParticleDecl.PARTICLE_ELEMENT) {
0277: matchingDecl = subGroupHandler.getMatchingElemDecl(
0278: curElem, (XSElementDecl) fElemMap[elemIndex]);
0279: if (matchingDecl != null) {
0280: break;
0281: }
0282: } else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
0283: if (((XSWildcardDecl) fElemMap[elemIndex])
0284: .allowNamespace(curElem.uri)) {
0285: matchingDecl = fElemMap[elemIndex];
0286: break;
0287: }
0288: }
0289: }
0290:
0291: // if we still can't find a match, set the state to first_error
0292: // and return null
0293: if (elemIndex == fElemMapSize) {
0294: state[1] = state[0];
0295: state[0] = XSCMValidator.FIRST_ERROR;
0296: return findMatchingDecl(curElem, subGroupHandler);
0297: }
0298:
0299: if (fCountingStates != null) {
0300: Occurence o = fCountingStates[curState];
0301: if (o != null) {
0302: if (curState == nextState) {
0303: if (++state[2] > o.maxOccurs
0304: && o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
0305: // It's likely that we looped too many times on the current state
0306: // however it's possible that we actually matched another particle
0307: // which allows the same name.
0308: //
0309: // Consider:
0310: //
0311: // <xs:sequence>
0312: // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
0313: // <xs:element name="foo" type="xs:string" fixed="bar"/>
0314: // </xs:sequence>
0315: //
0316: // and
0317: //
0318: // <xs:sequence>
0319: // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
0320: // <xs:any namespace="##any" processContents="skip"/>
0321: // </xs:sequence>
0322: //
0323: // In the DFA there will be two transitions from the current state which
0324: // allow "foo". Note that this is not a UPA violation. The ambiguity of which
0325: // transition to take is resolved by the current value of the counter. Since
0326: // we've already seen enough instances of the first "foo" perhaps there is
0327: // another element declaration or wildcard deeper in the element map which
0328: // matches.
0329: return findMatchingDecl(curElem, state,
0330: subGroupHandler, elemIndex);
0331: }
0332: } else if (state[2] < o.minOccurs) {
0333: // not enough loops on the current state.
0334: state[1] = state[0];
0335: state[0] = XSCMValidator.FIRST_ERROR;
0336: return findMatchingDecl(curElem, subGroupHandler);
0337: } else {
0338: // Exiting a counting state. If we're entering a new
0339: // counting state, reset the counter.
0340: o = fCountingStates[nextState];
0341: if (o != null) {
0342: state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
0343: }
0344: }
0345: } else {
0346: o = fCountingStates[nextState];
0347: if (o != null) {
0348: // Entering a new counting state. Reset the counter.
0349: // If we've already seen one instance of the looping
0350: // particle set the counter to 1, otherwise set it
0351: // to 0.
0352: state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
0353: }
0354: }
0355: }
0356:
0357: state[0] = nextState;
0358: return matchingDecl;
0359: } // oneTransition(QName, int[], SubstitutionGroupHandler): Object
0360:
0361: Object findMatchingDecl(QName curElem,
0362: SubstitutionGroupHandler subGroupHandler) {
0363: Object matchingDecl = null;
0364:
0365: for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
0366: int type = fElemMapType[elemIndex];
0367: if (type == XSParticleDecl.PARTICLE_ELEMENT) {
0368: matchingDecl = subGroupHandler.getMatchingElemDecl(
0369: curElem, (XSElementDecl) fElemMap[elemIndex]);
0370: if (matchingDecl != null) {
0371: return matchingDecl;
0372: }
0373: } else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
0374: if (((XSWildcardDecl) fElemMap[elemIndex])
0375: .allowNamespace(curElem.uri))
0376: return fElemMap[elemIndex];
0377: }
0378: }
0379:
0380: return null;
0381: } // findMatchingDecl(QName, SubstitutionGroupHandler): Object
0382:
0383: Object findMatchingDecl(QName curElem, int[] state,
0384: SubstitutionGroupHandler subGroupHandler, int elemIndex) {
0385:
0386: int curState = state[0];
0387: int nextState = 0;
0388: Object matchingDecl = null;
0389:
0390: while (++elemIndex < fElemMapSize) {
0391: nextState = fTransTable[curState][elemIndex];
0392: if (nextState == -1)
0393: continue;
0394: int type = fElemMapType[elemIndex];
0395: if (type == XSParticleDecl.PARTICLE_ELEMENT) {
0396: matchingDecl = subGroupHandler.getMatchingElemDecl(
0397: curElem, (XSElementDecl) fElemMap[elemIndex]);
0398: if (matchingDecl != null) {
0399: break;
0400: }
0401: } else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
0402: if (((XSWildcardDecl) fElemMap[elemIndex])
0403: .allowNamespace(curElem.uri)) {
0404: matchingDecl = fElemMap[elemIndex];
0405: break;
0406: }
0407: }
0408: }
0409:
0410: // if we still can't find a match, set the state to FIRST_ERROR and return null
0411: if (elemIndex == fElemMapSize) {
0412: state[1] = state[0];
0413: state[0] = XSCMValidator.FIRST_ERROR;
0414: return findMatchingDecl(curElem, subGroupHandler);
0415: }
0416:
0417: // if we found a match, set the next state and reset the
0418: // counter if the next state is a counting state.
0419: state[0] = nextState;
0420: final Occurence o = fCountingStates[nextState];
0421: if (o != null) {
0422: state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
0423: }
0424: return matchingDecl;
0425: } // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object
0426:
0427: // This method returns the start states of the content model.
0428: public int[] startContentModel() {
0429: // [0] : the current state
0430: // [1] : if [0] is an error state then the
0431: // last valid state before the error
0432: // [2] : occurence counter for counting states
0433: return new int[3];
0434: } // startContentModel():int[]
0435:
0436: // this method returns whether the last state was a valid final state
0437: public boolean endContentModel(int[] state) {
0438: final int curState = state[0];
0439: if (fFinalStateFlags[curState]) {
0440: if (fCountingStates != null) {
0441: Occurence o = fCountingStates[curState];
0442: if (o != null && state[2] < o.minOccurs) {
0443: // not enough loops on the current state to be considered final.
0444: return false;
0445: }
0446: }
0447: return true;
0448: }
0449: return false;
0450: } // endContentModel(int[]): boolean
0451:
0452: // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc.,
0453: // but we can put it back later.
0454:
0455: //
0456: // Private methods
0457: //
0458:
0459: /**
0460: * Builds the internal DFA transition table from the given syntax tree.
0461: *
0462: * @param syntaxTree The syntax tree.
0463: *
0464: * @exception RuntimeException Thrown if DFA cannot be built.
0465: */
0466: private void buildDFA(CMNode syntaxTree) {
0467: //
0468: // The first step we need to take is to rewrite the content model
0469: // using our CMNode objects, and in the process get rid of any
0470: // repetition short cuts, converting them into '*' style repetitions
0471: // or getting rid of repetitions altogether.
0472: //
0473: // The conversions done are:
0474: //
0475: // x+ -> (x|x*)
0476: // x? -> (x|epsilon)
0477: //
0478: // This is a relatively complex scenario. What is happening is that
0479: // we create a top level binary node of which the special EOC value
0480: // is set as the right side node. The the left side is set to the
0481: // rewritten syntax tree. The source is the original content model
0482: // info from the decl pool. The rewrite is done by buildSyntaxTree()
0483: // which recurses the decl pool's content of the element and builds
0484: // a new tree in the process.
0485: //
0486: // Note that, during this operation, we set each non-epsilon leaf
0487: // node's DFA state position and count the number of such leafs, which
0488: // is left in the fLeafCount member.
0489: //
0490: // The nodeTmp object is passed in just as a temp node to use during
0491: // the recursion. Otherwise, we'd have to create a new node on every
0492: // level of recursion, which would be piggy in Java (as is everything
0493: // for that matter.)
0494: //
0495:
0496: /* MODIFIED (Jan, 2001)
0497: *
0498: * Use following rules.
0499: * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
0500: * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x)
0501: *
0502: * The same computation of follow as x* is applied to x+
0503: *
0504: * The modification drastically reduces computation time of
0505: * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
0506: */
0507:
0508: //
0509: // And handle specially the EOC node, which also must be numbered
0510: // and counted as a non-epsilon leaf node. It could not be handled
0511: // in the above tree build because it was created before all that
0512: // started. We save the EOC position since its used during the DFA
0513: // building loop.
0514: //
0515: int EOCPos = fLeafCount;
0516: XSCMLeaf nodeEOC = new XSCMLeaf(
0517: XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++);
0518: fHeadNode = new XSCMBinOp(XSModelGroupImpl.MODELGROUP_SEQUENCE,
0519: syntaxTree, nodeEOC);
0520:
0521: //
0522: // Ok, so now we have to iterate the new tree and do a little more
0523: // work now that we know the leaf count. One thing we need to do is
0524: // to calculate the first and last position sets of each node. This
0525: // is cached away in each of the nodes.
0526: //
0527: // Along the way we also set the leaf count in each node as the
0528: // maximum state count. They must know this in order to create their
0529: // first/last pos sets.
0530: //
0531: // We also need to build an array of references to the non-epsilon
0532: // leaf nodes. Since we iterate it in the same way as before, this
0533: // will put them in the array according to their position values.
0534: //
0535: fLeafList = new XSCMLeaf[fLeafCount];
0536: fLeafListType = new int[fLeafCount];
0537: postTreeBuildInit(fHeadNode);
0538:
0539: //
0540: // And, moving onward... We now need to build the follow position
0541: // sets for all the nodes. So we allocate an array of state sets,
0542: // one for each leaf node (i.e. each DFA position.)
0543: //
0544: fFollowList = new CMStateSet[fLeafCount];
0545: for (int index = 0; index < fLeafCount; index++)
0546: fFollowList[index] = new CMStateSet(fLeafCount);
0547: calcFollowList(fHeadNode);
0548: //
0549: // And finally the big push... Now we build the DFA using all the
0550: // states and the tree we've built up. First we set up the various
0551: // data structures we are going to use while we do this.
0552: //
0553: // First of all we need an array of unique element names in our
0554: // content model. For each transition table entry, we need a set of
0555: // contiguous indices to represent the transitions for a particular
0556: // input element. So we need to a zero based range of indexes that
0557: // map to element types. This element map provides that mapping.
0558: //
0559: fElemMap = new Object[fLeafCount];
0560: fElemMapType = new int[fLeafCount];
0561: fElemMapId = new int[fLeafCount];
0562: fElemMapSize = 0;
0563: Occurence[] elemOccurenceMap = null;
0564: for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
0565: // optimization from Henry Zongaro:
0566: //fElemMap[outIndex] = new Object ();
0567: fElemMap[outIndex] = null;
0568:
0569: int inIndex = 0;
0570: final int id = fLeafList[outIndex].getParticleId();
0571: for (; inIndex < fElemMapSize; inIndex++) {
0572: if (id == fElemMapId[inIndex])
0573: break;
0574: }
0575:
0576: // If it was not in the list, then add it, if not the EOC node
0577: if (inIndex == fElemMapSize) {
0578: XSCMLeaf leaf = fLeafList[outIndex];
0579: fElemMap[fElemMapSize] = leaf.getLeaf();
0580: if (leaf instanceof XSCMRepeatingLeaf) {
0581: if (elemOccurenceMap == null) {
0582: elemOccurenceMap = new Occurence[fLeafCount];
0583: }
0584: elemOccurenceMap[fElemMapSize] = new Occurence(
0585: (XSCMRepeatingLeaf) leaf, fElemMapSize);
0586: }
0587: fElemMapType[fElemMapSize] = fLeafListType[outIndex];
0588: fElemMapId[fElemMapSize] = id;
0589: fElemMapSize++;
0590: }
0591: }
0592:
0593: // the last entry in the element map must be the EOC element.
0594: // remove it from the map.
0595: if (DEBUG) {
0596: if (fElemMapId[fElemMapSize - 1] != -1)
0597: System.err
0598: .println("interal error in DFA: last element is not EOC.");
0599: }
0600: fElemMapSize--;
0601:
0602: /***
0603: * Optimization(Jan, 2001); We sort fLeafList according to
0604: * elemIndex which is *uniquely* associated to each leaf.
0605: * We are *assuming* that each element appears in at least one leaf.
0606: **/
0607:
0608: int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
0609: int fSortCount = 0;
0610:
0611: for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
0612: final int id = fElemMapId[elemIndex];
0613: for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
0614: if (id == fLeafList[leafIndex].getParticleId())
0615: fLeafSorter[fSortCount++] = leafIndex;
0616: }
0617: fLeafSorter[fSortCount++] = -1;
0618: }
0619:
0620: /* Optimization(Jan, 2001) */
0621:
0622: //
0623: // Next lets create some arrays, some that hold transient
0624: // information during the DFA build and some that are permament.
0625: // These are kind of sticky since we cannot know how big they will
0626: // get, but we don't want to use any Java collections because of
0627: // performance.
0628: //
0629: // Basically they will probably be about fLeafCount*2 on average,
0630: // but can be as large as 2^(fLeafCount*2), worst case. So we start
0631: // with fLeafCount*4 as a middle ground. This will be very unlikely
0632: // to ever have to expand, though it if does, the overhead will be
0633: // somewhat ugly.
0634: //
0635: int curArraySize = fLeafCount * 4;
0636: CMStateSet[] statesToDo = new CMStateSet[curArraySize];
0637: fFinalStateFlags = new boolean[curArraySize];
0638: fTransTable = new int[curArraySize][];
0639:
0640: //
0641: // Ok we start with the initial set as the first pos set of the
0642: // head node (which is the seq node that holds the content model
0643: // and the EOC node.)
0644: //
0645: CMStateSet setT = fHeadNode.firstPos();
0646:
0647: //
0648: // Init our two state flags. Basically the unmarked state counter
0649: // is always chasing the current state counter. When it catches up,
0650: // that means we made a pass through that did not add any new states
0651: // to the lists, at which time we are done. We could have used a
0652: // expanding array of flags which we used to mark off states as we
0653: // complete them, but this is easier though less readable maybe.
0654: //
0655: int unmarkedState = 0;
0656: int curState = 0;
0657:
0658: //
0659: // Init the first transition table entry, and put the initial state
0660: // into the states to do list, then bump the current state.
0661: //
0662: fTransTable[curState] = makeDefStateList();
0663: statesToDo[curState] = setT;
0664: curState++;
0665:
0666: /* Optimization(Jan, 2001); This is faster for
0667: * a large content model such as, "(t001+|t002+|.... |t500+)".
0668: */
0669:
0670: HashMap stateTable = new HashMap();
0671:
0672: /* Optimization(Jan, 2001) */
0673:
0674: //
0675: // Ok, almost done with the algorithm... We now enter the
0676: // loop where we go until the states done counter catches up with
0677: // the states to do counter.
0678: //
0679: while (unmarkedState < curState) {
0680: //
0681: // Get the first unmarked state out of the list of states to do.
0682: // And get the associated transition table entry.
0683: //
0684: setT = statesToDo[unmarkedState];
0685: int[] transEntry = fTransTable[unmarkedState];
0686:
0687: // Mark this one final if it contains the EOC state
0688: fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos);
0689:
0690: // Bump up the unmarked state count, marking this state done
0691: unmarkedState++;
0692:
0693: // Loop through each possible input symbol in the element map
0694: CMStateSet newSet = null;
0695: /* Optimization(Jan, 2001) */
0696: int sorterIndex = 0;
0697: /* Optimization(Jan, 2001) */
0698: for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
0699: //
0700: // Build up a set of states which is the union of all of
0701: // the follow sets of DFA positions that are in the current
0702: // state. If we gave away the new set last time through then
0703: // create a new one. Otherwise, zero out the existing one.
0704: //
0705: if (newSet == null)
0706: newSet = new CMStateSet(fLeafCount);
0707: else
0708: newSet.zeroBits();
0709:
0710: /* Optimization(Jan, 2001) */
0711: int leafIndex = fLeafSorter[sorterIndex++];
0712:
0713: while (leafIndex != -1) {
0714: // If this leaf index (DFA position) is in the current set...
0715: if (setT.getBit(leafIndex)) {
0716: //
0717: // If this leaf is the current input symbol, then we
0718: // want to add its follow list to the set of states to
0719: // transition to from the current state.
0720: //
0721: newSet.union(fFollowList[leafIndex]);
0722: }
0723:
0724: leafIndex = fLeafSorter[sorterIndex++];
0725: }
0726: /* Optimization(Jan, 2001) */
0727:
0728: //
0729: // If this new set is not empty, then see if its in the list
0730: // of states to do. If not, then add it.
0731: //
0732: if (!newSet.isEmpty()) {
0733: //
0734: // Search the 'states to do' list to see if this new
0735: // state set is already in there.
0736: //
0737:
0738: /* Optimization(Jan, 2001) */
0739: Integer stateObj = (Integer) stateTable.get(newSet);
0740: int stateIndex = (stateObj == null ? curState
0741: : stateObj.intValue());
0742: /* Optimization(Jan, 2001) */
0743:
0744: // If we did not find it, then add it
0745: if (stateIndex == curState) {
0746: //
0747: // Put this new state into the states to do and init
0748: // a new entry at the same index in the transition
0749: // table.
0750: //
0751: statesToDo[curState] = newSet;
0752: fTransTable[curState] = makeDefStateList();
0753:
0754: /* Optimization(Jan, 2001) */
0755: stateTable.put(newSet, new Integer(curState));
0756: /* Optimization(Jan, 2001) */
0757:
0758: // We now have a new state to do so bump the count
0759: curState++;
0760:
0761: //
0762: // Null out the new set to indicate we adopted it.
0763: // This will cause the creation of a new set on the
0764: // next time around the loop.
0765: //
0766: newSet = null;
0767: }
0768:
0769: //
0770: // Now set this state in the transition table's entry
0771: // for this element (using its index), with the DFA
0772: // state we will move to from the current state when we
0773: // see this input element.
0774: //
0775: transEntry[elemIndex] = stateIndex;
0776:
0777: // Expand the arrays if we're full
0778: if (curState == curArraySize) {
0779: //
0780: // Yikes, we overflowed the initial array size, so
0781: // we've got to expand all of these arrays. So adjust
0782: // up the size by 50% and allocate new arrays.
0783: //
0784: final int newSize = (int) (curArraySize * 1.5);
0785: CMStateSet[] newToDo = new CMStateSet[newSize];
0786: boolean[] newFinalFlags = new boolean[newSize];
0787: int[][] newTransTable = new int[newSize][];
0788:
0789: // Copy over all of the existing content
0790: System.arraycopy(statesToDo, 0, newToDo, 0,
0791: curArraySize);
0792: System.arraycopy(fFinalStateFlags, 0,
0793: newFinalFlags, 0, curArraySize);
0794: System.arraycopy(fTransTable, 0, newTransTable,
0795: 0, curArraySize);
0796:
0797: // Store the new array size
0798: curArraySize = newSize;
0799: statesToDo = newToDo;
0800: fFinalStateFlags = newFinalFlags;
0801: fTransTable = newTransTable;
0802: }
0803: }
0804: }
0805: }
0806:
0807: //
0808: // Fill in the occurence information for each looping state
0809: // if we're using counters.
0810: //
0811: if (elemOccurenceMap != null) {
0812: fCountingStates = new Occurence[curState];
0813: for (int i = 0; i < curState; ++i) {
0814: int[] transitions = fTransTable[i];
0815: for (int j = 0; j < transitions.length; ++j) {
0816: if (i == transitions[j]) {
0817: fCountingStates[i] = elemOccurenceMap[j];
0818: break;
0819: }
0820: }
0821: }
0822: }
0823:
0824: //
0825: // And now we can say bye bye to the temp representation since we've
0826: // built the DFA.
0827: //
0828: if (DEBUG_VALIDATE_CONTENT)
0829: dumpTree(fHeadNode, 0);
0830: fHeadNode = null;
0831: fLeafList = null;
0832: fFollowList = null;
0833: fLeafListType = null;
0834: fElemMapId = null;
0835: }
0836:
0837: /**
0838: * Calculates the follow list of the current node.
0839: *
0840: * @param nodeCur The curent node.
0841: *
0842: * @exception RuntimeException Thrown if follow list cannot be calculated.
0843: */
0844: private void calcFollowList(CMNode nodeCur) {
0845: // Recurse as required
0846: if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) {
0847: // Recurse only
0848: calcFollowList(((XSCMBinOp) nodeCur).getLeft());
0849: calcFollowList(((XSCMBinOp) nodeCur).getRight());
0850: } else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
0851: // Recurse first
0852: calcFollowList(((XSCMBinOp) nodeCur).getLeft());
0853: calcFollowList(((XSCMBinOp) nodeCur).getRight());
0854:
0855: //
0856: // Now handle our level. We use our left child's last pos
0857: // set and our right child's first pos set, so go ahead and
0858: // get them ahead of time.
0859: //
0860: final CMStateSet last = ((XSCMBinOp) nodeCur).getLeft()
0861: .lastPos();
0862: final CMStateSet first = ((XSCMBinOp) nodeCur).getRight()
0863: .firstPos();
0864:
0865: //
0866: // Now, for every position which is in our left child's last set
0867: // add all of the states in our right child's first set to the
0868: // follow set for that position.
0869: //
0870: for (int index = 0; index < fLeafCount; index++) {
0871: if (last.getBit(index))
0872: fFollowList[index].union(first);
0873: }
0874: } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE
0875: || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) {
0876: // Recurse first
0877: calcFollowList(((XSCMUniOp) nodeCur).getChild());
0878:
0879: //
0880: // Now handle our level. We use our own first and last position
0881: // sets, so get them up front.
0882: //
0883: final CMStateSet first = nodeCur.firstPos();
0884: final CMStateSet last = nodeCur.lastPos();
0885:
0886: //
0887: // For every position which is in our last position set, add all
0888: // of our first position states to the follow set for that
0889: // position.
0890: //
0891: for (int index = 0; index < fLeafCount; index++) {
0892: if (last.getBit(index))
0893: fFollowList[index].union(first);
0894: }
0895: }
0896:
0897: else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
0898: // Recurse only
0899: calcFollowList(((XSCMUniOp) nodeCur).getChild());
0900: }
0901:
0902: }
0903:
0904: /**
0905: * Dumps the tree of the current node to standard output.
0906: *
0907: * @param nodeCur The current node.
0908: * @param level The maximum levels to output.
0909: *
0910: * @exception RuntimeException Thrown on error.
0911: */
0912: private void dumpTree(CMNode nodeCur, int level) {
0913: for (int index = 0; index < level; index++)
0914: System.out.print(" ");
0915:
0916: int type = nodeCur.type();
0917:
0918: switch (type) {
0919:
0920: case XSModelGroupImpl.MODELGROUP_CHOICE:
0921: case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
0922: if (type == XSModelGroupImpl.MODELGROUP_CHOICE)
0923: System.out.print("Choice Node ");
0924: else
0925: System.out.print("Seq Node ");
0926:
0927: if (nodeCur.isNullable())
0928: System.out.print("Nullable ");
0929:
0930: System.out.print("firstPos=");
0931: System.out.print(nodeCur.firstPos().toString());
0932: System.out.print(" lastPos=");
0933: System.out.println(nodeCur.lastPos().toString());
0934:
0935: dumpTree(((XSCMBinOp) nodeCur).getLeft(), level + 1);
0936: dumpTree(((XSCMBinOp) nodeCur).getRight(), level + 1);
0937: break;
0938: }
0939: case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
0940: case XSParticleDecl.PARTICLE_ONE_OR_MORE:
0941: case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
0942: System.out.print("Rep Node ");
0943:
0944: if (nodeCur.isNullable())
0945: System.out.print("Nullable ");
0946:
0947: System.out.print("firstPos=");
0948: System.out.print(nodeCur.firstPos().toString());
0949: System.out.print(" lastPos=");
0950: System.out.println(nodeCur.lastPos().toString());
0951:
0952: dumpTree(((XSCMUniOp) nodeCur).getChild(), level + 1);
0953: break;
0954: }
0955: case XSParticleDecl.PARTICLE_ELEMENT: {
0956: System.out.print("Leaf: (pos="
0957: + ((XSCMLeaf) nodeCur).getPosition() + "), "
0958: + "(elemIndex=" + ((XSCMLeaf) nodeCur).getLeaf()
0959: + ") ");
0960:
0961: if (nodeCur.isNullable())
0962: System.out.print(" Nullable ");
0963:
0964: System.out.print("firstPos=");
0965: System.out.print(nodeCur.firstPos().toString());
0966: System.out.print(" lastPos=");
0967: System.out.println(nodeCur.lastPos().toString());
0968: break;
0969: }
0970: case XSParticleDecl.PARTICLE_WILDCARD:
0971: System.out.print("Any Node: ");
0972:
0973: System.out.print("firstPos=");
0974: System.out.print(nodeCur.firstPos().toString());
0975: System.out.print(" lastPos=");
0976: System.out.println(nodeCur.lastPos().toString());
0977: break;
0978: default: {
0979: throw new RuntimeException(
0980: "ImplementationMessages.VAL_NIICM");
0981: }
0982: }
0983:
0984: }
0985:
0986: /**
0987: * -1 is used to represent bad transitions in the transition table
0988: * entry for each state. So each entry is initialized to an all -1
0989: * array. This method creates a new entry and initializes it.
0990: */
0991: private int[] makeDefStateList() {
0992: int[] retArray = new int[fElemMapSize];
0993: for (int index = 0; index < fElemMapSize; index++)
0994: retArray[index] = -1;
0995: return retArray;
0996: }
0997:
0998: /** Post tree build initialization. */
0999: private void postTreeBuildInit(CMNode nodeCur)
1000: throws RuntimeException {
1001: // Set the maximum states on this node
1002: nodeCur.setMaxStates(fLeafCount);
1003:
1004: XSCMLeaf leaf = null;
1005: int pos = 0;
1006: // Recurse as required
1007: if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) {
1008: leaf = (XSCMLeaf) nodeCur;
1009: pos = leaf.getPosition();
1010: fLeafList[pos] = leaf;
1011: fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD;
1012: } else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE)
1013: || (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) {
1014: postTreeBuildInit(((XSCMBinOp) nodeCur).getLeft());
1015: postTreeBuildInit(((XSCMBinOp) nodeCur).getRight());
1016: } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE
1017: || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE
1018: || nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
1019: postTreeBuildInit(((XSCMUniOp) nodeCur).getChild());
1020: } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) {
1021: // Put this node in the leaf list at the current index if its
1022: // a non-epsilon leaf.
1023: leaf = (XSCMLeaf) nodeCur;
1024: pos = leaf.getPosition();
1025: fLeafList[pos] = leaf;
1026: fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT;
1027: } else {
1028: throw new RuntimeException(
1029: "ImplementationMessages.VAL_NIICM");
1030: }
1031: }
1032:
1033: /**
1034: * check whether this content violates UPA constraint.
1035: *
1036: * @param subGroupHandler the substitution group handler
1037: * @return true if this content model contains other or list wildcard
1038: */
1039: public boolean checkUniqueParticleAttribution(
1040: SubstitutionGroupHandler subGroupHandler)
1041: throws XMLSchemaException {
1042: // Unique Particle Attribution
1043: // store the conflict results between any two elements in fElemMap
1044: // 0: not compared; -1: no conflict; 1: conflict
1045: // initialize the conflict table (all 0 initially)
1046: byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize];
1047:
1048: // for each state, check whether it has overlap transitions
1049: for (int i = 0; i < fTransTable.length
1050: && fTransTable[i] != null; i++) {
1051: for (int j = 0; j < fElemMapSize; j++) {
1052: for (int k = j + 1; k < fElemMapSize; k++) {
1053: if (fTransTable[i][j] != -1
1054: && fTransTable[i][k] != -1) {
1055: if (conflictTable[j][k] == 0) {
1056: if (XSConstraints.overlapUPA(fElemMap[j],
1057: fElemMap[k], subGroupHandler)) {
1058: if (fCountingStates != null) {
1059: Occurence o = fCountingStates[i];
1060: // If "i" is a counting state and exactly one of the transitions
1061: // loops back to "i" then the two particles do not overlap if
1062: // minOccurs == maxOccurs.
1063: if (o != null
1064: && fTransTable[i][j] == i
1065: ^ fTransTable[i][k] == i
1066: && o.minOccurs == o.maxOccurs) {
1067: conflictTable[j][k] = (byte) -1;
1068: continue;
1069: }
1070: }
1071: conflictTable[j][k] = (byte) 1;
1072: } else {
1073: conflictTable[j][k] = (byte) -1;
1074: }
1075: }
1076: }
1077: }
1078: }
1079: }
1080:
1081: // report all errors
1082: for (int i = 0; i < fElemMapSize; i++) {
1083: for (int j = 0; j < fElemMapSize; j++) {
1084: if (conflictTable[i][j] == 1) {
1085: //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(),
1086: // fElemMap[j].toString()});
1087: // REVISIT: do we want to report all errors? or just one?
1088: throw new XMLSchemaException("cos-nonambig",
1089: new Object[] { fElemMap[i].toString(),
1090: fElemMap[j].toString() });
1091: }
1092: }
1093: }
1094:
1095: // if there is a other or list wildcard, we need to check this CM
1096: // again, if this grammar is cached.
1097: for (int i = 0; i < fElemMapSize; i++) {
1098: if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) {
1099: XSWildcardDecl wildcard = (XSWildcardDecl) fElemMap[i];
1100: if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST
1101: || wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) {
1102: return true;
1103: }
1104: }
1105: }
1106:
1107: return false;
1108: }
1109:
1110: /**
1111: * Check which elements are valid to appear at this point. This method also
1112: * works if the state is in error, in which case it returns what should
1113: * have been seen.
1114: *
1115: * @param state the current state
1116: * @return a Vector whose entries are instances of
1117: * either XSWildcardDecl or XSElementDecl.
1118: */
1119: public Vector whatCanGoHere(int[] state) {
1120: int curState = state[0];
1121: if (curState < 0)
1122: curState = state[1];
1123: Occurence o = (fCountingStates != null) ? fCountingStates[curState]
1124: : null;
1125: int count = state[2];
1126:
1127: Vector ret = new Vector();
1128: for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
1129: int nextState = fTransTable[curState][elemIndex];
1130: if (nextState != -1) {
1131: if (o != null) {
1132: if (curState == nextState) {
1133: // Do not include transitions which loop back to the
1134: // current state if we've looped the maximum number
1135: // of times or greater.
1136: if (count >= o.maxOccurs
1137: && o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
1138: continue;
1139: }
1140: }
1141: // Do not include transitions which advance past the
1142: // current state if we have not looped enough times.
1143: else if (count < o.minOccurs) {
1144: continue;
1145: }
1146: }
1147: ret.addElement(fElemMap[elemIndex]);
1148: }
1149: }
1150: return ret;
1151: }
1152:
1153: public boolean isCompactedForUPA() {
1154: return fIsCompactedForUPA;
1155: }
1156: } // class DFAContentModel
|