001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.xerces.impl.dtd.models;
019:
020: import java.util.HashMap;
021:
022: import org.apache.xerces.impl.dtd.XMLContentSpec;
023: import org.apache.xerces.xni.QName;
024:
025: /**
026: * DFAContentModel is the derivative of ContentModel that does
027: * all of the non-trivial element content validation. This class does
028: * the conversion from the regular expression to the DFA that
029: * it then uses in its validation algorithm.
030: * <p>
031: * <b>Note:</b> Upstream work insures that this class will never see
032: * a content model with PCDATA in it. Any model with PCDATA is 'mixed'
033: * and is handled via the MixedContentModel class since mixed models
034: * are very constrained in form and easily handled via a special case.
035: * This also makes implementation of this class much easier.
036: *
037: * @xerces.internal
038: *
039: * @version $Id: DFAContentModel.java 572057 2007-09-02 18:03:20Z mrglavas $
040: */
041: public class DFAContentModel implements ContentModelValidator {
042:
043: //
044: // Constants
045: //
046: // special strings
047:
048: /** Epsilon string. */
049: private static String fEpsilonString = "<<CMNODE_EPSILON>>";
050:
051: /** End-of-content string. */
052: private static String fEOCString = "<<CMNODE_EOC>>";
053:
054: /** initializing static members **/
055: static {
056: fEpsilonString = fEpsilonString.intern();
057: fEOCString = fEOCString.intern();
058: }
059:
060: // debugging
061:
062: /** Set to true to debug content model validation. */
063: private static final boolean DEBUG_VALIDATE_CONTENT = false;
064:
065: //
066: // Data
067: //
068:
069: /* this is the EquivClassComparator object */
070: //private EquivClassComparator comparator = null;
071: /**
072: * This is the map of unique input symbol elements to indices into
073: * each state's per-input symbol transition table entry. This is part
074: * of the built DFA information that must be kept around to do the
075: * actual validation.
076: */
077: private QName fElemMap[] = null;
078:
079: /**
080: * This is a map of whether the element map contains information
081: * related to ANY models.
082: */
083: private int fElemMapType[] = null;
084:
085: /** The element map size. */
086: private int fElemMapSize = 0;
087:
088: /** Boolean to distinguish Schema Mixed Content */
089: private boolean fMixed;
090:
091: /**
092: * The NFA position of the special EOC (end of content) node. This
093: * is saved away since it's used during the DFA build.
094: */
095: private int fEOCPos = 0;
096:
097: /**
098: * This is an array of booleans, one per state (there are
099: * fTransTableSize states in the DFA) that indicates whether that
100: * state is a final state.
101: */
102: private boolean fFinalStateFlags[] = null;
103:
104: /**
105: * The list of follow positions for each NFA position (i.e. for each
106: * non-epsilon leaf node.) This is only used during the building of
107: * the DFA, and is let go afterwards.
108: */
109: private CMStateSet fFollowList[] = null;
110:
111: /**
112: * This is the head node of our intermediate representation. It is
113: * only non-null during the building of the DFA (just so that it
114: * does not have to be passed all around.) Once the DFA is built,
115: * this is no longer required so its nulled out.
116: */
117: private CMNode fHeadNode = null;
118:
119: /**
120: * The count of leaf nodes. This is an important number that set some
121: * limits on the sizes of data structures in the DFA process.
122: */
123: private int fLeafCount = 0;
124:
125: /**
126: * An array of non-epsilon leaf nodes, which is used during the DFA
127: * build operation, then dropped.
128: */
129: private CMLeaf fLeafList[] = null;
130:
131: /** Array mapping ANY types to the leaf list. */
132: private int fLeafListType[] = null;
133:
134: //private ContentLeafNameTypeVector fLeafNameTypeVector = null;
135:
136: /**
137: * The string pool of our parser session. This is set during construction
138: * and kept around.
139: */
140: //private StringPool fStringPool = null;
141: /**
142: * This is the transition table that is the main by product of all
143: * of the effort here. It is an array of arrays of ints. The first
144: * dimension is the number of states we end up with in the DFA. The
145: * second dimensions is the number of unique elements in the content
146: * model (fElemMapSize). Each entry in the second dimension indicates
147: * the new state given that input for the first dimension's start
148: * state.
149: * <p>
150: * The fElemMap array handles mapping from element indexes to
151: * positions in the second dimension of the transition table.
152: */
153: private int fTransTable[][] = null;
154:
155: /**
156: * The number of valid entries in the transition table, and in the other
157: * related tables such as fFinalStateFlags.
158: */
159: private int fTransTableSize = 0;
160:
161: /**
162: * Flag that indicates that even though we have a "complicated"
163: * content model, it is valid to have no content. In other words,
164: * all parts of the content model are optional. For example:
165: * <pre>
166: * <!ELEMENT AllOptional (Optional*,NotRequired?)>
167: * </pre>
168: */
169: private boolean fEmptyContentIsValid = false;
170:
171: // temp variables
172:
173: /** Temporary qualified name. */
174: private final QName fQName = new QName();
175:
176: //
177: // Constructors
178: //
179:
180: //
181: // Constructors
182: //
183:
184: /**
185: * Constructs a DFA content model.
186: *
187: * @param syntaxTree The syntax tree of the content model.
188: * @param leafCount The number of leaves.
189: * @param mixed
190: *
191: */
192: public DFAContentModel(CMNode syntaxTree, int leafCount,
193: boolean mixed) {
194: // Store away our index and pools in members
195: //fStringPool = stringPool;
196: fLeafCount = leafCount;
197:
198: // this is for Schema Mixed Content
199: fMixed = mixed;
200:
201: //
202: // Ok, so lets grind through the building of the DFA. This method
203: // handles the high level logic of the algorithm, but it uses a
204: // number of helper classes to do its thing.
205: //
206: // In order to avoid having hundreds of references to the error and
207: // string handlers around, this guy and all of his helper classes
208: // just throw a simple exception and we then pass it along.
209: //
210: buildDFA(syntaxTree);
211: }
212:
213: //
214: // ContentModelValidator methods
215: //
216:
217: /**
218: * Check that the specified content is valid according to this
219: * content model. This method can also be called to do 'what if'
220: * testing of content models just to see if they would be valid.
221: * <p>
222: * A value of -1 in the children array indicates a PCDATA node. All other
223: * indexes will be positive and represent child elements. The count can be
224: * zero, since some elements have the EMPTY content model and that must be
225: * confirmed.
226: *
227: * @param children The children of this element. Each integer is an index within
228: * the <code>StringPool</code> of the child element name. An index
229: * of -1 is used to indicate an occurrence of non-whitespace character
230: * data.
231: * @param offset Offset into the array where the children starts.
232: * @param length The number of entries in the <code>children</code> array.
233: *
234: * @return The value -1 if fully valid, else the 0 based index of the child
235: * that first failed. If the value returned is equal to the number
236: * of children, then the specified children are valid but additional
237: * content is required to reach a valid ending state.
238: *
239: */
240: public int validate(QName[] children, int offset, int length) {
241:
242: if (DEBUG_VALIDATE_CONTENT)
243: System.out.println("DFAContentModel#validateContent");
244:
245: //
246: // A DFA content model must *always* have at least 1 child
247: // so a failure is given if no children present.
248: //
249: // Defect 782: This is an incorrect statement because a DFA
250: // content model is also used for constructions such as:
251: //
252: // (Optional*,NotRequired?)
253: //
254: // where a perfectly valid content would be NO CHILDREN.
255: // Therefore, if there are no children, we must check to
256: // see if the CMNODE_EOC marker is a valid start state! -Ac
257: //
258: if (length == 0) {
259: if (DEBUG_VALIDATE_CONTENT) {
260: System.out.println("!!! no children");
261: System.out.println("elemMap=" + fElemMap);
262: for (int i = 0; i < fElemMap.length; i++) {
263: String uri = fElemMap[i].uri;
264: String localpart = fElemMap[i].localpart;
265:
266: System.out.println("fElemMap[" + i + "]=" + uri
267: + "," + localpart + " (" + uri + ", "
268: + localpart + ')');
269:
270: }
271: System.out.println("EOCIndex=" + fEOCString);
272: }
273:
274: return fEmptyContentIsValid ? -1 : 0;
275:
276: } // if child count == 0
277:
278: //
279: // Lets loop through the children in the array and move our way
280: // through the states. Note that we use the fElemMap array to map
281: // an element index to a state index.
282: //
283: int curState = 0;
284: for (int childIndex = 0; childIndex < length; childIndex++) {
285: // Get the current element index out
286: final QName curElem = children[offset + childIndex];
287: // ignore mixed text
288: if (fMixed && curElem.localpart == null) {
289: continue;
290: }
291:
292: // Look up this child in our element map
293: int elemIndex = 0;
294: for (; elemIndex < fElemMapSize; elemIndex++) {
295: int type = fElemMapType[elemIndex] & 0x0f;
296: if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
297: //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
298: if (fElemMap[elemIndex].rawname == curElem.rawname) {
299: break;
300: }
301: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
302: String uri = fElemMap[elemIndex].uri;
303: if (uri == null || uri == curElem.uri) {
304: break;
305: }
306: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
307: if (curElem.uri == null) {
308: break;
309: }
310: } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
311: if (fElemMap[elemIndex].uri != curElem.uri) {
312: break;
313: }
314: }
315: }
316:
317: // If we didn't find it, then obviously not valid
318: if (elemIndex == fElemMapSize) {
319: if (DEBUG_VALIDATE_CONTENT) {
320: System.out.println("!!! didn't find it");
321:
322: System.out.println("curElem : " + curElem);
323: for (int i = 0; i < fElemMapSize; i++) {
324: System.out.println("fElemMap[" + i + "] = "
325: + fElemMap[i]);
326: System.out.println("fElemMapType[" + i + "] = "
327: + fElemMapType[i]);
328: }
329: }
330:
331: return childIndex;
332: }
333:
334: //
335: // Look up the next state for this input symbol when in the
336: // current state.
337: //
338: curState = fTransTable[curState][elemIndex];
339:
340: // If its not a legal transition, then invalid
341: if (curState == -1) {
342: if (DEBUG_VALIDATE_CONTENT)
343: System.out.println("!!! not a legal transition");
344: return childIndex;
345: }
346: }
347:
348: //
349: // We transitioned all the way through the input list. However, that
350: // does not mean that we ended in a final state. So check whether
351: // our ending state is a final state.
352: //
353: if (DEBUG_VALIDATE_CONTENT)
354: System.out.println("curState=" + curState + ", childCount="
355: + length);
356: if (!fFinalStateFlags[curState])
357: return length;
358:
359: // success!
360: return -1;
361: } // validate
362:
363: //
364: // Private methods
365: //
366:
367: /**
368: * Builds the internal DFA transition table from the given syntax tree.
369: *
370: * @param syntaxTree The syntax tree.
371: *
372: * @exception CMException Thrown if DFA cannot be built.
373: */
374: private void buildDFA(CMNode syntaxTree) {
375: //
376: // The first step we need to take is to rewrite the content model
377: // using our CMNode objects, and in the process get rid of any
378: // repetition short cuts, converting them into '*' style repetitions
379: // or getting rid of repetitions altogether.
380: //
381: // The conversions done are:
382: //
383: // x+ -> (x|x*)
384: // x? -> (x|epsilon)
385: //
386: // This is a relatively complex scenario. What is happening is that
387: // we create a top level binary node of which the special EOC value
388: // is set as the right side node. The the left side is set to the
389: // rewritten syntax tree. The source is the original content model
390: // info from the decl pool. The rewrite is done by buildSyntaxTree()
391: // which recurses the decl pool's content of the element and builds
392: // a new tree in the process.
393: //
394: // Note that, during this operation, we set each non-epsilon leaf
395: // node's DFA state position and count the number of such leafs, which
396: // is left in the fLeafCount member.
397: //
398: // The nodeTmp object is passed in just as a temp node to use during
399: // the recursion. Otherwise, we'd have to create a new node on every
400: // level of recursion, which would be piggy in Java (as is everything
401: // for that matter.)
402: //
403:
404: /* MODIFIED (Jan, 2001)
405: *
406: * Use following rules.
407: * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
408: * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x)
409: *
410: * The same computation of follow as x* is applied to x+
411: *
412: * The modification drastically reduces computation time of
413: * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
414: */
415:
416: fQName.setValues(null, fEOCString, fEOCString, null);
417: CMLeaf nodeEOC = new CMLeaf(fQName);
418: fHeadNode = new CMBinOp(XMLContentSpec.CONTENTSPECNODE_SEQ,
419: syntaxTree, nodeEOC);
420:
421: //
422: // And handle specially the EOC node, which also must be numbered
423: // and counted as a non-epsilon leaf node. It could not be handled
424: // in the above tree build because it was created before all that
425: // started. We save the EOC position since its used during the DFA
426: // building loop.
427: //
428: fEOCPos = fLeafCount;
429: nodeEOC.setPosition(fLeafCount++);
430:
431: //
432: // Ok, so now we have to iterate the new tree and do a little more
433: // work now that we know the leaf count. One thing we need to do is
434: // to calculate the first and last position sets of each node. This
435: // is cached away in each of the nodes.
436: //
437: // Along the way we also set the leaf count in each node as the
438: // maximum state count. They must know this in order to create their
439: // first/last pos sets.
440: //
441: // We also need to build an array of references to the non-epsilon
442: // leaf nodes. Since we iterate it in the same way as before, this
443: // will put them in the array according to their position values.
444: //
445: fLeafList = new CMLeaf[fLeafCount];
446: fLeafListType = new int[fLeafCount];
447: postTreeBuildInit(fHeadNode, 0);
448:
449: //
450: // And, moving onward... We now need to build the follow position
451: // sets for all the nodes. So we allocate an array of state sets,
452: // one for each leaf node (i.e. each DFA position.)
453: //
454: fFollowList = new CMStateSet[fLeafCount];
455: for (int index = 0; index < fLeafCount; index++)
456: fFollowList[index] = new CMStateSet(fLeafCount);
457: calcFollowList(fHeadNode);
458: //
459: // And finally the big push... Now we build the DFA using all the
460: // states and the tree we've built up. First we set up the various
461: // data structures we are going to use while we do this.
462: //
463: // First of all we need an array of unique element names in our
464: // content model. For each transition table entry, we need a set of
465: // contiguous indices to represent the transitions for a particular
466: // input element. So we need to a zero based range of indexes that
467: // map to element types. This element map provides that mapping.
468: //
469: fElemMap = new QName[fLeafCount];
470: fElemMapType = new int[fLeafCount];
471: fElemMapSize = 0;
472: for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
473: fElemMap[outIndex] = new QName();
474:
475: /****
476: if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
477: if (fLeafNameTypeVector == null) {
478: fLeafNameTypeVector = new ContentLeafNameTypeVector();
479: }
480: }
481: /****/
482:
483: // Get the current leaf's element index
484: final QName element = fLeafList[outIndex].getElement();
485:
486: // See if the current leaf node's element index is in the list
487: int inIndex = 0;
488: for (; inIndex < fElemMapSize; inIndex++) {
489: if (fElemMap[inIndex].rawname == element.rawname) {
490: break;
491: }
492: }
493:
494: // If it was not in the list, then add it, if not the EOC node
495: if (inIndex == fElemMapSize) {
496: fElemMap[fElemMapSize].setValues(element);
497: fElemMapType[fElemMapSize] = fLeafListType[outIndex];
498: fElemMapSize++;
499: }
500: }
501: // set up the fLeafNameTypeVector object if there is one.
502: /*****
503: if (fLeafNameTypeVector != null) {
504: fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
505: }
506:
507: /***
508: * Optimization(Jan, 2001); We sort fLeafList according to
509: * elemIndex which is *uniquely* associated to each leaf.
510: * We are *assuming* that each element appears in at least one leaf.
511: **/
512:
513: int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
514: int fSortCount = 0;
515:
516: for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
517: for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
518: final QName leaf = fLeafList[leafIndex].getElement();
519: final QName element = fElemMap[elemIndex];
520: if (leaf.rawname == element.rawname) {
521: fLeafSorter[fSortCount++] = leafIndex;
522: }
523: }
524: fLeafSorter[fSortCount++] = -1;
525: }
526:
527: /* Optimization(Jan, 2001) */
528:
529: //
530: // Next lets create some arrays, some that that hold transient
531: // information during the DFA build and some that are permament.
532: // These are kind of sticky since we cannot know how big they will
533: // get, but we don't want to use any Java collections because of
534: // performance.
535: //
536: // Basically they will probably be about fLeafCount*2 on average,
537: // but can be as large as 2^(fLeafCount*2), worst case. So we start
538: // with fLeafCount*4 as a middle ground. This will be very unlikely
539: // to ever have to expand, though it if does, the overhead will be
540: // somewhat ugly.
541: //
542: int curArraySize = fLeafCount * 4;
543: CMStateSet[] statesToDo = new CMStateSet[curArraySize];
544: fFinalStateFlags = new boolean[curArraySize];
545: fTransTable = new int[curArraySize][];
546:
547: //
548: // Ok we start with the initial set as the first pos set of the
549: // head node (which is the seq node that holds the content model
550: // and the EOC node.)
551: //
552: CMStateSet setT = fHeadNode.firstPos();
553:
554: //
555: // Init our two state flags. Basically the unmarked state counter
556: // is always chasing the current state counter. When it catches up,
557: // that means we made a pass through that did not add any new states
558: // to the lists, at which time we are done. We could have used a
559: // expanding array of flags which we used to mark off states as we
560: // complete them, but this is easier though less readable maybe.
561: //
562: int unmarkedState = 0;
563: int curState = 0;
564:
565: //
566: // Init the first transition table entry, and put the initial state
567: // into the states to do list, then bump the current state.
568: //
569: fTransTable[curState] = makeDefStateList();
570: statesToDo[curState] = setT;
571: curState++;
572:
573: /* Optimization(Jan, 2001); This is faster for
574: * a large content model such as, "(t001+|t002+|.... |t500+)".
575: */
576:
577: HashMap stateTable = new HashMap();
578:
579: /* Optimization(Jan, 2001) */
580:
581: //
582: // Ok, almost done with the algorithm... We now enter the
583: // loop where we go until the states done counter catches up with
584: // the states to do counter.
585: //
586: while (unmarkedState < curState) {
587: //
588: // Get the first unmarked state out of the list of states to do.
589: // And get the associated transition table entry.
590: //
591: setT = statesToDo[unmarkedState];
592: int[] transEntry = fTransTable[unmarkedState];
593:
594: // Mark this one final if it contains the EOC state
595: fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
596:
597: // Bump up the unmarked state count, marking this state done
598: unmarkedState++;
599:
600: // Loop through each possible input symbol in the element map
601: CMStateSet newSet = null;
602: /* Optimization(Jan, 2001) */
603: int sorterIndex = 0;
604: /* Optimization(Jan, 2001) */
605: for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
606: //
607: // Build up a set of states which is the union of all of
608: // the follow sets of DFA positions that are in the current
609: // state. If we gave away the new set last time through then
610: // create a new one. Otherwise, zero out the existing one.
611: //
612: if (newSet == null)
613: newSet = new CMStateSet(fLeafCount);
614: else
615: newSet.zeroBits();
616:
617: /* Optimization(Jan, 2001) */
618: int leafIndex = fLeafSorter[sorterIndex++];
619:
620: while (leafIndex != -1) {
621: // If this leaf index (DFA position) is in the current set...
622: if (setT.getBit(leafIndex)) {
623: //
624: // If this leaf is the current input symbol, then we
625: // want to add its follow list to the set of states to
626: // transition to from the current state.
627: //
628: newSet.union(fFollowList[leafIndex]);
629: }
630:
631: leafIndex = fLeafSorter[sorterIndex++];
632: }
633: /* Optimization(Jan, 2001) */
634:
635: //
636: // If this new set is not empty, then see if its in the list
637: // of states to do. If not, then add it.
638: //
639: if (!newSet.isEmpty()) {
640: //
641: // Search the 'states to do' list to see if this new
642: // state set is already in there.
643: //
644:
645: /* Optimization(Jan, 2001) */
646: Integer stateObj = (Integer) stateTable.get(newSet);
647: int stateIndex = (stateObj == null ? curState
648: : stateObj.intValue());
649: /* Optimization(Jan, 2001) */
650:
651: // If we did not find it, then add it
652: if (stateIndex == curState) {
653: //
654: // Put this new state into the states to do and init
655: // a new entry at the same index in the transition
656: // table.
657: //
658: statesToDo[curState] = newSet;
659: fTransTable[curState] = makeDefStateList();
660:
661: /* Optimization(Jan, 2001) */
662: stateTable.put(newSet, new Integer(curState));
663: /* Optimization(Jan, 2001) */
664:
665: // We now have a new state to do so bump the count
666: curState++;
667:
668: //
669: // Null out the new set to indicate we adopted it.
670: // This will cause the creation of a new set on the
671: // next time around the loop.
672: //
673: newSet = null;
674: }
675:
676: //
677: // Now set this state in the transition table's entry
678: // for this element (using its index), with the DFA
679: // state we will move to from the current state when we
680: // see this input element.
681: //
682: transEntry[elemIndex] = stateIndex;
683:
684: // Expand the arrays if we're full
685: if (curState == curArraySize) {
686: //
687: // Yikes, we overflowed the initial array size, so
688: // we've got to expand all of these arrays. So adjust
689: // up the size by 50% and allocate new arrays.
690: //
691: final int newSize = (int) (curArraySize * 1.5);
692: CMStateSet[] newToDo = new CMStateSet[newSize];
693: boolean[] newFinalFlags = new boolean[newSize];
694: int[][] newTransTable = new int[newSize][];
695:
696: // Copy over all of the existing content
697: System.arraycopy(statesToDo, 0, newToDo, 0,
698: curArraySize);
699: System.arraycopy(fFinalStateFlags, 0,
700: newFinalFlags, 0, curArraySize);
701: System.arraycopy(fTransTable, 0, newTransTable,
702: 0, curArraySize);
703:
704: // Store the new array size
705: curArraySize = newSize;
706: statesToDo = newToDo;
707: fFinalStateFlags = newFinalFlags;
708: fTransTable = newTransTable;
709: }
710: }
711: }
712: }
713:
714: // Check to see if we can set the fEmptyContentIsValid flag.
715: fEmptyContentIsValid = ((CMBinOp) fHeadNode).getLeft()
716: .isNullable();
717:
718: //
719: // And now we can say bye bye to the temp representation since we've
720: // built the DFA.
721: //
722: if (DEBUG_VALIDATE_CONTENT)
723: dumpTree(fHeadNode, 0);
724: fHeadNode = null;
725: fLeafList = null;
726: fFollowList = null;
727:
728: }
729:
730: /**
731: * Calculates the follow list of the current node.
732: *
733: * @param nodeCur The curent node.
734: *
735: * @exception CMException Thrown if follow list cannot be calculated.
736: */
737: private void calcFollowList(CMNode nodeCur) {
738: // Recurse as required
739: if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE) {
740: // Recurse only
741: calcFollowList(((CMBinOp) nodeCur).getLeft());
742: calcFollowList(((CMBinOp) nodeCur).getRight());
743: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ) {
744: // Recurse first
745: calcFollowList(((CMBinOp) nodeCur).getLeft());
746: calcFollowList(((CMBinOp) nodeCur).getRight());
747:
748: //
749: // Now handle our level. We use our left child's last pos
750: // set and our right child's first pos set, so go ahead and
751: // get them ahead of time.
752: //
753: final CMStateSet last = ((CMBinOp) nodeCur).getLeft()
754: .lastPos();
755: final CMStateSet first = ((CMBinOp) nodeCur).getRight()
756: .firstPos();
757:
758: //
759: // Now, for every position which is in our left child's last set
760: // add all of the states in our right child's first set to the
761: // follow set for that position.
762: //
763: for (int index = 0; index < fLeafCount; index++) {
764: if (last.getBit(index))
765: fFollowList[index].union(first);
766: }
767: }
768: /***
769: else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
770: {
771: // Recurse first
772: calcFollowList(((CMUniOp)nodeCur).getChild());
773:
774: //
775: // Now handle our level. We use our own first and last position
776: // sets, so get them up front.
777: //
778: final CMStateSet first = nodeCur.firstPos();
779: final CMStateSet last = nodeCur.lastPos();
780:
781: //
782: // For every position which is in our last position set, add all
783: // of our first position states to the follow set for that
784: // position.
785: //
786: for (int index = 0; index < fLeafCount; index++)
787: {
788: if (last.getBit(index))
789: fFollowList[index].union(first);
790: }
791: }
792: else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
793: || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
794: {
795: throw new RuntimeException("ImplementationMessages.VAL_NIICM");
796: }
797: /***/
798: else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
799: || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE) {
800: // Recurse first
801: calcFollowList(((CMUniOp) nodeCur).getChild());
802:
803: //
804: // Now handle our level. We use our own first and last position
805: // sets, so get them up front.
806: //
807: final CMStateSet first = nodeCur.firstPos();
808: final CMStateSet last = nodeCur.lastPos();
809:
810: //
811: // For every position which is in our last position set, add all
812: // of our first position states to the follow set for that
813: // position.
814: //
815: for (int index = 0; index < fLeafCount; index++) {
816: if (last.getBit(index))
817: fFollowList[index].union(first);
818: }
819: }
820:
821: else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
822: // Recurse only
823: calcFollowList(((CMUniOp) nodeCur).getChild());
824: }
825: /***/
826: }
827:
828: /**
829: * Dumps the tree of the current node to standard output.
830: *
831: * @param nodeCur The current node.
832: * @param level The maximum levels to output.
833: *
834: * @exception CMException Thrown on error.
835: */
836: private void dumpTree(CMNode nodeCur, int level) {
837: for (int index = 0; index < level; index++)
838: System.out.print(" ");
839:
840: int type = nodeCur.type();
841: if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
842: || (type == XMLContentSpec.CONTENTSPECNODE_SEQ)) {
843: if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
844: System.out.print("Choice Node ");
845: else
846: System.out.print("Seq Node ");
847:
848: if (nodeCur.isNullable())
849: System.out.print("Nullable ");
850:
851: System.out.print("firstPos=");
852: System.out.print(nodeCur.firstPos().toString());
853: System.out.print(" lastPos=");
854: System.out.println(nodeCur.lastPos().toString());
855:
856: dumpTree(((CMBinOp) nodeCur).getLeft(), level + 1);
857: dumpTree(((CMBinOp) nodeCur).getRight(), level + 1);
858: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE) {
859: System.out.print("Rep Node ");
860:
861: if (nodeCur.isNullable())
862: System.out.print("Nullable ");
863:
864: System.out.print("firstPos=");
865: System.out.print(nodeCur.firstPos().toString());
866: System.out.print(" lastPos=");
867: System.out.println(nodeCur.lastPos().toString());
868:
869: dumpTree(((CMUniOp) nodeCur).getChild(), level + 1);
870: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) {
871: System.out.print("Leaf: (pos="
872: + ((CMLeaf) nodeCur).getPosition() + "), "
873: + ((CMLeaf) nodeCur).getElement() + "(elemIndex="
874: + ((CMLeaf) nodeCur).getElement() + ") ");
875:
876: if (nodeCur.isNullable())
877: System.out.print(" Nullable ");
878:
879: System.out.print("firstPos=");
880: System.out.print(nodeCur.firstPos().toString());
881: System.out.print(" lastPos=");
882: System.out.println(nodeCur.lastPos().toString());
883: } else {
884: throw new RuntimeException(
885: "ImplementationMessages.VAL_NIICM");
886: }
887: }
888:
889: /**
890: * -1 is used to represent bad transitions in the transition table
891: * entry for each state. So each entry is initialized to an all -1
892: * array. This method creates a new entry and initializes it.
893: */
894: private int[] makeDefStateList() {
895: int[] retArray = new int[fElemMapSize];
896: for (int index = 0; index < fElemMapSize; index++)
897: retArray[index] = -1;
898: return retArray;
899: }
900:
901: /** Post tree build initialization. */
902: private int postTreeBuildInit(CMNode nodeCur, int curIndex) {
903: // Set the maximum states on this node
904: nodeCur.setMaxStates(fLeafCount);
905:
906: // Recurse as required
907: if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY
908: || (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL
909: || (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
910: // REVISIT: Don't waste these structures.
911: QName qname = new QName(null, null, null, ((CMAny) nodeCur)
912: .getURI());
913: fLeafList[curIndex] = new CMLeaf(qname, ((CMAny) nodeCur)
914: .getPosition());
915: fLeafListType[curIndex] = nodeCur.type();
916: curIndex++;
917: } else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
918: || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)) {
919: curIndex = postTreeBuildInit(((CMBinOp) nodeCur).getLeft(),
920: curIndex);
921: curIndex = postTreeBuildInit(
922: ((CMBinOp) nodeCur).getRight(), curIndex);
923: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
924: || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
925: || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
926: curIndex = postTreeBuildInit(
927: ((CMUniOp) nodeCur).getChild(), curIndex);
928: } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) {
929: //
930: // Put this node in the leaf list at the current index if its
931: // a non-epsilon leaf.
932: //
933: final QName node = ((CMLeaf) nodeCur).getElement();
934: if (node.localpart != fEpsilonString) {
935: fLeafList[curIndex] = (CMLeaf) nodeCur;
936: fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
937: curIndex++;
938: }
939: } else {
940: throw new RuntimeException(
941: "ImplementationMessages.VAL_NIICM: type="
942: + nodeCur.type());
943: }
944: return curIndex;
945: }
946:
947: } // class DFAContentModel
|