Source Code Cross Referenced for DFAContentModel.java in  » XML » xerces-2_9_1 » org » apache » xerces » impl » dtd » models » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » XML » xerces 2_9_1 » org.apache.xerces.impl.dtd.models 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * Licensed to the Apache Software Foundation (ASF) under one or more
003:         * contributor license agreements.  See the NOTICE file distributed with
004:         * this work for additional information regarding copyright ownership.
005:         * The ASF licenses this file to You under the Apache License, Version 2.0
006:         * (the "License"); you may not use this file except in compliance with
007:         * the License.  You may obtain a copy of the License at
008:         * 
009:         *      http://www.apache.org/licenses/LICENSE-2.0
010:         * 
011:         * Unless required by applicable law or agreed to in writing, software
012:         * distributed under the License is distributed on an "AS IS" BASIS,
013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014:         * See the License for the specific language governing permissions and
015:         * limitations under the License.
016:         */
017:
018:        package org.apache.xerces.impl.dtd.models;
019:
020:        import java.util.HashMap;
021:
022:        import org.apache.xerces.impl.dtd.XMLContentSpec;
023:        import org.apache.xerces.xni.QName;
024:
025:        /**
026:         * DFAContentModel is the derivative of ContentModel that does
027:         * all of the non-trivial element content validation. This class does 
028:         * the conversion from the regular expression to the DFA that 
029:         * it then uses in its validation algorithm.
030:         * <p>
031:         * <b>Note:</b> Upstream work insures that this class will never see
032:         * a content model with PCDATA in it. Any model with PCDATA is 'mixed' 
033:         * and is handled via the MixedContentModel class since mixed models 
034:         * are very constrained in form and easily handled via a special case. 
035:         * This also makes implementation of this class much easier.
036:         * 
037:         * @xerces.internal
038:         * 
039:         * @version $Id: DFAContentModel.java 572057 2007-09-02 18:03:20Z mrglavas $
040:         */
041:        public class DFAContentModel implements  ContentModelValidator {
042:
043:            //
044:            // Constants
045:            //
046:            // special strings
047:
048:            /** Epsilon string. */
049:            private static String fEpsilonString = "<<CMNODE_EPSILON>>";
050:
051:            /** End-of-content string. */
052:            private static String fEOCString = "<<CMNODE_EOC>>";
053:
054:            /** initializing static members **/
055:            static {
056:                fEpsilonString = fEpsilonString.intern();
057:                fEOCString = fEOCString.intern();
058:            }
059:
060:            // debugging
061:
062:            /** Set to true to debug content model validation. */
063:            private static final boolean DEBUG_VALIDATE_CONTENT = false;
064:
065:            //
066:            // Data
067:            //
068:
069:            /* this is the EquivClassComparator object */
070:            //private EquivClassComparator comparator = null;
071:            /**
072:             * This is the map of unique input symbol elements to indices into
073:             * each state's per-input symbol transition table entry. This is part
074:             * of the built DFA information that must be kept around to do the
075:             * actual validation.
076:             */
077:            private QName fElemMap[] = null;
078:
079:            /**
080:             * This is a map of whether the element map contains information 
081:             * related to ANY models.
082:             */
083:            private int fElemMapType[] = null;
084:
085:            /** The element map size. */
086:            private int fElemMapSize = 0;
087:
088:            /** Boolean to distinguish Schema Mixed Content */
089:            private boolean fMixed;
090:
091:            /**
092:             * The NFA position of the special EOC (end of content) node. This
093:             * is saved away since it's used during the DFA build.
094:             */
095:            private int fEOCPos = 0;
096:
097:            /**
098:             * This is an array of booleans, one per state (there are
099:             * fTransTableSize states in the DFA) that indicates whether that
100:             * state is a final state.
101:             */
102:            private boolean fFinalStateFlags[] = null;
103:
104:            /**
105:             * The list of follow positions for each NFA position (i.e. for each
106:             * non-epsilon leaf node.) This is only used during the building of
107:             * the DFA, and is let go afterwards.
108:             */
109:            private CMStateSet fFollowList[] = null;
110:
111:            /**
112:             * This is the head node of our intermediate representation. It is
113:             * only non-null during the building of the DFA (just so that it
114:             * does not have to be passed all around.) Once the DFA is built,
115:             * this is no longer required so its nulled out.
116:             */
117:            private CMNode fHeadNode = null;
118:
119:            /**
120:             * The count of leaf nodes. This is an important number that set some
121:             * limits on the sizes of data structures in the DFA process.
122:             */
123:            private int fLeafCount = 0;
124:
125:            /**
126:             * An array of non-epsilon leaf nodes, which is used during the DFA
127:             * build operation, then dropped.
128:             */
129:            private CMLeaf fLeafList[] = null;
130:
131:            /** Array mapping ANY types to the leaf list. */
132:            private int fLeafListType[] = null;
133:
134:            //private ContentLeafNameTypeVector fLeafNameTypeVector = null;
135:
136:            /**
137:             * The string pool of our parser session. This is set during construction
138:             * and kept around.
139:             */
140:            //private StringPool fStringPool = null;
141:            /**
142:             * This is the transition table that is the main by product of all
143:             * of the effort here. It is an array of arrays of ints. The first
144:             * dimension is the number of states we end up with in the DFA. The
145:             * second dimensions is the number of unique elements in the content
146:             * model (fElemMapSize). Each entry in the second dimension indicates
147:             * the new state given that input for the first dimension's start
148:             * state.
149:             * <p>
150:             * The fElemMap array handles mapping from element indexes to
151:             * positions in the second dimension of the transition table.
152:             */
153:            private int fTransTable[][] = null;
154:
155:            /**
156:             * The number of valid entries in the transition table, and in the other
157:             * related tables such as fFinalStateFlags.
158:             */
159:            private int fTransTableSize = 0;
160:
161:            /**
162:             * Flag that indicates that even though we have a "complicated"
163:             * content model, it is valid to have no content. In other words,
164:             * all parts of the content model are optional. For example:
165:             * <pre>
166:             *      &lt;!ELEMENT AllOptional (Optional*,NotRequired?)&gt;
167:             * </pre>
168:             */
169:            private boolean fEmptyContentIsValid = false;
170:
171:            // temp variables
172:
173:            /** Temporary qualified name. */
174:            private final QName fQName = new QName();
175:
176:            //
177:            // Constructors
178:            //
179:
180:            //
181:            // Constructors
182:            //
183:
184:            /**
185:             * Constructs a DFA content model.
186:             *
187:             * @param syntaxTree    The syntax tree of the content model.
188:             * @param leafCount     The number of leaves.
189:             * @param mixed
190:             *
191:             */
192:            public DFAContentModel(CMNode syntaxTree, int leafCount,
193:                    boolean mixed) {
194:                // Store away our index and pools in members
195:                //fStringPool = stringPool;
196:                fLeafCount = leafCount;
197:
198:                // this is for Schema Mixed Content
199:                fMixed = mixed;
200:
201:                //
202:                //  Ok, so lets grind through the building of the DFA. This method
203:                //  handles the high level logic of the algorithm, but it uses a
204:                //  number of helper classes to do its thing.
205:                //
206:                //  In order to avoid having hundreds of references to the error and
207:                //  string handlers around, this guy and all of his helper classes
208:                //  just throw a simple exception and we then pass it along.
209:                //
210:                buildDFA(syntaxTree);
211:            }
212:
213:            //
214:            // ContentModelValidator methods
215:            //
216:
217:            /**
218:             * Check that the specified content is valid according to this
219:             * content model. This method can also be called to do 'what if' 
220:             * testing of content models just to see if they would be valid.
221:             * <p>
222:             * A value of -1 in the children array indicates a PCDATA node. All other 
223:             * indexes will be positive and represent child elements. The count can be
224:             * zero, since some elements have the EMPTY content model and that must be 
225:             * confirmed.
226:             *
227:             * @param children The children of this element.  Each integer is an index within
228:             *                 the <code>StringPool</code> of the child element name.  An index
229:             *                 of -1 is used to indicate an occurrence of non-whitespace character
230:             *                 data.
231:             * @param offset Offset into the array where the children starts.
232:             * @param length The number of entries in the <code>children</code> array.
233:             *
234:             * @return The value -1 if fully valid, else the 0 based index of the child
235:             *         that first failed. If the value returned is equal to the number
236:             *         of children, then the specified children are valid but additional
237:             *         content is required to reach a valid ending state.
238:             *
239:             */
240:            public int validate(QName[] children, int offset, int length) {
241:
242:                if (DEBUG_VALIDATE_CONTENT)
243:                    System.out.println("DFAContentModel#validateContent");
244:
245:                //
246:                // A DFA content model must *always* have at least 1 child
247:                // so a failure is given if no children present.
248:                // 
249:                // Defect 782: This is an incorrect statement because a DFA
250:                // content model is also used for constructions such as:
251:                //
252:                //     (Optional*,NotRequired?)
253:                //
254:                // where a perfectly valid content would be NO CHILDREN.
255:                // Therefore, if there are no children, we must check to
256:                // see if the CMNODE_EOC marker is a valid start state! -Ac
257:                //
258:                if (length == 0) {
259:                    if (DEBUG_VALIDATE_CONTENT) {
260:                        System.out.println("!!! no children");
261:                        System.out.println("elemMap=" + fElemMap);
262:                        for (int i = 0; i < fElemMap.length; i++) {
263:                            String uri = fElemMap[i].uri;
264:                            String localpart = fElemMap[i].localpart;
265:
266:                            System.out.println("fElemMap[" + i + "]=" + uri
267:                                    + "," + localpart + " (" + uri + ", "
268:                                    + localpart + ')');
269:
270:                        }
271:                        System.out.println("EOCIndex=" + fEOCString);
272:                    }
273:
274:                    return fEmptyContentIsValid ? -1 : 0;
275:
276:                } // if child count == 0
277:
278:                //
279:                //  Lets loop through the children in the array and move our way
280:                //  through the states. Note that we use the fElemMap array to map
281:                //  an element index to a state index.
282:                //
283:                int curState = 0;
284:                for (int childIndex = 0; childIndex < length; childIndex++) {
285:                    // Get the current element index out
286:                    final QName curElem = children[offset + childIndex];
287:                    // ignore mixed text
288:                    if (fMixed && curElem.localpart == null) {
289:                        continue;
290:                    }
291:
292:                    // Look up this child in our element map
293:                    int elemIndex = 0;
294:                    for (; elemIndex < fElemMapSize; elemIndex++) {
295:                        int type = fElemMapType[elemIndex] & 0x0f;
296:                        if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
297:                            //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
298:                            if (fElemMap[elemIndex].rawname == curElem.rawname) {
299:                                break;
300:                            }
301:                        } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
302:                            String uri = fElemMap[elemIndex].uri;
303:                            if (uri == null || uri == curElem.uri) {
304:                                break;
305:                            }
306:                        } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
307:                            if (curElem.uri == null) {
308:                                break;
309:                            }
310:                        } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
311:                            if (fElemMap[elemIndex].uri != curElem.uri) {
312:                                break;
313:                            }
314:                        }
315:                    }
316:
317:                    // If we didn't find it, then obviously not valid
318:                    if (elemIndex == fElemMapSize) {
319:                        if (DEBUG_VALIDATE_CONTENT) {
320:                            System.out.println("!!! didn't find it");
321:
322:                            System.out.println("curElem : " + curElem);
323:                            for (int i = 0; i < fElemMapSize; i++) {
324:                                System.out.println("fElemMap[" + i + "] = "
325:                                        + fElemMap[i]);
326:                                System.out.println("fElemMapType[" + i + "] = "
327:                                        + fElemMapType[i]);
328:                            }
329:                        }
330:
331:                        return childIndex;
332:                    }
333:
334:                    //
335:                    //  Look up the next state for this input symbol when in the
336:                    //  current state.
337:                    //
338:                    curState = fTransTable[curState][elemIndex];
339:
340:                    // If its not a legal transition, then invalid
341:                    if (curState == -1) {
342:                        if (DEBUG_VALIDATE_CONTENT)
343:                            System.out.println("!!! not a legal transition");
344:                        return childIndex;
345:                    }
346:                }
347:
348:                //
349:                //  We transitioned all the way through the input list. However, that
350:                //  does not mean that we ended in a final state. So check whether
351:                //  our ending state is a final state.
352:                //
353:                if (DEBUG_VALIDATE_CONTENT)
354:                    System.out.println("curState=" + curState + ", childCount="
355:                            + length);
356:                if (!fFinalStateFlags[curState])
357:                    return length;
358:
359:                // success!
360:                return -1;
361:            } // validate
362:
363:            //
364:            // Private methods
365:            //
366:
367:            /** 
368:             * Builds the internal DFA transition table from the given syntax tree.
369:             *
370:             * @param syntaxTree The syntax tree.
371:             *
372:             * @exception CMException Thrown if DFA cannot be built.
373:             */
374:            private void buildDFA(CMNode syntaxTree) {
375:                //
376:                //  The first step we need to take is to rewrite the content model
377:                //  using our CMNode objects, and in the process get rid of any
378:                //  repetition short cuts, converting them into '*' style repetitions
379:                //  or getting rid of repetitions altogether.
380:                //
381:                //  The conversions done are:
382:                //
383:                //  x+ -> (x|x*)
384:                //  x? -> (x|epsilon)
385:                //
386:                //  This is a relatively complex scenario. What is happening is that
387:                //  we create a top level binary node of which the special EOC value
388:                //  is set as the right side node. The the left side is set to the
389:                //  rewritten syntax tree. The source is the original content model
390:                //  info from the decl pool. The rewrite is done by buildSyntaxTree()
391:                //  which recurses the decl pool's content of the element and builds
392:                //  a new tree in the process.
393:                //
394:                //  Note that, during this operation, we set each non-epsilon leaf
395:                //  node's DFA state position and count the number of such leafs, which
396:                //  is left in the fLeafCount member.
397:                //
398:                //  The nodeTmp object is passed in just as a temp node to use during
399:                //  the recursion. Otherwise, we'd have to create a new node on every
400:                //  level of recursion, which would be piggy in Java (as is everything
401:                //  for that matter.)
402:                //
403:
404:                /* MODIFIED (Jan, 2001) 
405:                 *
406:                 * Use following rules.
407:                 *   nullable(x+) := nullable(x), first(x+) := first(x),  last(x+) := last(x)
408:                 *   nullable(x?) := true, first(x?) := first(x),  last(x?) := last(x)
409:                 *
410:                 * The same computation of follow as x* is applied to x+
411:                 *
412:                 * The modification drastically reduces computation time of
413:                 * "(a, (b, a+, (c, (b, a+)+, a+, (d,  (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
414:                 */
415:
416:                fQName.setValues(null, fEOCString, fEOCString, null);
417:                CMLeaf nodeEOC = new CMLeaf(fQName);
418:                fHeadNode = new CMBinOp(XMLContentSpec.CONTENTSPECNODE_SEQ,
419:                        syntaxTree, nodeEOC);
420:
421:                //
422:                //  And handle specially the EOC node, which also must be numbered
423:                //  and counted as a non-epsilon leaf node. It could not be handled
424:                //  in the above tree build because it was created before all that
425:                //  started. We save the EOC position since its used during the DFA
426:                //  building loop.
427:                //
428:                fEOCPos = fLeafCount;
429:                nodeEOC.setPosition(fLeafCount++);
430:
431:                //
432:                //  Ok, so now we have to iterate the new tree and do a little more
433:                //  work now that we know the leaf count. One thing we need to do is
434:                //  to calculate the first and last position sets of each node. This
435:                //  is cached away in each of the nodes.
436:                //
437:                //  Along the way we also set the leaf count in each node as the
438:                //  maximum state count. They must know this in order to create their
439:                //  first/last pos sets.
440:                //
441:                //  We also need to build an array of references to the non-epsilon
442:                //  leaf nodes. Since we iterate it in the same way as before, this
443:                //  will put them in the array according to their position values.
444:                //
445:                fLeafList = new CMLeaf[fLeafCount];
446:                fLeafListType = new int[fLeafCount];
447:                postTreeBuildInit(fHeadNode, 0);
448:
449:                //
450:                //  And, moving onward... We now need to build the follow position
451:                //  sets for all the nodes. So we allocate an array of state sets,
452:                //  one for each leaf node (i.e. each DFA position.)
453:                //
454:                fFollowList = new CMStateSet[fLeafCount];
455:                for (int index = 0; index < fLeafCount; index++)
456:                    fFollowList[index] = new CMStateSet(fLeafCount);
457:                calcFollowList(fHeadNode);
458:                //
459:                //  And finally the big push... Now we build the DFA using all the
460:                //  states and the tree we've built up. First we set up the various
461:                //  data structures we are going to use while we do this.
462:                //
463:                //  First of all we need an array of unique element names in our
464:                //  content model. For each transition table entry, we need a set of
465:                //  contiguous indices to represent the transitions for a particular
466:                //  input element. So we need to a zero based range of indexes that
467:                //  map to element types. This element map provides that mapping.
468:                //
469:                fElemMap = new QName[fLeafCount];
470:                fElemMapType = new int[fLeafCount];
471:                fElemMapSize = 0;
472:                for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
473:                    fElemMap[outIndex] = new QName();
474:
475:                    /****
476:                    if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
477:                        if (fLeafNameTypeVector == null) {
478:                            fLeafNameTypeVector = new ContentLeafNameTypeVector();
479:                        }
480:                    }
481:                    /****/
482:
483:                    // Get the current leaf's element index
484:                    final QName element = fLeafList[outIndex].getElement();
485:
486:                    // See if the current leaf node's element index is in the list
487:                    int inIndex = 0;
488:                    for (; inIndex < fElemMapSize; inIndex++) {
489:                        if (fElemMap[inIndex].rawname == element.rawname) {
490:                            break;
491:                        }
492:                    }
493:
494:                    // If it was not in the list, then add it, if not the EOC node
495:                    if (inIndex == fElemMapSize) {
496:                        fElemMap[fElemMapSize].setValues(element);
497:                        fElemMapType[fElemMapSize] = fLeafListType[outIndex];
498:                        fElemMapSize++;
499:                    }
500:                }
501:                // set up the fLeafNameTypeVector object if there is one.
502:                /*****
503:                if (fLeafNameTypeVector != null) {
504:                    fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
505:                }
506:
507:                /*** 
508:                 * Optimization(Jan, 2001); We sort fLeafList according to 
509:                 * elemIndex which is *uniquely* associated to each leaf.  
510:                 * We are *assuming* that each element appears in at least one leaf.
511:                 **/
512:
513:                int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
514:                int fSortCount = 0;
515:
516:                for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
517:                    for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
518:                        final QName leaf = fLeafList[leafIndex].getElement();
519:                        final QName element = fElemMap[elemIndex];
520:                        if (leaf.rawname == element.rawname) {
521:                            fLeafSorter[fSortCount++] = leafIndex;
522:                        }
523:                    }
524:                    fLeafSorter[fSortCount++] = -1;
525:                }
526:
527:                /* Optimization(Jan, 2001) */
528:
529:                //
530:                //  Next lets create some arrays, some that that hold transient
531:                //  information during the DFA build and some that are permament.
532:                //  These are kind of sticky since we cannot know how big they will
533:                //  get, but we don't want to use any Java collections because of
534:                //  performance.
535:                //
536:                //  Basically they will probably be about fLeafCount*2 on average,
537:                //  but can be as large as 2^(fLeafCount*2), worst case. So we start
538:                //  with fLeafCount*4 as a middle ground. This will be very unlikely
539:                //  to ever have to expand, though it if does, the overhead will be
540:                //  somewhat ugly.
541:                //
542:                int curArraySize = fLeafCount * 4;
543:                CMStateSet[] statesToDo = new CMStateSet[curArraySize];
544:                fFinalStateFlags = new boolean[curArraySize];
545:                fTransTable = new int[curArraySize][];
546:
547:                //
548:                //  Ok we start with the initial set as the first pos set of the
549:                //  head node (which is the seq node that holds the content model
550:                //  and the EOC node.)
551:                //
552:                CMStateSet setT = fHeadNode.firstPos();
553:
554:                //
555:                //  Init our two state flags. Basically the unmarked state counter
556:                //  is always chasing the current state counter. When it catches up,
557:                //  that means we made a pass through that did not add any new states
558:                //  to the lists, at which time we are done. We could have used a
559:                //  expanding array of flags which we used to mark off states as we
560:                //  complete them, but this is easier though less readable maybe.
561:                //
562:                int unmarkedState = 0;
563:                int curState = 0;
564:
565:                //
566:                //  Init the first transition table entry, and put the initial state
567:                //  into the states to do list, then bump the current state.
568:                //
569:                fTransTable[curState] = makeDefStateList();
570:                statesToDo[curState] = setT;
571:                curState++;
572:
573:                /* Optimization(Jan, 2001); This is faster for
574:                 * a large content model such as, "(t001+|t002+|.... |t500+)".
575:                 */
576:
577:                HashMap stateTable = new HashMap();
578:
579:                /* Optimization(Jan, 2001) */
580:
581:                //
582:                //  Ok, almost done with the algorithm... We now enter the
583:                //  loop where we go until the states done counter catches up with
584:                //  the states to do counter.
585:                //
586:                while (unmarkedState < curState) {
587:                    //
588:                    //  Get the first unmarked state out of the list of states to do.
589:                    //  And get the associated transition table entry.
590:                    //
591:                    setT = statesToDo[unmarkedState];
592:                    int[] transEntry = fTransTable[unmarkedState];
593:
594:                    // Mark this one final if it contains the EOC state
595:                    fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
596:
597:                    // Bump up the unmarked state count, marking this state done
598:                    unmarkedState++;
599:
600:                    // Loop through each possible input symbol in the element map
601:                    CMStateSet newSet = null;
602:                    /* Optimization(Jan, 2001) */
603:                    int sorterIndex = 0;
604:                    /* Optimization(Jan, 2001) */
605:                    for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
606:                        //
607:                        //  Build up a set of states which is the union of all of
608:                        //  the follow sets of DFA positions that are in the current
609:                        //  state. If we gave away the new set last time through then
610:                        //  create a new one. Otherwise, zero out the existing one.
611:                        //
612:                        if (newSet == null)
613:                            newSet = new CMStateSet(fLeafCount);
614:                        else
615:                            newSet.zeroBits();
616:
617:                        /* Optimization(Jan, 2001) */
618:                        int leafIndex = fLeafSorter[sorterIndex++];
619:
620:                        while (leafIndex != -1) {
621:                            // If this leaf index (DFA position) is in the current set...
622:                            if (setT.getBit(leafIndex)) {
623:                                //
624:                                //  If this leaf is the current input symbol, then we
625:                                //  want to add its follow list to the set of states to
626:                                //  transition to from the current state.
627:                                //
628:                                newSet.union(fFollowList[leafIndex]);
629:                            }
630:
631:                            leafIndex = fLeafSorter[sorterIndex++];
632:                        }
633:                        /* Optimization(Jan, 2001) */
634:
635:                        //
636:                        //  If this new set is not empty, then see if its in the list
637:                        //  of states to do. If not, then add it.
638:                        //
639:                        if (!newSet.isEmpty()) {
640:                            //
641:                            //  Search the 'states to do' list to see if this new
642:                            //  state set is already in there.
643:                            //
644:
645:                            /* Optimization(Jan, 2001) */
646:                            Integer stateObj = (Integer) stateTable.get(newSet);
647:                            int stateIndex = (stateObj == null ? curState
648:                                    : stateObj.intValue());
649:                            /* Optimization(Jan, 2001) */
650:
651:                            // If we did not find it, then add it
652:                            if (stateIndex == curState) {
653:                                //
654:                                //  Put this new state into the states to do and init
655:                                //  a new entry at the same index in the transition
656:                                //  table.
657:                                //
658:                                statesToDo[curState] = newSet;
659:                                fTransTable[curState] = makeDefStateList();
660:
661:                                /* Optimization(Jan, 2001) */
662:                                stateTable.put(newSet, new Integer(curState));
663:                                /* Optimization(Jan, 2001) */
664:
665:                                // We now have a new state to do so bump the count
666:                                curState++;
667:
668:                                //
669:                                //  Null out the new set to indicate we adopted it.
670:                                //  This will cause the creation of a new set on the
671:                                //  next time around the loop.
672:                                //
673:                                newSet = null;
674:                            }
675:
676:                            //
677:                            //  Now set this state in the transition table's entry
678:                            //  for this element (using its index), with the DFA
679:                            //  state we will move to from the current state when we
680:                            //  see this input element.
681:                            //
682:                            transEntry[elemIndex] = stateIndex;
683:
684:                            // Expand the arrays if we're full
685:                            if (curState == curArraySize) {
686:                                //
687:                                //  Yikes, we overflowed the initial array size, so
688:                                //  we've got to expand all of these arrays. So adjust
689:                                //  up the size by 50% and allocate new arrays.
690:                                //
691:                                final int newSize = (int) (curArraySize * 1.5);
692:                                CMStateSet[] newToDo = new CMStateSet[newSize];
693:                                boolean[] newFinalFlags = new boolean[newSize];
694:                                int[][] newTransTable = new int[newSize][];
695:
696:                                // Copy over all of the existing content
697:                                System.arraycopy(statesToDo, 0, newToDo, 0,
698:                                        curArraySize);
699:                                System.arraycopy(fFinalStateFlags, 0,
700:                                        newFinalFlags, 0, curArraySize);
701:                                System.arraycopy(fTransTable, 0, newTransTable,
702:                                        0, curArraySize);
703:
704:                                // Store the new array size
705:                                curArraySize = newSize;
706:                                statesToDo = newToDo;
707:                                fFinalStateFlags = newFinalFlags;
708:                                fTransTable = newTransTable;
709:                            }
710:                        }
711:                    }
712:                }
713:
714:                // Check to see if we can set the fEmptyContentIsValid flag.
715:                fEmptyContentIsValid = ((CMBinOp) fHeadNode).getLeft()
716:                        .isNullable();
717:
718:                //
719:                //  And now we can say bye bye to the temp representation since we've
720:                //  built the DFA.
721:                //
722:                if (DEBUG_VALIDATE_CONTENT)
723:                    dumpTree(fHeadNode, 0);
724:                fHeadNode = null;
725:                fLeafList = null;
726:                fFollowList = null;
727:
728:            }
729:
730:            /**
731:             * Calculates the follow list of the current node.
732:             *
733:             * @param nodeCur The curent node.
734:             *
735:             * @exception CMException Thrown if follow list cannot be calculated.
736:             */
737:            private void calcFollowList(CMNode nodeCur) {
738:                // Recurse as required
739:                if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE) {
740:                    // Recurse only
741:                    calcFollowList(((CMBinOp) nodeCur).getLeft());
742:                    calcFollowList(((CMBinOp) nodeCur).getRight());
743:                } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ) {
744:                    // Recurse first
745:                    calcFollowList(((CMBinOp) nodeCur).getLeft());
746:                    calcFollowList(((CMBinOp) nodeCur).getRight());
747:
748:                    //
749:                    //  Now handle our level. We use our left child's last pos
750:                    //  set and our right child's first pos set, so go ahead and
751:                    //  get them ahead of time.
752:                    //
753:                    final CMStateSet last = ((CMBinOp) nodeCur).getLeft()
754:                            .lastPos();
755:                    final CMStateSet first = ((CMBinOp) nodeCur).getRight()
756:                            .firstPos();
757:
758:                    //
759:                    //  Now, for every position which is in our left child's last set
760:                    //  add all of the states in our right child's first set to the
761:                    //  follow set for that position.
762:                    //
763:                    for (int index = 0; index < fLeafCount; index++) {
764:                        if (last.getBit(index))
765:                            fFollowList[index].union(first);
766:                    }
767:                }
768:                /***
769:                else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
770:                {
771:                   // Recurse first
772:                   calcFollowList(((CMUniOp)nodeCur).getChild());
773:
774:                   //
775:                   //  Now handle our level. We use our own first and last position
776:                   //  sets, so get them up front.
777:                   //
778:                   final CMStateSet first = nodeCur.firstPos();
779:                   final CMStateSet last  = nodeCur.lastPos();
780:
781:                   //
782:                   //  For every position which is in our last position set, add all
783:                   //  of our first position states to the follow set for that
784:                   //  position.
785:                   //
786:                   for (int index = 0; index < fLeafCount; index++)
787:                   {
788:                       if (last.getBit(index))
789:                           fFollowList[index].union(first);
790:                   }
791:                }
792:                else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
793:                     ||  (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
794:                {
795:                   throw new RuntimeException("ImplementationMessages.VAL_NIICM");
796:                }
797:                /***/
798:                else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
799:                        || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE) {
800:                    // Recurse first
801:                    calcFollowList(((CMUniOp) nodeCur).getChild());
802:
803:                    //
804:                    //  Now handle our level. We use our own first and last position
805:                    //  sets, so get them up front.
806:                    //
807:                    final CMStateSet first = nodeCur.firstPos();
808:                    final CMStateSet last = nodeCur.lastPos();
809:
810:                    //
811:                    //  For every position which is in our last position set, add all
812:                    //  of our first position states to the follow set for that
813:                    //  position.
814:                    //
815:                    for (int index = 0; index < fLeafCount; index++) {
816:                        if (last.getBit(index))
817:                            fFollowList[index].union(first);
818:                    }
819:                }
820:
821:                else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
822:                    // Recurse only
823:                    calcFollowList(((CMUniOp) nodeCur).getChild());
824:                }
825:                /***/
826:            }
827:
828:            /**
829:             * Dumps the tree of the current node to standard output.
830:             *
831:             * @param nodeCur The current node.
832:             * @param level   The maximum levels to output.
833:             *
834:             * @exception CMException Thrown on error.
835:             */
836:            private void dumpTree(CMNode nodeCur, int level) {
837:                for (int index = 0; index < level; index++)
838:                    System.out.print("   ");
839:
840:                int type = nodeCur.type();
841:                if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
842:                        || (type == XMLContentSpec.CONTENTSPECNODE_SEQ)) {
843:                    if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
844:                        System.out.print("Choice Node ");
845:                    else
846:                        System.out.print("Seq Node ");
847:
848:                    if (nodeCur.isNullable())
849:                        System.out.print("Nullable ");
850:
851:                    System.out.print("firstPos=");
852:                    System.out.print(nodeCur.firstPos().toString());
853:                    System.out.print(" lastPos=");
854:                    System.out.println(nodeCur.lastPos().toString());
855:
856:                    dumpTree(((CMBinOp) nodeCur).getLeft(), level + 1);
857:                    dumpTree(((CMBinOp) nodeCur).getRight(), level + 1);
858:                } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE) {
859:                    System.out.print("Rep Node ");
860:
861:                    if (nodeCur.isNullable())
862:                        System.out.print("Nullable ");
863:
864:                    System.out.print("firstPos=");
865:                    System.out.print(nodeCur.firstPos().toString());
866:                    System.out.print(" lastPos=");
867:                    System.out.println(nodeCur.lastPos().toString());
868:
869:                    dumpTree(((CMUniOp) nodeCur).getChild(), level + 1);
870:                } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) {
871:                    System.out.print("Leaf: (pos="
872:                            + ((CMLeaf) nodeCur).getPosition() + "), "
873:                            + ((CMLeaf) nodeCur).getElement() + "(elemIndex="
874:                            + ((CMLeaf) nodeCur).getElement() + ") ");
875:
876:                    if (nodeCur.isNullable())
877:                        System.out.print(" Nullable ");
878:
879:                    System.out.print("firstPos=");
880:                    System.out.print(nodeCur.firstPos().toString());
881:                    System.out.print(" lastPos=");
882:                    System.out.println(nodeCur.lastPos().toString());
883:                } else {
884:                    throw new RuntimeException(
885:                            "ImplementationMessages.VAL_NIICM");
886:                }
887:            }
888:
889:            /**
890:             * -1 is used to represent bad transitions in the transition table
891:             * entry for each state. So each entry is initialized to an all -1
892:             * array. This method creates a new entry and initializes it.
893:             */
894:            private int[] makeDefStateList() {
895:                int[] retArray = new int[fElemMapSize];
896:                for (int index = 0; index < fElemMapSize; index++)
897:                    retArray[index] = -1;
898:                return retArray;
899:            }
900:
901:            /** Post tree build initialization. */
902:            private int postTreeBuildInit(CMNode nodeCur, int curIndex) {
903:                // Set the maximum states on this node
904:                nodeCur.setMaxStates(fLeafCount);
905:
906:                // Recurse as required
907:                if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY
908:                        || (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL
909:                        || (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
910:                    // REVISIT: Don't waste these structures.
911:                    QName qname = new QName(null, null, null, ((CMAny) nodeCur)
912:                            .getURI());
913:                    fLeafList[curIndex] = new CMLeaf(qname, ((CMAny) nodeCur)
914:                            .getPosition());
915:                    fLeafListType[curIndex] = nodeCur.type();
916:                    curIndex++;
917:                } else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
918:                        || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)) {
919:                    curIndex = postTreeBuildInit(((CMBinOp) nodeCur).getLeft(),
920:                            curIndex);
921:                    curIndex = postTreeBuildInit(
922:                            ((CMBinOp) nodeCur).getRight(), curIndex);
923:                } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
924:                        || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
925:                        || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
926:                    curIndex = postTreeBuildInit(
927:                            ((CMUniOp) nodeCur).getChild(), curIndex);
928:                } else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) {
929:                    //
930:                    //  Put this node in the leaf list at the current index if its
931:                    //  a non-epsilon leaf.
932:                    //
933:                    final QName node = ((CMLeaf) nodeCur).getElement();
934:                    if (node.localpart != fEpsilonString) {
935:                        fLeafList[curIndex] = (CMLeaf) nodeCur;
936:                        fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
937:                        curIndex++;
938:                    }
939:                } else {
940:                    throw new RuntimeException(
941:                            "ImplementationMessages.VAL_NIICM: type="
942:                                    + nodeCur.type());
943:                }
944:                return curIndex;
945:            }
946:
947:        } // class DFAContentModel
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.