Source Code Cross Referenced for RECompiler.java in  » Library » jakarta-regexp-1.5 » org » apache » regexp » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Library » jakarta regexp 1.5 » org.apache.regexp 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * Licensed to the Apache Software Foundation (ASF) under one or more
0003:         * contributor license agreements.  See the NOTICE file distributed with
0004:         * this work for additional information regarding copyright ownership.
0005:         * The ASF licenses this file to You under the Apache License, Version 2.0
0006:         * (the "License"); you may not use this file except in compliance with
0007:         * the License.  You may obtain a copy of the License at
0008:         *
0009:         *     http://www.apache.org/licenses/LICENSE-2.0
0010:         *
0011:         * Unless required by applicable law or agreed to in writing, software
0012:         * distributed under the License is distributed on an "AS IS" BASIS,
0013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014:         * See the License for the specific language governing permissions and
0015:         * limitations under the License.
0016:         */
0017:
0018:        package org.apache.regexp;
0019:
0020:        import java.util.Hashtable;
0021:
0022:        /**
0023:         * A regular expression compiler class.  This class compiles a pattern string into a
0024:         * regular expression program interpretable by the RE evaluator class.  The 'recompile'
0025:         * command line tool uses this compiler to pre-compile regular expressions for use
0026:         * with RE.  For a description of the syntax accepted by RECompiler and what you can
0027:         * do with regular expressions, see the documentation for the RE matcher class.
0028:         *
0029:         * @see RE
0030:         * @see recompile
0031:         *
0032:         * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
0033:         * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
0034:         * @version $Id: RECompiler.java 518156 2007-03-14 14:31:26Z vgritsenko $
0035:         */
0036:        public class RECompiler {
0037:            // The compiled program
0038:            char[] instruction; // The compiled RE 'program' instruction buffer
0039:            int lenInstruction; // The amount of the program buffer currently in use
0040:
0041:            // Input state for compiling regular expression
0042:            String pattern; // Input string
0043:            int len; // Length of the pattern string
0044:            int idx; // Current input index into ac
0045:            int parens; // Total number of paren pairs
0046:
0047:            // Node flags
0048:            static final int NODE_NORMAL = 0; // No flags (nothing special)
0049:            static final int NODE_NULLABLE = 1; // True if node is potentially null
0050:            static final int NODE_TOPLEVEL = 2; // True if top level expr
0051:
0052:            // Special types of 'escapes'
0053:            static final int ESC_MASK = 0xffff0; // Escape complexity mask
0054:            static final int ESC_BACKREF = 0xfffff; // Escape is really a backreference
0055:            static final int ESC_COMPLEX = 0xffffe; // Escape isn't really a true character
0056:            static final int ESC_CLASS = 0xffffd; // Escape represents a whole class of characters
0057:
0058:            // {m,n} stacks
0059:            static final int bracketUnbounded = -1; // Unbounded value
0060:            int bracketMin; // Minimum number of matches
0061:            int bracketOpt; // Additional optional matches
0062:
0063:            // Lookup table for POSIX character class names
0064:            static final Hashtable hashPOSIX = new Hashtable();
0065:            static {
0066:                hashPOSIX.put("alnum", new Character(RE.POSIX_CLASS_ALNUM));
0067:                hashPOSIX.put("alpha", new Character(RE.POSIX_CLASS_ALPHA));
0068:                hashPOSIX.put("blank", new Character(RE.POSIX_CLASS_BLANK));
0069:                hashPOSIX.put("cntrl", new Character(RE.POSIX_CLASS_CNTRL));
0070:                hashPOSIX.put("digit", new Character(RE.POSIX_CLASS_DIGIT));
0071:                hashPOSIX.put("graph", new Character(RE.POSIX_CLASS_GRAPH));
0072:                hashPOSIX.put("lower", new Character(RE.POSIX_CLASS_LOWER));
0073:                hashPOSIX.put("print", new Character(RE.POSIX_CLASS_PRINT));
0074:                hashPOSIX.put("punct", new Character(RE.POSIX_CLASS_PUNCT));
0075:                hashPOSIX.put("space", new Character(RE.POSIX_CLASS_SPACE));
0076:                hashPOSIX.put("upper", new Character(RE.POSIX_CLASS_UPPER));
0077:                hashPOSIX.put("xdigit", new Character(RE.POSIX_CLASS_XDIGIT));
0078:                hashPOSIX
0079:                        .put("javastart", new Character(RE.POSIX_CLASS_JSTART));
0080:                hashPOSIX.put("javapart", new Character(RE.POSIX_CLASS_JPART));
0081:            }
0082:
0083:            /**
0084:             * Constructor.  Creates (initially empty) storage for a regular expression program.
0085:             */
0086:            public RECompiler() {
0087:                // Start off with a generous, yet reasonable, initial size
0088:                instruction = new char[128];
0089:                lenInstruction = 0;
0090:            }
0091:
0092:            /**
0093:             * Ensures that n more characters can fit in the program buffer.
0094:             * If n more can't fit, then the size is doubled until it can.
0095:             * @param n Number of additional characters to ensure will fit.
0096:             */
0097:            void ensure(int n) {
0098:                // Get current program length
0099:                int curlen = instruction.length;
0100:
0101:                // If the current length + n more is too much
0102:                if (lenInstruction + n >= curlen) {
0103:                    // Double the size of the program array until n more will fit
0104:                    while (lenInstruction + n >= curlen) {
0105:                        curlen *= 2;
0106:                    }
0107:
0108:                    // Allocate new program array and move data into it
0109:                    char[] newInstruction = new char[curlen];
0110:                    System.arraycopy(instruction, 0, newInstruction, 0,
0111:                            lenInstruction);
0112:                    instruction = newInstruction;
0113:                }
0114:            }
0115:
0116:            /**
0117:             * Emit a single character into the program stream.
0118:             * @param c Character to add
0119:             */
0120:            void emit(char c) {
0121:                // Make room for character
0122:                ensure(1);
0123:
0124:                // Add character
0125:                instruction[lenInstruction++] = c;
0126:            }
0127:
0128:            /**
0129:             * Inserts a node with a given opcode and opdata at insertAt.  The node relative next
0130:             * pointer is initialized to 0.
0131:             * @param opcode Opcode for new node
0132:             * @param opdata Opdata for new node (only the low 16 bits are currently used)
0133:             * @param insertAt Index at which to insert the new node in the program
0134:             */
0135:            void nodeInsert(char opcode, int opdata, int insertAt) {
0136:                // Make room for a new node
0137:                ensure(RE.nodeSize);
0138:
0139:                // Move everything from insertAt to the end down nodeSize elements
0140:                System.arraycopy(instruction, insertAt, instruction, insertAt
0141:                        + RE.nodeSize, lenInstruction - insertAt);
0142:                instruction[insertAt /* + RE.offsetOpcode */] = opcode;
0143:                instruction[insertAt + RE.offsetOpdata] = (char) opdata;
0144:                instruction[insertAt + RE.offsetNext] = 0;
0145:                lenInstruction += RE.nodeSize;
0146:            }
0147:
0148:            /**
0149:             * Appends a node to the end of a node chain
0150:             * @param node Start of node chain to traverse
0151:             * @param pointTo Node to have the tail of the chain point to
0152:             */
0153:            void setNextOfEnd(int node, int pointTo) {
0154:                // Traverse the chain until the next offset is 0
0155:                int next = instruction[node + RE.offsetNext];
0156:                // while the 'node' is not the last in the chain
0157:                // and the 'node' is not the last in the program.
0158:                while (next != 0 && node < lenInstruction) {
0159:                    // if the node we are supposed to point to is in the chain then
0160:                    // point to the end of the program instead.
0161:                    // Michael McCallum <gholam@xtra.co.nz>
0162:                    // FIXME: This is a _hack_ to stop infinite programs.
0163:                    // I believe that the implementation of the reluctant matches is wrong but
0164:                    // have not worked out a better way yet.
0165:                    if (node == pointTo) {
0166:                        pointTo = lenInstruction;
0167:                    }
0168:                    node += next;
0169:                    next = instruction[node + RE.offsetNext];
0170:                }
0171:
0172:                // if we have reached the end of the program then dont set the pointTo.
0173:                // im not sure if this will break any thing but passes all the tests.
0174:                if (node < lenInstruction) {
0175:                    // Some patterns result in very large programs which exceed
0176:                    // capacity of the short used for specifying signed offset of the
0177:                    // next instruction. Example: a{1638}
0178:                    int offset = pointTo - node;
0179:                    if (offset != (short) offset) {
0180:                        throw new RESyntaxException(
0181:                                "Exceeded short jump range.");
0182:                    }
0183:
0184:                    // Point the last node in the chain to pointTo.
0185:                    instruction[node + RE.offsetNext] = (char) (short) offset;
0186:                }
0187:            }
0188:
0189:            /**
0190:             * Adds a new node
0191:             * @param opcode Opcode for node
0192:             * @param opdata Opdata for node (only the low 16 bits are currently used)
0193:             * @return Index of new node in program
0194:             */
0195:            int node(char opcode, int opdata) {
0196:                // Make room for a new node
0197:                ensure(RE.nodeSize);
0198:
0199:                // Add new node at end
0200:                instruction[lenInstruction /* + RE.offsetOpcode */] = opcode;
0201:                instruction[lenInstruction + RE.offsetOpdata] = (char) opdata;
0202:                instruction[lenInstruction + RE.offsetNext] = 0;
0203:                lenInstruction += RE.nodeSize;
0204:
0205:                // Return index of new node
0206:                return lenInstruction - RE.nodeSize;
0207:            }
0208:
0209:            /**
0210:             * Throws a new internal error exception
0211:             * @exception Error Thrown in the event of an internal error.
0212:             */
0213:            void internalError() throws Error {
0214:                throw new Error("Internal error!");
0215:            }
0216:
0217:            /**
0218:             * Throws a new syntax error exception
0219:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
0220:             */
0221:            void syntaxError(String s) throws RESyntaxException {
0222:                throw new RESyntaxException(s);
0223:            }
0224:
0225:            /**
0226:             * Match bracket {m,n} expression put results in bracket member variables
0227:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
0228:             */
0229:            void bracket() throws RESyntaxException {
0230:                // Current character must be a '{'
0231:                if (idx >= len || pattern.charAt(idx++) != '{') {
0232:                    internalError();
0233:                }
0234:
0235:                // Next char must be a digit
0236:                if (idx >= len || !Character.isDigit(pattern.charAt(idx))) {
0237:                    syntaxError("Expected digit");
0238:                }
0239:
0240:                // Get min ('m' of {m,n}) number
0241:                StringBuffer number = new StringBuffer();
0242:                while (idx < len && Character.isDigit(pattern.charAt(idx))) {
0243:                    number.append(pattern.charAt(idx++));
0244:                }
0245:                try {
0246:                    bracketMin = Integer.parseInt(number.toString());
0247:                } catch (NumberFormatException e) {
0248:                    syntaxError("Expected valid number");
0249:                }
0250:
0251:                // If out of input, fail
0252:                if (idx >= len) {
0253:                    syntaxError("Expected comma or right bracket");
0254:                }
0255:
0256:                // If end of expr, optional limit is 0
0257:                if (pattern.charAt(idx) == '}') {
0258:                    idx++;
0259:                    bracketOpt = 0;
0260:                    return;
0261:                }
0262:
0263:                // Must have at least {m,} and maybe {m,n}.
0264:                if (idx >= len || pattern.charAt(idx++) != ',') {
0265:                    syntaxError("Expected comma");
0266:                }
0267:
0268:                // If out of input, fail
0269:                if (idx >= len) {
0270:                    syntaxError("Expected comma or right bracket");
0271:                }
0272:
0273:                // If {m,} max is unlimited
0274:                if (pattern.charAt(idx) == '}') {
0275:                    idx++;
0276:                    bracketOpt = bracketUnbounded;
0277:                    return;
0278:                }
0279:
0280:                // Next char must be a digit
0281:                if (idx >= len || !Character.isDigit(pattern.charAt(idx))) {
0282:                    syntaxError("Expected digit");
0283:                }
0284:
0285:                // Get max number
0286:                number.setLength(0);
0287:                while (idx < len && Character.isDigit(pattern.charAt(idx))) {
0288:                    number.append(pattern.charAt(idx++));
0289:                }
0290:                try {
0291:                    bracketOpt = Integer.parseInt(number.toString())
0292:                            - bracketMin;
0293:                } catch (NumberFormatException e) {
0294:                    syntaxError("Expected valid number");
0295:                }
0296:
0297:                // Optional repetitions must be >= 0
0298:                if (bracketOpt < 0) {
0299:                    syntaxError("Bad range");
0300:                }
0301:
0302:                // Must have close brace
0303:                if (idx >= len || pattern.charAt(idx++) != '}') {
0304:                    syntaxError("Missing close brace");
0305:                }
0306:            }
0307:
0308:            /**
0309:             * Match an escape sequence.  Handles quoted chars and octal escapes as well
0310:             * as normal escape characters.  Always advances the input stream by the
0311:             * right amount. This code "understands" the subtle difference between an
0312:             * octal escape and a backref.  You can access the type of ESC_CLASS or
0313:             * ESC_COMPLEX or ESC_BACKREF by looking at pattern[idx - 1].
0314:             * @return ESC_* code or character if simple escape
0315:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
0316:             */
0317:            int escape() throws RESyntaxException {
0318:                // "Shouldn't" happen
0319:                if (pattern.charAt(idx) != '\\') {
0320:                    internalError();
0321:                }
0322:
0323:                // Escape shouldn't occur as last character in string!
0324:                if (idx + 1 == len) {
0325:                    syntaxError("Escape terminates string");
0326:                }
0327:
0328:                // Switch on character after backslash
0329:                idx += 2;
0330:                char escapeChar = pattern.charAt(idx - 1);
0331:                switch (escapeChar) {
0332:                case RE.E_BOUND:
0333:                case RE.E_NBOUND:
0334:                    return ESC_COMPLEX;
0335:
0336:                case RE.E_ALNUM:
0337:                case RE.E_NALNUM:
0338:                case RE.E_SPACE:
0339:                case RE.E_NSPACE:
0340:                case RE.E_DIGIT:
0341:                case RE.E_NDIGIT:
0342:                    return ESC_CLASS;
0343:
0344:                case 'u':
0345:                case 'x': {
0346:                    // Exact required hex digits for escape type
0347:                    int hexDigits = (escapeChar == 'u' ? 4 : 2);
0348:
0349:                    // Parse up to hexDigits characters from input
0350:                    int val = 0;
0351:                    for (; idx < len && hexDigits-- > 0; idx++) {
0352:                        // Get char
0353:                        char c = pattern.charAt(idx);
0354:
0355:                        // If it's a hexadecimal digit (0-9)
0356:                        if (c >= '0' && c <= '9') {
0357:                            // Compute new value
0358:                            val = (val << 4) + c - '0';
0359:                        } else {
0360:                            // If it's a hexadecimal letter (a-f)
0361:                            c = Character.toLowerCase(c);
0362:                            if (c >= 'a' && c <= 'f') {
0363:                                // Compute new value
0364:                                val = (val << 4) + (c - 'a') + 10;
0365:                            } else {
0366:                                // If it's not a valid digit or hex letter, the escape must be invalid
0367:                                // because hexDigits of input have not been absorbed yet.
0368:                                syntaxError("Expected " + hexDigits
0369:                                        + " hexadecimal digits after \\"
0370:                                        + escapeChar);
0371:                            }
0372:                        }
0373:                    }
0374:                    return val;
0375:                }
0376:
0377:                case 't':
0378:                    return '\t';
0379:
0380:                case 'n':
0381:                    return '\n';
0382:
0383:                case 'r':
0384:                    return '\r';
0385:
0386:                case 'f':
0387:                    return '\f';
0388:
0389:                case '0':
0390:                case '1':
0391:                case '2':
0392:                case '3':
0393:                case '4':
0394:                case '5':
0395:                case '6':
0396:                case '7':
0397:                case '8':
0398:                case '9':
0399:
0400:                    // An octal escape starts with a 0 or has two digits in a row
0401:                    if ((idx < len && Character.isDigit(pattern.charAt(idx)))
0402:                            || escapeChar == '0') {
0403:                        // Handle \nnn octal escapes
0404:                        int val = escapeChar - '0';
0405:                        if (idx < len && Character.isDigit(pattern.charAt(idx))) {
0406:                            val = ((val << 3) + (pattern.charAt(idx++) - '0'));
0407:                            if (idx < len
0408:                                    && Character.isDigit(pattern.charAt(idx))) {
0409:                                val = ((val << 3) + (pattern.charAt(idx++) - '0'));
0410:                            }
0411:                        }
0412:                        return val;
0413:                    }
0414:
0415:                    // It's actually a backreference (\[1-9]), not an escape
0416:                    return ESC_BACKREF;
0417:
0418:                default:
0419:
0420:                    // Simple quoting of a character
0421:                    return escapeChar;
0422:                }
0423:            }
0424:
0425:            /**
0426:             * Compile a character class
0427:             * @return Index of class node
0428:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
0429:             */
0430:            int characterClass() throws RESyntaxException {
0431:                // Check for bad calling or empty class
0432:                if (pattern.charAt(idx) != '[') {
0433:                    internalError();
0434:                }
0435:
0436:                // Check for unterminated or empty class
0437:                if ((idx + 1) >= len || pattern.charAt(++idx) == ']') {
0438:                    syntaxError("Empty or unterminated class");
0439:                }
0440:
0441:                // Check for POSIX character class
0442:                if (idx < len && pattern.charAt(idx) == ':') {
0443:                    // Skip colon
0444:                    idx++;
0445:
0446:                    // POSIX character classes are denoted with lowercase ASCII strings
0447:                    int idxStart = idx;
0448:                    while (idx < len && pattern.charAt(idx) >= 'a'
0449:                            && pattern.charAt(idx) <= 'z') {
0450:                        idx++;
0451:                    }
0452:
0453:                    // Should be a ":]" to terminate the POSIX character class
0454:                    if ((idx + 1) < len && pattern.charAt(idx) == ':'
0455:                            && pattern.charAt(idx + 1) == ']') {
0456:                        // Get character class
0457:                        String charClass = pattern.substring(idxStart, idx);
0458:
0459:                        // Select the POSIX class id
0460:                        Character i = (Character) hashPOSIX.get(charClass);
0461:                        if (i != null) {
0462:                            // Move past colon and right bracket
0463:                            idx += 2;
0464:
0465:                            // Return new POSIX character class node
0466:                            return node(RE.OP_POSIXCLASS, i.charValue());
0467:                        }
0468:                        syntaxError("Invalid POSIX character class '"
0469:                                + charClass + "'");
0470:                    }
0471:                    syntaxError("Invalid POSIX character class syntax");
0472:                }
0473:
0474:                // Try to build a class.  Create OP_ANYOF node
0475:                int ret = node(RE.OP_ANYOF, 0);
0476:
0477:                // Parse class declaration
0478:                char CHAR_INVALID = Character.MAX_VALUE;
0479:                char last = CHAR_INVALID;
0480:                char simpleChar;
0481:                boolean include = true;
0482:                boolean definingRange = false;
0483:                int idxFirst = idx;
0484:                char rangeStart = Character.MIN_VALUE;
0485:                char rangeEnd;
0486:                RERange range = new RERange();
0487:                while (idx < len && pattern.charAt(idx) != ']') {
0488:
0489:                    switchOnCharacter:
0490:
0491:                    // Switch on character
0492:                    switch (pattern.charAt(idx)) {
0493:                    case '^':
0494:                        include = !include;
0495:                        if (idx == idxFirst) {
0496:                            range.include(Character.MIN_VALUE,
0497:                                    Character.MAX_VALUE, true);
0498:                        }
0499:                        idx++;
0500:                        continue;
0501:
0502:                    case '\\': {
0503:                        // Escape always advances the stream
0504:                        int c;
0505:                        switch (c = escape()) {
0506:                        case ESC_COMPLEX:
0507:                        case ESC_BACKREF:
0508:
0509:                            // Word boundaries and backrefs not allowed in a character class!
0510:                            syntaxError("Bad character class");
0511:
0512:                        case ESC_CLASS:
0513:
0514:                            // Classes can't be an endpoint of a range
0515:                            if (definingRange) {
0516:                                syntaxError("Bad character class");
0517:                            }
0518:
0519:                            // Handle specific type of class (some are ok)
0520:                            switch (pattern.charAt(idx - 1)) {
0521:                            case RE.E_NSPACE:
0522:                                range.include(Character.MIN_VALUE, 7, include); // [Min - \b )
0523:                                range.include((char) 11, include); // ( \n - \f )
0524:                                range.include(14, 31, include); // ( \r - ' ')
0525:                                range.include(33, Character.MAX_VALUE, include); // (' ' - Max]
0526:                                break;
0527:
0528:                            case RE.E_NALNUM:
0529:                                range
0530:                                        .include(Character.MIN_VALUE, '/',
0531:                                                include); // [Min - '0')
0532:                                range.include(':', '@', include); // ('9' - 'A')
0533:                                range.include('[', '^', include); // ('Z' - '_')
0534:                                range.include('`', include); // ('_' - 'a')
0535:                                range
0536:                                        .include('{', Character.MAX_VALUE,
0537:                                                include); // ('z' - Max]
0538:                                break;
0539:
0540:                            case RE.E_NDIGIT:
0541:                                range
0542:                                        .include(Character.MIN_VALUE, '/',
0543:                                                include); // [Min - '0')
0544:                                range
0545:                                        .include(':', Character.MAX_VALUE,
0546:                                                include); // ('9' - Max]
0547:                                break;
0548:
0549:                            case RE.E_SPACE:
0550:                                range.include('\t', include);
0551:                                range.include('\r', include);
0552:                                range.include('\f', include);
0553:                                range.include('\n', include);
0554:                                range.include('\b', include);
0555:                                range.include(' ', include);
0556:                                break;
0557:
0558:                            case RE.E_ALNUM:
0559:                                range.include('a', 'z', include);
0560:                                range.include('A', 'Z', include);
0561:                                range.include('_', include);
0562:
0563:                                // Fall through!
0564:
0565:                            case RE.E_DIGIT:
0566:                                range.include('0', '9', include);
0567:                                break;
0568:                            }
0569:
0570:                            // Make last char invalid (can't be a range start)
0571:                            last = CHAR_INVALID;
0572:                            break;
0573:
0574:                        default:
0575:
0576:                            // Escape is simple so treat as a simple char
0577:                            simpleChar = (char) c;
0578:                            break switchOnCharacter;
0579:                        }
0580:                    }
0581:                        continue;
0582:
0583:                    case '-':
0584:
0585:                        // Start a range if one isn't already started
0586:                        if (definingRange) {
0587:                            syntaxError("Bad class range");
0588:                        }
0589:                        definingRange = true;
0590:
0591:                        // If no last character, start of range is 0
0592:                        rangeStart = (last == CHAR_INVALID ? 0 : last);
0593:
0594:                        // Premature end of range. define up to Character.MAX_VALUE
0595:                        if ((idx + 1) < len && pattern.charAt(++idx) == ']') {
0596:                            simpleChar = Character.MAX_VALUE;
0597:                            break;
0598:                        }
0599:                        continue;
0600:
0601:                    default:
0602:                        simpleChar = pattern.charAt(idx++);
0603:                        break;
0604:                    }
0605:
0606:                    // Handle simple character simpleChar
0607:                    if (definingRange) {
0608:                        // if we are defining a range make it now
0609:                        rangeEnd = simpleChar;
0610:
0611:                        // Actually create a range if the range is ok
0612:                        if (rangeStart >= rangeEnd) {
0613:                            syntaxError("Bad character class");
0614:                        }
0615:                        range.include(rangeStart, rangeEnd, include);
0616:
0617:                        // We are done defining the range
0618:                        last = CHAR_INVALID;
0619:                        definingRange = false;
0620:                    } else {
0621:                        // If simple character and not start of range, include it
0622:                        if (idx >= len || pattern.charAt(idx) != '-') {
0623:                            range.include(simpleChar, include);
0624:                        }
0625:                        last = simpleChar;
0626:                    }
0627:                }
0628:
0629:                // Shouldn't be out of input
0630:                if (idx == len) {
0631:                    syntaxError("Unterminated character class");
0632:                }
0633:
0634:                // Absorb the ']' end of class marker
0635:                idx++;
0636:
0637:                // Emit character class definition
0638:                instruction[ret + RE.offsetOpdata] = (char) range.num;
0639:                for (int i = 0; i < range.num; i++) {
0640:                    emit((char) range.minRange[i]);
0641:                    emit((char) range.maxRange[i]);
0642:                }
0643:                return ret;
0644:            }
0645:
0646:            /**
0647:             * Absorb an atomic character string.  This method is a little tricky because
0648:             * it can un-include the last character of string if a closure operator follows.
0649:             * This is correct because *+? have higher precedence than concatentation (thus
0650:             * ABC* means AB(C*) and NOT (ABC)*).
0651:             * @return Index of new atom node
0652:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
0653:             */
0654:            int atom() throws RESyntaxException {
0655:                // Create a string node
0656:                int ret = node(RE.OP_ATOM, 0);
0657:
0658:                // Length of atom
0659:                int lenAtom = 0;
0660:
0661:                // Loop while we've got input
0662:
0663:                atomLoop:
0664:
0665:                while (idx < len) {
0666:                    // Is there a next char?
0667:                    if ((idx + 1) < len) {
0668:                        char c = pattern.charAt(idx + 1);
0669:
0670:                        // If the next 'char' is an escape, look past the whole escape
0671:                        if (pattern.charAt(idx) == '\\') {
0672:                            int idxEscape = idx;
0673:                            escape();
0674:                            if (idx < len) {
0675:                                c = pattern.charAt(idx);
0676:                            }
0677:                            idx = idxEscape;
0678:                        }
0679:
0680:                        // Switch on next char
0681:                        switch (c) {
0682:                        case '{':
0683:                        case '?':
0684:                        case '*':
0685:                        case '+':
0686:
0687:                            // If the next character is a closure operator and our atom is non-empty, the
0688:                            // current character should bind to the closure operator rather than the atom
0689:                            if (lenAtom != 0) {
0690:                                break atomLoop;
0691:                            }
0692:                        }
0693:                    }
0694:
0695:                    // Switch on current char
0696:                    switch (pattern.charAt(idx)) {
0697:                    case ']':
0698:                    case '^':
0699:                    case '$':
0700:                    case '.':
0701:                    case '[':
0702:                    case '(':
0703:                    case ')':
0704:                    case '|':
0705:                        break atomLoop;
0706:
0707:                    case '{':
0708:                    case '?':
0709:                    case '*':
0710:                    case '+':
0711:
0712:                        // We should have an atom by now
0713:                        if (lenAtom == 0) {
0714:                            // No atom before closure
0715:                            syntaxError("Missing operand to closure");
0716:                        }
0717:                        break atomLoop;
0718:
0719:                    case '\\':
0720:
0721:                    {
0722:                        // Get the escaped character (advances input automatically)
0723:                        int idxBeforeEscape = idx;
0724:                        int c = escape();
0725:
0726:                        // Check if it's a simple escape (as opposed to, say, a backreference)
0727:                        if ((c & ESC_MASK) == ESC_MASK) {
0728:                            // Not a simple escape, so backup to where we were before the escape.
0729:                            idx = idxBeforeEscape;
0730:                            break atomLoop;
0731:                        }
0732:
0733:                        // Add escaped char to atom
0734:                        emit((char) c);
0735:                        lenAtom++;
0736:                    }
0737:                        break;
0738:
0739:                    default:
0740:
0741:                        // Add normal character to atom
0742:                        emit(pattern.charAt(idx++));
0743:                        lenAtom++;
0744:                        break;
0745:                    }
0746:                }
0747:
0748:                // This "shouldn't" happen
0749:                if (lenAtom == 0) {
0750:                    internalError();
0751:                }
0752:
0753:                // Emit the atom length into the program
0754:                instruction[ret + RE.offsetOpdata] = (char) lenAtom;
0755:                return ret;
0756:            }
0757:
0758:            /**
0759:             * Match a terminal node.
0760:             * @param flags Flags
0761:             * @return Index of terminal node (closeable)
0762:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
0763:             */
0764:            int terminal(int[] flags) throws RESyntaxException {
0765:                switch (pattern.charAt(idx)) {
0766:                case RE.OP_EOL:
0767:                case RE.OP_BOL:
0768:                case RE.OP_ANY:
0769:                    return node(pattern.charAt(idx++), 0);
0770:
0771:                case '[':
0772:                    return characterClass();
0773:
0774:                case '(':
0775:                    return expr(flags);
0776:
0777:                case ')':
0778:                    syntaxError("Unexpected close paren");
0779:
0780:                case '|':
0781:                    internalError();
0782:
0783:                case ']':
0784:                    syntaxError("Mismatched class");
0785:
0786:                case 0:
0787:                    syntaxError("Unexpected end of input");
0788:
0789:                case '?':
0790:                case '+':
0791:                case '{':
0792:                case '*':
0793:                    syntaxError("Missing operand to closure");
0794:
0795:                case '\\': {
0796:                    // Don't forget, escape() advances the input stream!
0797:                    int idxBeforeEscape = idx;
0798:
0799:                    // Switch on escaped character
0800:                    switch (escape()) {
0801:                    case ESC_CLASS:
0802:                    case ESC_COMPLEX:
0803:                        flags[0] &= ~NODE_NULLABLE;
0804:                        return node(RE.OP_ESCAPE, pattern.charAt(idx - 1));
0805:
0806:                    case ESC_BACKREF: {
0807:                        char backreference = (char) (pattern.charAt(idx - 1) - '0');
0808:                        if (parens <= backreference) {
0809:                            syntaxError("Bad backreference");
0810:                        }
0811:                        flags[0] |= NODE_NULLABLE;
0812:                        return node(RE.OP_BACKREF, backreference);
0813:                    }
0814:
0815:                    default:
0816:
0817:                        // We had a simple escape and we want to have it end up in
0818:                        // an atom, so we back up and fall though to the default handling
0819:                        idx = idxBeforeEscape;
0820:                        flags[0] &= ~NODE_NULLABLE;
0821:                        break;
0822:                    }
0823:                }
0824:                }
0825:
0826:                // Everything above either fails or returns.
0827:                // If it wasn't one of the above, it must be the start of an atom.
0828:                flags[0] &= ~NODE_NULLABLE;
0829:                return atom();
0830:            }
0831:
0832:            /**
0833:             * Compile a possibly closured terminal
0834:             * @param flags Flags passed by reference
0835:             * @return Index of closured node
0836:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
0837:             */
0838:            int closure(int[] flags) throws RESyntaxException {
0839:                // Before terminal
0840:                int idxBeforeTerminal = idx;
0841:
0842:                // Values to pass by reference to terminal()
0843:                int[] terminalFlags = { NODE_NORMAL };
0844:
0845:                // Get terminal symbol
0846:                int ret = terminal(terminalFlags);
0847:
0848:                // Or in flags from terminal symbol
0849:                flags[0] |= terminalFlags[0];
0850:
0851:                // Advance input, set NODE_NULLABLE flag and do sanity checks
0852:                if (idx >= len) {
0853:                    return ret;
0854:                }
0855:
0856:                boolean greedy = true;
0857:                char closureType = pattern.charAt(idx);
0858:                switch (closureType) {
0859:                case '?':
0860:                case '*':
0861:
0862:                    // The current node can be null
0863:                    flags[0] |= NODE_NULLABLE;
0864:
0865:                    // Drop through
0866:
0867:                case '+':
0868:
0869:                    // Eat closure character
0870:                    idx++;
0871:
0872:                    // Drop through
0873:
0874:                case '{':
0875:
0876:                    // Don't allow blantant stupidity
0877:                    int opcode = instruction[ret /* + RE.offsetOpcode */];
0878:                    if (opcode == RE.OP_BOL || opcode == RE.OP_EOL) {
0879:                        syntaxError("Bad closure operand");
0880:                    }
0881:                    if ((terminalFlags[0] & NODE_NULLABLE) != 0) {
0882:                        syntaxError("Closure operand can't be nullable");
0883:                    }
0884:                }
0885:
0886:                // If the next character is a '?', make the closure non-greedy (reluctant)
0887:                if (idx < len && pattern.charAt(idx) == '?') {
0888:                    idx++;
0889:                    greedy = false;
0890:                }
0891:
0892:                if (greedy) {
0893:                    // Actually do the closure now
0894:                    switch (closureType) {
0895:                    case '{': {
0896:                        bracket();
0897:                        int bracketEnd = idx;
0898:                        int bracketMin = this .bracketMin;
0899:                        int bracketOpt = this .bracketOpt;
0900:
0901:                        // Pointer to the last terminal
0902:                        int pos = ret;
0903:
0904:                        // Process min first
0905:                        for (int c = 0; c < bracketMin; c++) {
0906:                            // Rewind stream and run it through again - more matchers coming
0907:                            idx = idxBeforeTerminal;
0908:                            setNextOfEnd(pos, pos = terminal(terminalFlags));
0909:                        }
0910:
0911:                        // Do the right thing for maximum ({m,})
0912:                        if (bracketOpt == bracketUnbounded) {
0913:                            // Drop through now and closure expression.
0914:                            // We are done with the {m,} expr, so skip rest
0915:                            idx = bracketEnd;
0916:                            nodeInsert(RE.OP_STAR, 0, pos);
0917:                            setNextOfEnd(pos + RE.nodeSize, pos);
0918:                            break;
0919:                        } else if (bracketOpt > 0) {
0920:                            int opt[] = new int[bracketOpt + 1];
0921:                            // Surround first optional terminal with MAYBE
0922:                            nodeInsert(RE.OP_MAYBE, 0, pos);
0923:                            opt[0] = pos;
0924:
0925:                            // Add all the rest optional terminals with preceeding MAYBEs
0926:                            for (int c = 1; c < bracketOpt; c++) {
0927:                                opt[c] = node(RE.OP_MAYBE, 0);
0928:                                // Rewind stream and run it through again - more matchers coming
0929:                                idx = idxBeforeTerminal;
0930:                                terminal(terminalFlags);
0931:                            }
0932:
0933:                            // Tie ends together
0934:                            int end = opt[bracketOpt] = node(RE.OP_NOTHING, 0);
0935:                            for (int c = 0; c < bracketOpt; c++) {
0936:                                setNextOfEnd(opt[c], end);
0937:                                setNextOfEnd(opt[c] + RE.nodeSize, opt[c + 1]);
0938:                            }
0939:                        } else {
0940:                            // Rollback terminal - no opt matchers present
0941:                            lenInstruction = pos;
0942:                            node(RE.OP_NOTHING, 0);
0943:                        }
0944:
0945:                        // We are done. skip the reminder of {m,n} expr
0946:                        idx = bracketEnd;
0947:                        break;
0948:                    }
0949:
0950:                    case '?': {
0951:                        nodeInsert(RE.OP_MAYBE, 0, ret);
0952:                        int n = node(RE.OP_NOTHING, 0);
0953:                        setNextOfEnd(ret, n);
0954:                        setNextOfEnd(ret + RE.nodeSize, n);
0955:                        break;
0956:                    }
0957:
0958:                    case '*': {
0959:                        nodeInsert(RE.OP_STAR, 0, ret);
0960:                        setNextOfEnd(ret + RE.nodeSize, ret);
0961:                        break;
0962:                    }
0963:
0964:                    case '+': {
0965:                        nodeInsert(RE.OP_CONTINUE, 0, ret);
0966:                        int n = node(RE.OP_PLUS, 0);
0967:                        setNextOfEnd(ret + RE.nodeSize, n);
0968:                        setNextOfEnd(n, ret);
0969:                        break;
0970:                    }
0971:                    }
0972:                } else {
0973:                    // Actually do the closure now
0974:                    switch (closureType) {
0975:                    case '?': {
0976:                        nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
0977:                        int n = node(RE.OP_NOTHING, 0);
0978:                        setNextOfEnd(ret, n);
0979:                        setNextOfEnd(ret + RE.nodeSize, n);
0980:                        break;
0981:                    }
0982:
0983:                    case '*': {
0984:                        nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
0985:                        setNextOfEnd(ret + RE.nodeSize, ret);
0986:                        break;
0987:                    }
0988:
0989:                    case '+': {
0990:                        nodeInsert(RE.OP_CONTINUE, 0, ret);
0991:                        int n = node(RE.OP_RELUCTANTPLUS, 0);
0992:                        setNextOfEnd(n, ret);
0993:                        setNextOfEnd(ret + RE.nodeSize, n);
0994:                        break;
0995:                    }
0996:                    }
0997:                }
0998:
0999:                return ret;
1000:            }
1001:
1002:            /**
1003:             * Compile body of one branch of an or operator (implements concatenation)
1004:             *
1005:             * @param flags Flags passed by reference
1006:             * @return Pointer to first node in the branch
1007:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
1008:             */
1009:            int branch(int[] flags) throws RESyntaxException {
1010:                // Get each possibly closured piece and concat
1011:                int node;
1012:                int ret = -1;
1013:                int chain = -1;
1014:                int[] closureFlags = new int[1];
1015:                boolean nullable = true;
1016:                while (idx < len && pattern.charAt(idx) != '|'
1017:                        && pattern.charAt(idx) != ')') {
1018:                    // Get new node
1019:                    closureFlags[0] = NODE_NORMAL;
1020:                    node = closure(closureFlags);
1021:                    if (closureFlags[0] == NODE_NORMAL) {
1022:                        nullable = false;
1023:                    }
1024:
1025:                    // If there's a chain, append to the end
1026:                    if (chain != -1) {
1027:                        setNextOfEnd(chain, node);
1028:                    }
1029:
1030:                    // Chain starts at current
1031:                    chain = node;
1032:                    if (ret == -1) {
1033:                        ret = node;
1034:                    }
1035:                }
1036:
1037:                // If we don't run loop, make a nothing node
1038:                if (ret == -1) {
1039:                    ret = node(RE.OP_NOTHING, 0);
1040:                }
1041:
1042:                // Set nullable flag for this branch
1043:                if (nullable) {
1044:                    flags[0] |= NODE_NULLABLE;
1045:                }
1046:
1047:                return ret;
1048:            }
1049:
1050:            /**
1051:             * Compile an expression with possible parens around it.  Paren matching
1052:             * is done at this level so we can tie the branch tails together.
1053:             *
1054:             * @param flags Flag value passed by reference
1055:             * @return Node index of expression in instruction array
1056:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
1057:             */
1058:            int expr(int[] flags) throws RESyntaxException {
1059:                // Create open paren node unless we were called from the top level (which has no parens)
1060:                int paren = -1;
1061:                int ret = -1;
1062:                int closeParens = parens;
1063:                if ((flags[0] & NODE_TOPLEVEL) == 0
1064:                        && pattern.charAt(idx) == '(') {
1065:                    // if its a cluster ( rather than a proper subexpression ie with backrefs )
1066:                    if (idx + 2 < len && pattern.charAt(idx + 1) == '?'
1067:                            && pattern.charAt(idx + 2) == ':') {
1068:                        paren = 2;
1069:                        idx += 3;
1070:                        ret = node(RE.OP_OPEN_CLUSTER, 0);
1071:                    } else {
1072:                        paren = 1;
1073:                        idx++;
1074:                        ret = node(RE.OP_OPEN, parens++);
1075:                    }
1076:                }
1077:                flags[0] &= ~NODE_TOPLEVEL;
1078:
1079:                // Process contents of first branch node
1080:                boolean open = false;
1081:                int branch = branch(flags);
1082:                if (ret == -1) {
1083:                    ret = branch;
1084:                } else {
1085:                    setNextOfEnd(ret, branch);
1086:                }
1087:
1088:                // Loop through branches
1089:                while (idx < len && pattern.charAt(idx) == '|') {
1090:                    // Now open the first branch since there are more than one
1091:                    if (!open) {
1092:                        nodeInsert(RE.OP_BRANCH, 0, branch);
1093:                        open = true;
1094:                    }
1095:
1096:                    idx++;
1097:                    setNextOfEnd(branch, branch = node(RE.OP_BRANCH, 0));
1098:                    branch(flags);
1099:                }
1100:
1101:                // Create an ending node (either a close paren or an OP_END)
1102:                int end;
1103:                if (paren > 0) {
1104:                    if (idx < len && pattern.charAt(idx) == ')') {
1105:                        idx++;
1106:                    } else {
1107:                        syntaxError("Missing close paren");
1108:                    }
1109:                    if (paren == 1) {
1110:                        end = node(RE.OP_CLOSE, closeParens);
1111:                    } else {
1112:                        end = node(RE.OP_CLOSE_CLUSTER, 0);
1113:                    }
1114:                } else {
1115:                    end = node(RE.OP_END, 0);
1116:                }
1117:
1118:                // Append the ending node to the ret nodelist
1119:                setNextOfEnd(ret, end);
1120:
1121:                // Hook the ends of each branch to the end node
1122:                int currentNode = ret;
1123:                int nextNodeOffset = instruction[currentNode + RE.offsetNext];
1124:                // while the next node o
1125:                while (nextNodeOffset != 0 && currentNode < lenInstruction) {
1126:                    // If branch, make the end of the branch's operand chain point to the end node.
1127:                    if (instruction[currentNode /* + RE.offsetOpcode */] == RE.OP_BRANCH) {
1128:                        setNextOfEnd(currentNode + RE.nodeSize, end);
1129:                    }
1130:                    nextNodeOffset = instruction[currentNode + RE.offsetNext];
1131:                    currentNode += nextNodeOffset;
1132:                }
1133:
1134:                // Return the node list
1135:                return ret;
1136:            }
1137:
1138:            /**
1139:             * Compiles a regular expression pattern into a program runnable by the pattern
1140:             * matcher class 'RE'.
1141:             * @param pattern Regular expression pattern to compile (see RECompiler class
1142:             * for details).
1143:             * @return A compiled regular expression program.
1144:             * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
1145:             * @see RECompiler
1146:             * @see RE
1147:             */
1148:            public REProgram compile(String pattern) throws RESyntaxException {
1149:                // Initialize variables for compilation
1150:                this .pattern = pattern; // Save pattern in instance variable
1151:                len = pattern.length(); // Precompute pattern length for speed
1152:                idx = 0; // Set parsing index to the first character
1153:                lenInstruction = 0; // Set emitted instruction count to zero
1154:                parens = 1; // Set paren level to 1 (the implicit outer parens)
1155:
1156:                // Initialize pass by reference flags value
1157:                int[] flags = { NODE_TOPLEVEL };
1158:
1159:                // Parse expression
1160:                expr(flags);
1161:
1162:                // Should be at end of input
1163:                if (idx != len) {
1164:                    if (pattern.charAt(idx) == ')') {
1165:                        syntaxError("Unmatched close paren");
1166:                    }
1167:                    syntaxError("Unexpected input remains");
1168:                }
1169:
1170:                // Return the result
1171:                char[] ins = new char[lenInstruction];
1172:                System.arraycopy(instruction, 0, ins, 0, lenInstruction);
1173:                return new REProgram(parens, ins);
1174:            }
1175:
1176:            /**
1177:             * Local, nested class for maintaining character ranges for character classes.
1178:             */
1179:            class RERange {
1180:                int size = 16; // Capacity of current range arrays
1181:                int[] minRange = new int[size]; // Range minima
1182:                int[] maxRange = new int[size]; // Range maxima
1183:                int num = 0; // Number of range array elements in use
1184:
1185:                /**
1186:                 * Deletes the range at a given index from the range lists
1187:                 * @param index Index of range to delete from minRange and maxRange arrays.
1188:                 */
1189:                void delete(int index) {
1190:                    // Return if no elements left or index is out of range
1191:                    if (num == 0 || index >= num) {
1192:                        return;
1193:                    }
1194:
1195:                    // Move elements down
1196:                    while (++index < num) {
1197:                        if (index - 1 >= 0) {
1198:                            minRange[index - 1] = minRange[index];
1199:                            maxRange[index - 1] = maxRange[index];
1200:                        }
1201:                    }
1202:
1203:                    // One less element now
1204:                    num--;
1205:                }
1206:
1207:                /**
1208:                 * Merges a range into the range list, coalescing ranges if possible.
1209:                 * @param min Minimum end of range
1210:                 * @param max Maximum end of range
1211:                 */
1212:                void merge(int min, int max) {
1213:                    // Loop through ranges
1214:                    for (int i = 0; i < num; i++) {
1215:                        // Min-max is subsumed by minRange[i]-maxRange[i]
1216:                        if (min >= minRange[i] && max <= maxRange[i]) {
1217:                            return;
1218:                        }
1219:
1220:                        // Min-max subsumes minRange[i]-maxRange[i]
1221:                        else if (min <= minRange[i] && max >= maxRange[i]) {
1222:                            delete(i);
1223:                            merge(min, max);
1224:                            return;
1225:                        }
1226:
1227:                        // Min is in the range, but max is outside
1228:                        else if (min >= minRange[i] && min <= maxRange[i]) {
1229:                            min = minRange[i];
1230:                            delete(i);
1231:                            merge(min, max);
1232:                            return;
1233:                        }
1234:
1235:                        // Max is in the range, but min is outside
1236:                        else if (max >= minRange[i] && max <= maxRange[i]) {
1237:                            max = maxRange[i];
1238:                            delete(i);
1239:                            merge(min, max);
1240:                            return;
1241:                        }
1242:                    }
1243:
1244:                    // Must not overlap any other ranges
1245:                    if (num >= size) {
1246:                        size *= 2;
1247:                        int[] newMin = new int[size];
1248:                        int[] newMax = new int[size];
1249:                        System.arraycopy(minRange, 0, newMin, 0, num);
1250:                        System.arraycopy(maxRange, 0, newMax, 0, num);
1251:                        minRange = newMin;
1252:                        maxRange = newMax;
1253:                    }
1254:                    minRange[num] = min;
1255:                    maxRange[num] = max;
1256:                    num++;
1257:                }
1258:
1259:                /**
1260:                 * Removes a range by deleting or shrinking all other ranges
1261:                 * @param min Minimum end of range
1262:                 * @param max Maximum end of range
1263:                 */
1264:                void remove(int min, int max) {
1265:                    // Loop through ranges
1266:                    for (int i = 0; i < num; i++) {
1267:                        // minRange[i]-maxRange[i] is subsumed by min-max
1268:                        if (minRange[i] >= min && maxRange[i] <= max) {
1269:                            delete(i);
1270:                            return;
1271:                        }
1272:
1273:                        // min-max is subsumed by minRange[i]-maxRange[i]
1274:                        else if (min >= minRange[i] && max <= maxRange[i]) {
1275:                            int minr = minRange[i];
1276:                            int maxr = maxRange[i];
1277:                            delete(i);
1278:                            if (minr < min) {
1279:                                merge(minr, min - 1);
1280:                            }
1281:                            if (max < maxr) {
1282:                                merge(max + 1, maxr);
1283:                            }
1284:                            return;
1285:                        }
1286:
1287:                        // minRange is in the range, but maxRange is outside
1288:                        else if (minRange[i] >= min && minRange[i] <= max) {
1289:                            minRange[i] = max + 1;
1290:                            return;
1291:                        }
1292:
1293:                        // maxRange is in the range, but minRange is outside
1294:                        else if (maxRange[i] >= min && maxRange[i] <= max) {
1295:                            maxRange[i] = min - 1;
1296:                            return;
1297:                        }
1298:                    }
1299:                }
1300:
1301:                /**
1302:                 * Includes (or excludes) the range from min to max, inclusive.
1303:                 * @param min Minimum end of range
1304:                 * @param max Maximum end of range
1305:                 * @param include True if range should be included.  False otherwise.
1306:                 */
1307:                void include(int min, int max, boolean include) {
1308:                    if (include) {
1309:                        merge(min, max);
1310:                    } else {
1311:                        remove(min, max);
1312:                    }
1313:                }
1314:
1315:                /**
1316:                 * Includes a range with the same min and max
1317:                 * @param minmax Minimum and maximum end of range (inclusive)
1318:                 * @param include True if range should be included.  False otherwise.
1319:                 */
1320:                void include(char minmax, boolean include) {
1321:                    include(minmax, minmax, include);
1322:                }
1323:            }
1324:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.