Source Code Cross Referenced for RegexTranslator.java in  » XML » saxonb » net » sf » saxon » type » Java Source Code / Java Documentation — Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » XML » saxonb » net.sf.saxon.type 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        package net.sf.saxon.type;
0002:
0003:        import net.sf.saxon.om.XMLChar;
0004:        import net.sf.saxon.om.FastStringBuffer;
0005:
0006:        import java.math.BigDecimal;
0007:        import java.util.*;
0008:
0009:        /**
0010:         * This class translates XML Schema regex syntax into JDK 1.4 regex syntax.
0011:         * Author: James Clark
0012:         * Modified by Michael Kay (a) to integrate the code into Saxon, and (b) to support XPath additions
0013:         * to the XML Schema regex syntax.
0014:         */
0015:        public class RegexTranslator {
0016:
0017:            /**
0018:             * Translates XML Schema regexes into <code>java.util.regex</code> regexes.
0019:             *
0020:             * @see java.util.regex.Pattern
0021:             * @see <a href="http://www.w3.org/TR/xmlschema-2/#regexs">XML Schema Part 2</a>
0022:             */
0023:
                 // ---- Per-instance parser state; translate() creates one RegexTranslator per call ----

                 // The regular expression being translated; assigned once by the constructor.
0024:            private final CharSequence regExp;
                 // Set by translate() from its xpath argument. When true, translateQuantifier()
                 // also accepts a '?' immediately after a quantifier.
0025:            private boolean isXPath;
                 // Index of the next character to read from regExp. advance() keeps incrementing
                 // it even once the end of the input has been reached.
0026:            private int pos = 0;
                 // Cached value of regExp.length().
0027:            private final int length;
                 // The character currently under examination; EOS once the input is exhausted.
0028:            private char curChar;
                 // Set to true by advance() when it is called with no input left.
0029:            private boolean eos = false;
                 // Accumulates the translated expression; translate() returns result.toString().
0030:            private final FastStringBuffer result = new FastStringBuffer(32);
0031:
                 // One letter per top-level category; categoryCharClasses has one slot per letter.
                 // (Presumably these are the one-letter Unicode general categories -- TODO confirm.)
0032:            private static final String categories = "LMNPZSC";
                 // Allocated empty here; the code that fills the slots is not visible in this part of the file.
0033:            private static final CharClass[] categoryCharClasses = new CharClass[categories
0034:                    .length()];
                 // Two-letter sub-category codes concatenated into a single string.
0035:            private static final String subCategories = "LuLlLtLmLoMnMcMeNdNlNoPcPdPsPePiPfPoZsZlZpSmScSkSoCcCfCoCn";
                 // One slot per two-letter code in subCategories (hence length() / 2); allocated empty here.
0036:            private static final CharClass[] subCategoryCharClasses = new CharClass[subCategories
0037:                    .length() / 2];
0038:
                 // Lowest and highest code points outside the Basic Multilingual Plane.
0039:            private static final int NONBMP_MIN = 0x10000;
0040:            private static final int NONBMP_MAX = 0x10FFFF;
                 // Bounds of the low (second) surrogate range. CharClass.highSurrogateRanges() and
                 // lowSurrogateRanges() compare against these to see whether a range starts or ends
                 // exactly on the boundary of a high surrogate's block.
0041:            private static final char SURROGATE2_MIN = '\uDC00';
0042:            private static final char SURROGATE2_MAX = '\uDFFF';
0043:
0044:            //static final Localizer localizer = new Localizer(RegexTranslator.class);
0045:
                 /**
                  * Names of character blocks, with spaces removed from the names.
                  * Blocks that include ranges outside the BMP are listed separately in
                  * <code>specialBlockNames</code>; presumably every block named here lies
                  * entirely within the BMP (TODO confirm against the code that consumes this array).
                  *
                  * NOTE(review): "Specials" appears twice in the final line of the initializer.
                  * Whether that is intentional depends on the lookup code, which is not visible here.
                  */
0046:            private static final String[] blockNames = { "BasicLatin",
0047:                    "Latin-1Supplement", "LatinExtended-A", "LatinExtended-B",
0048:                    "IPAExtensions", "SpacingModifierLetters",
0049:                    "CombiningDiacriticalMarks", "Greek", "Cyrillic",
0050:                    "Armenian", "Hebrew", "Arabic", "Syriac", "Thaana",
0051:                    "Devanagari", "Bengali", "Gurmukhi", "Gujarati", "Oriya",
0052:                    "Tamil", "Telugu", "Kannada", "Malayalam", "Sinhala",
0053:                    "Thai", "Lao", "Tibetan", "Myanmar", "Georgian",
0054:                    "HangulJamo", "Ethiopic", "Cherokee",
0055:                    "UnifiedCanadianAboriginalSyllabics", "Ogham", "Runic",
0056:                    "Khmer", "Mongolian", "LatinExtendedAdditional",
0057:                    "GreekExtended", "GeneralPunctuation",
0058:                    "SuperscriptsandSubscripts", "CurrencySymbols",
0059:                    "CombiningMarksforSymbols", "LetterlikeSymbols",
0060:                    "NumberForms", "Arrows", "MathematicalOperators",
0061:                    "MiscellaneousTechnical", "ControlPictures",
0062:                    "OpticalCharacterRecognition", "EnclosedAlphanumerics",
0063:                    "BoxDrawing", "BlockElements", "GeometricShapes",
0064:                    "MiscellaneousSymbols", "Dingbats", "BraillePatterns",
0065:                    "CJKRadicalsSupplement", "KangxiRadicals",
0066:                    "IdeographicDescriptionCharacters",
0067:                    "CJKSymbolsandPunctuation", "Hiragana",
0068:                    "Katakana",
0069:                    "Bopomofo",
0070:                    "HangulCompatibilityJamo",
0071:                    "Kanbun",
0072:                    "BopomofoExtended",
0073:                    "EnclosedCJKLettersandMonths",
0074:                    "CJKCompatibility",
0075:                    "CJKUnifiedIdeographsExtensionA",
0076:                    "CJKUnifiedIdeographs",
0077:                    "YiSyllables",
0078:                    "YiRadicals",
0079:                    "HangulSyllables",
0080:                    // surrogates excluded because there are never any *characters* with codes in surrogate range
0081:                    // "PrivateUse", excluded because 3.1 adds non-BMP ranges
0082:                    "CJKCompatibilityIdeographs",
0083:                    "AlphabeticPresentationForms", "ArabicPresentationForms-A",
0084:                    "CombiningHalfMarks", "CJKCompatibilityForms",
0085:                    "SmallFormVariants", "ArabicPresentationForms-B",
0086:                    "Specials", "HalfwidthandFullwidthForms", "Specials" };
0087:
0088:            /**
0089:             * Names of blocks including ranges outside the BMP.
                  * The array is positionally parallel to <code>specialBlockCharClasses</code>:
                  * entry <i>i</i> there is the character class for the name at index <i>i</i> here.
                  * The surrogate blocks are listed too; their character classes are empty.
0090:             */
0091:            private static final String[] specialBlockNames = { "OldItalic",
0092:                    "Gothic", "Deseret", "ByzantineMusicalSymbols",
0093:                    "MusicalSymbols", "MathematicalAlphanumericSymbols",
0094:                    "CJKUnifiedIdeographsExtensionB",
0095:                    "CJKCompatibilityIdeographsSupplement", "Tags",
0096:                    "PrivateUse", "HighSurrogates", "HighPrivateUseSurrogates",
0097:                    "LowSurrogates", };
0098:
0099:            // This file was automatically generated by CategoriesGen
                 // (The generated part runs from here down to the "end of generated code" marker.)
0100:
                 // Two-letter category names concatenated; the i-th name corresponds to
                 // CATEGORY_RANGES[i] (the per-row comments below follow the same order).
0101:            static final String CATEGORY_NAMES = "NoLoMnCfLlNlPoLuMcNdSoSmCo";
0102:
                 // One row per category named in CATEGORY_NAMES. Each row is a flat list of
                 // code point pairs; every value is at or above 0x10000, i.e. outside the BMP.
                 // The pairs appear to be inclusive (min, max) bounds -- single code points are
                 // written with min == max, e.g. 0x10808, 0x10808.
0103:            static final int[][] CATEGORY_RANGES = {
0104:                    {
0105:                    // No
0106:                            0x10107, 0x10133, 0x10320, 0x10323 },
0107:                    {
0108:                            // Lo
0109:                            0x10000, 0x1000b, 0x1000d, 0x10026, 0x10028,
0110:                            0x1003a, 0x1003c, 0x1003d, 0x1003f, 0x1004d,
0111:                            0x10050, 0x1005d, 0x10080, 0x100fa, 0x10300,
0112:                            0x1031e, 0x10330, 0x10349, 0x10380, 0x1039d,
0113:                            0x10450, 0x1049d, 0x10800, 0x10805, 0x10808,
0114:                            0x10808, 0x1080a, 0x10835, 0x10837, 0x10838,
0115:                            0x1083c, 0x1083c, 0x1083f, 0x1083f, 0x20000,
0116:                            0x2a6d6, 0x2f800, 0x2fa1d },
0117:                    {
0118:                            // Mn
0119:                            0x1d167, 0x1d169, 0x1d17b, 0x1d182, 0x1d185,
0120:                            0x1d18b, 0x1d1aa, 0x1d1ad, 0xe0100, 0xe01ef },
0121:                    {
0122:                            // Cf
0123:                            0x1d173, 0x1d17a, 0xe0001, 0xe0001, 0xe0020,
0124:                            0xe007f },
0125:                    {
0126:                            // Ll
0127:                            0x10428, 0x1044f, 0x1d41a, 0x1d433, 0x1d44e,
0128:                            0x1d454, 0x1d456, 0x1d467, 0x1d482, 0x1d49b,
0129:                            0x1d4b6, 0x1d4b9, 0x1d4bb, 0x1d4bb, 0x1d4bd,
0130:                            0x1d4c3, 0x1d4c5, 0x1d4cf, 0x1d4ea, 0x1d503,
0131:                            0x1d51e, 0x1d537, 0x1d552, 0x1d56b, 0x1d586,
0132:                            0x1d59f, 0x1d5ba, 0x1d5d3, 0x1d5ee, 0x1d607,
0133:                            0x1d622, 0x1d63b, 0x1d656, 0x1d66f, 0x1d68a,
0134:                            0x1d6a3, 0x1d6c2, 0x1d6da, 0x1d6dc, 0x1d6e1,
0135:                            0x1d6fc, 0x1d714, 0x1d716, 0x1d71b, 0x1d736,
0136:                            0x1d74e, 0x1d750, 0x1d755, 0x1d770, 0x1d788,
0137:                            0x1d78a, 0x1d78f, 0x1d7aa, 0x1d7c2, 0x1d7c4,
0138:                            0x1d7c9 },
0139:                    {
0140:                    // Nl
0141:                            0x1034a, 0x1034a },
0142:                    {
0143:                    // Po
0144:                            0x10100, 0x10101, 0x1039f, 0x1039f },
0145:                    {
0146:                            // Lu
0147:                            0x10400, 0x10427, 0x1d400, 0x1d419, 0x1d434,
0148:                            0x1d44d, 0x1d468, 0x1d481, 0x1d49c, 0x1d49c,
0149:                            0x1d49e, 0x1d49f, 0x1d4a2, 0x1d4a2, 0x1d4a5,
0150:                            0x1d4a6, 0x1d4a9, 0x1d4ac, 0x1d4ae, 0x1d4b5,
0151:                            0x1d4d0, 0x1d4e9, 0x1d504, 0x1d505, 0x1d507,
0152:                            0x1d50a, 0x1d50d, 0x1d514, 0x1d516, 0x1d51c,
0153:                            0x1d538, 0x1d539, 0x1d53b, 0x1d53e, 0x1d540,
0154:                            0x1d544, 0x1d546, 0x1d546, 0x1d54a, 0x1d550,
0155:                            0x1d56c, 0x1d585, 0x1d5a0, 0x1d5b9, 0x1d5d4,
0156:                            0x1d5ed, 0x1d608, 0x1d621, 0x1d63c, 0x1d655,
0157:                            0x1d670, 0x1d689, 0x1d6a8, 0x1d6c0, 0x1d6e2,
0158:                            0x1d6fa, 0x1d71c, 0x1d734, 0x1d756, 0x1d76e,
0159:                            0x1d790, 0x1d7a8 },
0160:                    {
0161:                    // Mc
0162:                            0x1d165, 0x1d166, 0x1d16d, 0x1d172 },
0163:                    {
0164:                    // Nd
0165:                            0x104a0, 0x104a9, 0x1d7ce, 0x1d7ff },
0166:                    {
0167:                            // So
0168:                            0x10102, 0x10102, 0x10137, 0x1013f, 0x1d000,
0169:                            0x1d0f5, 0x1d100, 0x1d126, 0x1d12a, 0x1d164,
0170:                            0x1d16a, 0x1d16c, 0x1d183, 0x1d184, 0x1d18c,
0171:                            0x1d1a9, 0x1d1ae, 0x1d1dd, 0x1d300, 0x1d356 },
0172:                    {
0173:                            // Sm
0174:                            0x1d6c1, 0x1d6c1, 0x1d6db, 0x1d6db, 0x1d6fb,
0175:                            0x1d6fb, 0x1d715, 0x1d715, 0x1d735, 0x1d735,
0176:                            0x1d74f, 0x1d74f, 0x1d76f, 0x1d76f, 0x1d789,
0177:                            0x1d789, 0x1d7a9, 0x1d7a9, 0x1d7c3, 0x1d7c3 }, {
0178:                    // Co
0179:                            0xf0000, 0xffffd, 0x100000, 0x10fffd } };
0180:
0181:            // end of generated code
0182:
0183:            /**
0184:             * CharClass for each block name in specialBlockNames.
                  * Entries are in the same order as the names in that array; the trailing
                  * comments below give the name each entry lines up with.
0185:             */
0186:            private static final CharClass[] specialBlockCharClasses = {
0187:                    new CharRange(0x10300, 0x1032F), // OldItalic
0188:                    new CharRange(0x10330, 0x1034F), // Gothic
0189:                    new CharRange(0x10400, 0x1044F), // Deseret
0190:                    new CharRange(0x1D000, 0x1D0FF), // ByzantineMusicalSymbols
0191:                    new CharRange(0x1D100, 0x1D1FF), // MusicalSymbols
0192:                    new CharRange(0x1D400, 0x1D7FF), // MathematicalAlphanumericSymbols
0193:                    new CharRange(0x20000, 0x2A6D6), // CJKUnifiedIdeographsExtensionB
0194:                    new CharRange(0x2F800, 0x2FA1F), // CJKCompatibilityIdeographsSupplement
0195:                    new CharRange(0xE0000, 0xE007F), // Tags
                         // PrivateUse: one BMP range plus two non-BMP ranges
0196:                    new Union(new CharClass[] { new CharRange(0xE000, 0xF8FF),
0197:                            new CharRange(0xF0000, 0xFFFFD),
0198:                            new CharRange(0x100000, 0x10FFFD) }),
                         // HighSurrogates, HighPrivateUseSurrogates, LowSurrogates: all empty
0199:                    Empty.getInstance(), Empty.getInstance(),
0200:                    Empty.getInstance() };
0201:
                 // Every character except newline and carriage return
                 // (presumably the class behind the '.' metacharacter -- its use is not visible here).
0202:            private static final CharClass DOT = new Complement(new Union(
0203:                    new CharClass[] { new SingleChar('\n'),
0204:                            new SingleChar('\r') }));
0205:
                 // The ESC_x constants below are presumably the classes for the corresponding
                 // backslash escapes (\d, \D, \w, ...); the code that uses them is not visible here.

                 // The "Nd" property class.
0206:            private static final CharClass ESC_d = new Property("Nd");
0207:
                 // Everything not in ESC_d.
0208:            private static final CharClass ESC_D = new Complement(ESC_d);
0209:
                 // Union of the character classes computed for categories 'P', 'Z' and 'C'.
0210:            private static final CharClass ESC_W = new Union(new CharClass[] {
0211:                    computeCategoryCharClass('P'),
0212:                    computeCategoryCharClass('Z'),
0213:                    computeCategoryCharClass('C') });
0214:            //was: new Property("P"), new Property("Z"), new Property("C") }
0215:
                 // Everything not in ESC_W.
0216:            private static final CharClass ESC_w = new Complement(ESC_W);
0217:
                 // Exactly four characters: space, newline, carriage return and tab.
0218:            private static final CharClass ESC_s = new Union(new CharClass[] {
0219:                    new SingleChar(' '), new SingleChar('\n'),
0220:                    new SingleChar('\r'), new SingleChar('\t') });
0221:
0222:            // This file was automatically generated by NamingExceptionsGen
                 // (The generated part runs from here down to the "end of generated code" marker.)
                 // The NMSTRT_* strings are the arguments used to build ESC_i and the NMCHAR_*
                 // strings those used to build ESC_c, in both cases via makeCharClass(categories,
                 // includes, excludeRanges). makeCharClass is not visible here; judging by the
                 // names, the *_EXCLUDE_RANGES strings are consecutive (min, max) character
                 // pairs and the *_CATEGORIES strings are concatenated two-letter category
                 // codes -- TODO confirm.
0223:            // class NamingExceptions {
0224:            static final String NMSTRT_INCLUDES = "\u003A\u005F\u02BB\u02BC\u02BD\u02BE\u02BF\u02C0\u02C1\u0559"
0225:                    + "\u06E5\u06E6\u212E";
0226:            static final String NMSTRT_EXCLUDE_RANGES = "\u00AA\u00BA\u0132\u0133\u013F\u0140\u0149\u0149\u017F\u017F"
0227:                    + "\u01C4\u01CC\u01F1\u01F3\u01F6\u01F9\u0218\u0233\u02A9\u02AD"
0228:                    + "\u03D7\u03D7\u03DB\u03DB\u03DD\u03DD\u03DF\u03DF\u03E1\u03E1"
0229:                    + "\u0400\u0400\u040D\u040D\u0450\u0450\u045D\u045D\u048C\u048F"
0230:                    + "\u04EC\u04ED\u0587\u0587\u06B8\u06B9\u06BF\u06BF\u06CF\u06CF"
0231:                    + "\u06FA\u07A5\u0950\u0950\u0AD0\u0AD0\u0D85\u0DC6\u0E2F\u0E2F"
0232:                    + "\u0EAF\u0EAF\u0EDC\u0F00\u0F6A\u1055\u1101\u1101\u1104\u1104"
0233:                    + "\u1108\u1108\u110A\u110A\u110D\u110D\u1113\u113B\u113D\u113D"
0234:                    + "\u113F\u113F\u1141\u114B\u114D\u114D\u114F\u114F\u1151\u1153"
0235:                    + "\u1156\u1158\u1162\u1162\u1164\u1164\u1166\u1166\u1168\u1168"
0236:                    + "\u116A\u116C\u116F\u1171\u1174\u1174\u1176\u119D\u119F\u11A2"
0237:                    + "\u11A9\u11AA\u11AC\u11AD\u11B0\u11B6\u11B9\u11B9\u11BB\u11BB"
0238:                    + "\u11C3\u11EA\u11EC\u11EF\u11F1\u11F8\u1200\u18A8\u207F\u2124"
0239:                    + "\u2128\u2128\u212C\u212D\u212F\u217F\u2183\u3006\u3038\u303A"
0240:                    + "\u3131\u4DB5\uA000\uA48C\uF900\uFFDC";
0241:            static final String NMSTRT_CATEGORIES = "LlLuLoLtNl";
0242:            static final String NMCHAR_INCLUDES = "\u002D\u002E\u003A\u005F\u00B7\u0387\u212E";
0243:            static final String NMCHAR_EXCLUDE_RANGES = "\u00AA\u00B5\u00BA\u00BA\u0132\u0133\u013F\u0140\u0149\u0149"
0244:                    + "\u017F\u017F\u01C4\u01CC\u01F1\u01F3\u01F6\u01F9\u0218\u0233"
0245:                    + "\u02A9\u02B8\u02E0\u02EE\u0346\u034E\u0362\u037A\u03D7\u03D7"
0246:                    + "\u03DB\u03DB\u03DD\u03DD\u03DF\u03DF\u03E1\u03E1\u0400\u0400"
0247:                    + "\u040D\u040D\u0450\u0450\u045D\u045D\u0488\u048F\u04EC\u04ED"
0248:                    + "\u0587\u0587\u0653\u0655\u06B8\u06B9\u06BF\u06BF\u06CF\u06CF"
0249:                    + "\u06FA\u07B0\u0950\u0950\u0AD0\u0AD0\u0D82\u0DF3\u0E2F\u0E2F"
0250:                    + "\u0EAF\u0EAF\u0EDC\u0F00\u0F6A\u0F6A\u0F96\u0F96\u0FAE\u0FB0"
0251:                    + "\u0FB8\u0FB8\u0FBA\u1059\u1101\u1101\u1104\u1104\u1108\u1108"
0252:                    + "\u110A\u110A\u110D\u110D\u1113\u113B\u113D\u113D\u113F\u113F"
0253:                    + "\u1141\u114B\u114D\u114D\u114F\u114F\u1151\u1153\u1156\u1158"
0254:                    + "\u1162\u1162\u1164\u1164\u1166\u1166\u1168\u1168\u116A\u116C"
0255:                    + "\u116F\u1171\u1174\u1174\u1176\u119D\u119F\u11A2\u11A9\u11AA"
0256:                    + "\u11AC\u11AD\u11B0\u11B6\u11B9\u11B9\u11BB\u11BB\u11C3\u11EA"
0257:                    + "\u11EC\u11EF\u11F1\u11F8\u1200\u18A9\u207F\u207F\u20DD\u20E0"
0258:                    + "\u20E2\u2124\u2128\u2128\u212C\u212D\u212F\u217F\u2183\u2183"
0259:                    + "\u3006\u3006\u3038\u303A\u3131\u4DB5\uA000\uA48C\uF900\uFFDC";
0260:            static final String NMCHAR_CATEGORIES = "LlLuLoLtNlMcMeMnLmNd";
0261:            // end of generated code
0262:
                 // Everything not in ESC_s.
0263:            private static final CharClass ESC_S = new Complement(ESC_s);
0264:
                 // Built from the NMSTRT_* constants (presumably the name-start characters
                 // behind the \i escape -- the use site is not visible here).
0265:            private static final CharClass ESC_i = makeCharClass(
0266:                    NMSTRT_CATEGORIES, NMSTRT_INCLUDES, NMSTRT_EXCLUDE_RANGES);
0267:
                 // Everything not in ESC_i.
0268:            private static final CharClass ESC_I = new Complement(ESC_i);
0269:
                 // Built from the NMCHAR_* constants (presumably the name characters
                 // behind the \c escape -- the use site is not visible here).
0270:            private static final CharClass ESC_c = makeCharClass(
0271:                    NMCHAR_CATEGORIES, NMCHAR_INCLUDES, NMCHAR_EXCLUDE_RANGES);
0272:
                 // Everything not in ESC_c.
0273:            private static final CharClass ESC_C = new Complement(ESC_c);
0274:
                 // Value stored in curChar by advance() once the input is exhausted.
0275:            private static final char EOS = '\0';
0276:
                 /**
                  * Creates a translator positioned on the first character of the input.
                  * Private: instances are created only by {@link #translate}.
                  *
                  * @param regExp the regular expression to be translated
                  */
0277:            private RegexTranslator(CharSequence regExp) {
0278:                this .regExp = regExp;
0279:                this .length = regExp.length();
                     // Prime curChar (or set eos straight away if the input is empty).
0280:                advance();
0281:            }
0282:
0283:            /**
0284:             * Translates a regular expression in the syntax of XML Schemas Part 2 into a regular
0285:             * expression in the syntax of <code>java.util.regex.Pattern</code>.  The translation
0286:             * assumes that the string to be matched against the regex uses surrogate pairs correctly.
0287:             * If the string comes from XML content, a conforming XML parser will automatically
0288:             * check this; if the string comes from elsewhere, it may be necessary to check
0289:             * surrogate usage before matching.
0290:             *
0291:             * @param regexp a String containing a regular expression in the syntax of XML Schemas Part 2
0292:             * @param xpath a boolean indicating whether the XPath 2.0 F+O extensions to the schema
0293:             * regex syntax are permitted
0294:             * @return a String containing a regular expression in the syntax of java.util.regex.Pattern
0295:             * @throws RegexSyntaxException if <code>regexp</code> is not a regular expression in the
0296:             * syntax of XML Schemas Part 2
0297:             * @see java.util.regex.Pattern
0298:             * @see <a href="http://www.w3.org/TR/xmlschema-2/#regexs">XML Schema Part 2</a>
0299:             */
0300:            public static String translate(CharSequence regexp, boolean xpath)
0301:                    throws RegexSyntaxException {
                     // A fresh translator is created for every call, so all parsing state
                     // (position, current character, output buffer) is local to this call.
0302:                RegexTranslator tr = new RegexTranslator(regexp);
0303:                tr.isXPath = xpath;
                     // Parses the whole input, appending the translation to tr.result.
0304:                tr.translateTop();
0305:                return tr.result.toString();
0306:            }
0307:
                 /**
                  * Moves to the next input character. While input remains, curChar receives
                  * the character at pos and pos is incremented. Once the input is exhausted,
                  * curChar becomes EOS and eos is set to true.
                  */
0308:            private void advance() {
0309:                if (pos < length)
0310:                    curChar = regExp.charAt(pos++);
0311:                else {
                         // pos keeps growing past the end, so it always ends up one beyond
                         // the position of the (real or virtual) current character.
0312:                    pos++;
0313:                    curChar = EOS;
0314:                    eos = true;
0315:                }
0316:            }
0317:
                 /**
                  * Translates the complete input. Fails if anything is left over after a
                  * regular expression has been parsed.
                  *
                  * @throws RegexSyntaxException if parsing stops before the end of the input
                  * (message "expected end of string"), or if a nested parsing step fails
                  */
0318:            private void translateTop() throws RegexSyntaxException {
0319:                translateRegExp();
0320:                if (!eos)
0321:                    throw makeException("expected end of string");
0322:            }
0323:
                 /**
                  * Translates a sequence of one or more branches separated by '|'.
                  * Each '|' is copied to the output unchanged.
                  *
                  * @throws RegexSyntaxException if a branch cannot be translated
                  */
0324:            private void translateRegExp() throws RegexSyntaxException {
0325:                translateBranch();
0326:                while (curChar == '|') {
0327:                    copyCurChar();
0328:                    translateBranch();
0329:                }
0330:            }
0331:
                 /**
                  * Translates one branch: repeatedly translates an atom followed by its optional
                  * quantifier, stopping when translateAtom() returns false. translateAtom() is
                  * not visible here; presumably it returns false when no atom starts at the
                  * current position.
                  *
                  * @throws RegexSyntaxException if an atom or quantifier is malformed
                  */
0332:            private void translateBranch() throws RegexSyntaxException {
0333:                while (translateAtom())
0334:                    translateQuantifier();
0335:            }
0336:
                 /**
                  * Translates an optional quantifier at the current position: one of '*', '?',
                  * '+', or a braced quantity. If the current character does not start a
                  * quantifier, nothing is consumed or emitted. When isXPath is set, a '?'
                  * directly following the quantifier is copied through as well.
                  *
                  * @throws RegexSyntaxException if a braced quantity is malformed
                  */
0337:            private void translateQuantifier() throws RegexSyntaxException {
0338:                switch (curChar) {
0339:                case '*':
0340:                case '?':
0341:                case '+':
0342:                    copyCurChar();
0343:                    break;
0344:                case '{':
0345:                    copyCurChar();
0346:                    translateQuantity();
                         // expect() is defined elsewhere; presumably it throws unless curChar is '}'.
0347:                    expect('}');
0348:                    copyCurChar();
0349:                    break;
0350:                default:
                         // No quantifier here: return before the '?' check below.
0351:                    return;
0352:                }
                     // Only reached after a quantifier has been copied.
0353:                if (curChar == '?' && isXPath) {
0354:                    copyCurChar();
0355:                }
0356:            }
0357:
                 /**
                  * Translates the inside of a braced quantifier: a lower bound, optionally
                  * followed by ',' and an optional upper bound. Bounds that do not fit in an
                  * int are written to the output as Integer.MAX_VALUE.
                  *
                  * @throws RegexSyntaxException if a bound is not a sequence of digits, or if
                  * the upper bound is smaller than the lower bound
                  */
0358:            private void translateQuantity() throws RegexSyntaxException {
0359:                String lower = parseQuantExact().toString();
                     // Stays at -1 if the lower bound is too large to parse as an int.
0360:                int lowerValue = -1;
0361:                try {
0362:                    lowerValue = Integer.parseInt(lower);
0363:                    result.append(lower);
0364:                } catch (NumberFormatException e) {
0365:                    // JDK 1.4 cannot handle ranges bigger than this
0366:                    result.append("" + Integer.MAX_VALUE);
0367:                }
0368:                if (curChar == ',') {
0369:                    copyCurChar();
                         // A '}' straight after the comma means there is no upper bound.
0370:                    if (curChar != '}') {
0371:                        String upper = parseQuantExact().toString();
0372:                        try {
0373:                            int upperValue = Integer.parseInt(upper);
0374:                            result.append(upper);
                                 // lowerValue < 0 here means the lower bound overflowed an int while
                                 // the upper bound did not, so the upper bound must be the smaller one.
0375:                            if (lowerValue < 0 || upperValue < lowerValue)
0376:                                throw makeException("invalid range in quantifier");
0377:                        } catch (NumberFormatException e) {
0378:                            result.append("" + Integer.MAX_VALUE);
                                 // Both bounds overflowed an int: compare them at arbitrary precision.
                                 // If only the upper bound overflowed, it is necessarily the larger one.
0379:                            if (lowerValue < 0
0380:                                    && new BigDecimal(lower)
0381:                                            .compareTo(new BigDecimal(upper)) > 0)
0382:                                throw makeException("invalid range in quantifier");
0383:                        }
0384:                    }
0385:                }
0386:            }
0387:
                 /**
                  * Reads one or more decimal digits from the input, stopping at the first ','
                  * or '}' (which is left as the current character). Nothing is written to the
                  * output.
                  *
                  * @return the digits that were read
                  * @throws RegexSyntaxException if a character other than '0'-'9' is met before
                  * a ',' or '}' -- this includes the case where the first character is itself
                  * ',' or '}', and the end of the input
                  */
0388:            private CharSequence parseQuantExact() throws RegexSyntaxException {
0389:                FastStringBuffer buf = new FastStringBuffer(10);
                     // do/while: at least one digit is required.
0390:                do {
0391:                    if ("0123456789".indexOf(curChar) < 0)
0392:                        throw makeException("expected digit in quantifier");
0393:                    buf.append(curChar);
0394:                    advance();
0395:                } while (curChar != ',' && curChar != '}');
0396:                return buf;
0397:            }
0398:
                 /**
                  * Appends the current character to the output unchanged and moves on to the
                  * next input character.
                  */
0399:            private void copyCurChar() {
0400:                result.append(curChar);
0401:                advance();
0402:            }
0403:
                 // Values taken by CharClass.containsBmp and CharClass.containsNonBmp
                 // (CharClass.output() switches on exactly these three).
0404:            static final int NONE = -1;
0405:            static final int SOME = 0;
0406:            static final int ALL = 1;
0407:
                 // Regex character class matching any high (first) surrogate.
0408:            static final String SURROGATES1_CLASS = "[\uD800-\uDBFF]";
                 // Regex character class matching any low (second) surrogate.
0409:            static final String SURROGATES2_CLASS = "[\uDC00-\uDFFF]";
                 // Intersection of {U+0000} with its own complement, i.e. a class that can never
                 // match; CharClass.output() emits it for a class containing no characters.
0410:            static final String NOT_ALLOWED_CLASS = "[\u0000&&[^\u0000]]";
0411:
                 /**
                  * An immutable pair of int bounds (min, max). In the code visible here it is
                  * used to hold ranges of non-BMP code points.
                  * Ordering: ascending by min; for equal min, the range with the larger max
                  * sorts first.
                  */
0412:            static final class Range implements  Comparable {
0413:                private final int min;
0414:                private final int max;
0415:
                     // No validation is performed: min > max is accepted as given.
0416:                Range(int min, int max) {
0417:                    this .min = min;
0418:                    this .max = max;
0419:                }
0420:
0421:                int getMin() {
0422:                    return min;
0423:                }
0424:
0425:                int getMax() {
0426:                    return max;
0427:                }
0428:
                     /**
                      * Compares this range with another Range.
                      *
                      * @param o the other range; the cast below fails with a ClassCastException
                      * for any other type
                      * @return a negative value, zero or a positive value, ordering first by min
                      * (ascending) and then by max (descending)
                      */
0429:                public int compareTo(Object o) {
0430:                    Range other = (Range) o;
0431:                    if (this .min < other.min)
0432:                        return -1;
0433:                    if (this .min > other.min)
0434:                        return 1;
                         // Same min: note the reversed sense -- the larger max comes first.
0435:                    if (this .max > other.max)
0436:                        return -1;
0437:                    if (this .max < other.max)
0438:                        return 1;
0439:                    return 0;
0440:                }
0441:            }
0442:
0443:            static abstract class CharClass {
0444:
                     // How much of the BMP this class contains; output() compares it with NONE
                     // (expected to be one of NONE, SOME or ALL).
0445:                private final int containsBmp;
0446:                // if it contains ALL and containsBmp != NONE, then the generated class for containsBmp must
0447:                // contain all the high surrogates
                     // How much of the non-BMP range this class contains: NONE, SOME or ALL.
0448:                private final int containsNonBmp;
0449:
                     /**
                      * Records the two coverage indicators supplied by the subclass.
                      *
                      * @param containsBmp coverage of BMP characters
                      * @param containsNonBmp coverage of non-BMP characters
                      */
0450:                protected CharClass(int containsBmp, int containsNonBmp) {
0451:                    this .containsBmp = containsBmp;
0452:                    this .containsNonBmp = containsNonBmp;
0453:                }
0454:
                     // Returns the BMP coverage indicator passed to the constructor.
0455:                int getContainsBmp() {
0456:                    return containsBmp;
0457:                }
0458:
                     // Returns the non-BMP coverage indicator passed to the constructor.
0459:                int getContainsNonBmp() {
0460:                    return containsNonBmp;
0461:                }
0462:
                     /**
                      * Appends to buf a regex fragment for this character class. Non-BMP
                      * characters are expressed as a high surrogate followed by a low surrogate.
                      * The shape of the output depends on containsNonBmp:
                      * NONE - just the BMP part, or NOT_ALLOWED_CLASS if that is empty too;
                      * ALL  - a non-capturing group matching any surrogate pair plus the BMP part;
                      * SOME - a non-capturing group of alternatives built from the explicit
                      *        non-BMP ranges of this class plus the BMP part.
                      * outputBmp(), addNonBmpRanges() and sortRangeList() are defined outside
                      * the code visible here.
                      *
                      * @param buf the buffer to append to
                      */
0463:                final void output(FastStringBuffer buf) {
0464:                    switch (containsNonBmp) {
0465:                    case NONE:
0466:                        if (containsBmp == NONE)
0467:                            buf.append(NOT_ALLOWED_CLASS);
0468:                        else
0469:                            outputBmp(buf);
0470:                        break;
0471:                    case ALL:
0472:                        buf.append("(?:");
0473:                        if (containsBmp == NONE) {
                                 // Only non-BMP characters: any high surrogate then any low surrogate.
0474:                            buf.append(SURROGATES1_CLASS);
0475:                            buf.append(SURROGATES2_CLASS);
0476:                        } else {
                                 // BMP class followed by an optional low surrogate. This relies on the
                                 // BMP class also matching every high surrogate (see the comment on
                                 // the containsNonBmp field).
0477:                            outputBmp(buf);
0478:                            buf.append(SURROGATES2_CLASS);
0479:                            buf.append('?');
0480:                        }
0481:                        buf.append(')');
0482:                        break;
0483:                    case SOME:
0484:                        buf.append("(?:");
                             // needSep records whether an alternative has already been written,
                             // in which case the next one must be preceded by '|'.
0485:                        boolean needSep = false;
0486:                        if (containsBmp != NONE) {
0487:                            needSep = true;
0488:                            outputBmp(buf);
0489:                        }
0490:                        List ranges = new ArrayList(10);
0491:                        addNonBmpRanges(ranges);
0492:                        sortRangeList(ranges);
                             // hi is a flat sequence of (min, max) high-surrogate pairs: high surrogates
                             // for which every low surrogate is included.
0493:                        String hi = highSurrogateRanges(ranges);
0494:                        if (hi.length() > 0) {
0495:                            if (needSep)
0496:                                buf.append('|');
0497:                            else
0498:                                needSep = true;
0499:                            buf.append('[');
0500:                            for (int i = 0, len = hi.length(); i < len; i += 2) {
0501:                                char min = hi.charAt(i);
0502:                                char max = hi.charAt(i + 1);
0503:                                if (min == max)
0504:                                    buf.append(min);
0505:                                else {
0506:                                    buf.append(min);
0507:                                    buf.append('-');
0508:                                    buf.append(max);
0509:                                }
0510:                            }
0511:                            buf.append(']');
0512:                            buf.append(SURROGATES2_CLASS);
0513:                        }
                             // lo is read as a flat sequence of triples: a high surrogate followed by
                             // the min and max of a low-surrogate range valid after it.
0514:                        String lo = lowSurrogateRanges(ranges);
0515:                        for (int i = 0, len = lo.length(); i < len; i += 3) {
0516:                            if (needSep)
0517:                                buf.append('|');
0518:                            else
0519:                                needSep = true;
0520:                            buf.append(lo.charAt(i));
0521:                            char min = lo.charAt(i + 1);
0522:                            char max = lo.charAt(i + 2);
                                 // A single low surrogate, with no further triple for the same high
                                 // surrogate, is written as a plain character without brackets.
0523:                            if (min == max
0524:                                    && (i + 3 >= len || lo.charAt(i + 3) != lo
0525:                                            .charAt(i)))
0526:                                buf.append(min);
0527:                            else {
0528:                                buf.append('[');
                                     // Merge all consecutive triples sharing this high surrogate into
                                     // one bracketed class; i is advanced past each merged triple.
0529:                                for (;;) {
0530:                                    if (min == max)
0531:                                        buf.append(min);
0532:                                    else {
0533:                                        buf.append(min);
0534:                                        buf.append('-');
0535:                                        buf.append(max);
0536:                                    }
0537:                                    if (i + 3 >= len
0538:                                            || lo.charAt(i + 3) != lo.charAt(i))
0539:                                        break;
0540:                                    i += 3;
0541:                                    min = lo.charAt(i + 1);
0542:                                    max = lo.charAt(i + 2);
0543:                                }
0544:                                buf.append(']');
0545:                            }
0546:                        }
                             // Nothing was written at all: emit a class that can never match.
0547:                        if (!needSep)
0548:                            buf.append(NOT_ALLOWED_CLASS);
0549:                        buf.append(')');
0550:                        break;
0551:                    }
0552:                }
0553:
0554:                static String highSurrogateRanges(List ranges) {
0555:                    FastStringBuffer highRanges = new FastStringBuffer(ranges
0556:                            .size() * 2);
0557:                    for (int i = 0, len = ranges.size(); i < len; i++) {
0558:                        Range r = (Range) ranges.get(i);
0559:                        char min1 = XMLChar.highSurrogate(r.getMin());
0560:                        char min2 = XMLChar.lowSurrogate(r.getMin());
0561:                        char max1 = XMLChar.highSurrogate(r.getMax());
0562:                        char max2 = XMLChar.lowSurrogate(r.getMax());
0563:                        if (min2 != SURROGATE2_MIN)
0564:                            min1++;
0565:                        if (max2 != SURROGATE2_MAX)
0566:                            max1--;
0567:                        if (max1 >= min1) {
0568:                            highRanges.append(min1);
0569:                            highRanges.append(max1);
0570:                        }
0571:                    }
0572:                    return highRanges.toString();
0573:                }
0574:
0575:                static String lowSurrogateRanges(List ranges) {
0576:                    FastStringBuffer lowRanges = new FastStringBuffer(ranges
0577:                            .size() * 2);
0578:                    for (int i = 0, len = ranges.size(); i < len; i++) {
0579:                        Range r = (Range) ranges.get(i);
0580:                        char min1 = XMLChar.highSurrogate(r.getMin());
0581:                        char min2 = XMLChar.lowSurrogate(r.getMin());
0582:                        char max1 = XMLChar.highSurrogate(r.getMax());
0583:                        char max2 = XMLChar.lowSurrogate(r.getMax());
0584:                        if (min1 == max1) {
0585:                            if (min2 != SURROGATE2_MIN
0586:                                    || max2 != SURROGATE2_MAX) {
0587:                                lowRanges.append(min1);
0588:                                lowRanges.append(min2);
0589:                                lowRanges.append(max2);
0590:                            }
0591:                        } else {
0592:                            if (min2 != SURROGATE2_MIN) {
0593:                                lowRanges.append(min1);
0594:                                lowRanges.append(min2);
0595:                                lowRanges.append(SURROGATE2_MAX);
0596:                            }
0597:                            if (max2 != SURROGATE2_MAX) {
0598:                                lowRanges.append(max1);
0599:                                lowRanges.append(SURROGATE2_MIN);
0600:                                lowRanges.append(max2);
0601:                            }
0602:                        }
0603:                    }
0604:                    return lowRanges.toString();
0605:                }
0606:
0607:                abstract void outputBmp(FastStringBuffer buf); // implementations in this file append Java regex text for the class's BMP part to buf
0608:
0609:                abstract void outputComplementBmp(FastStringBuffer buf); // appends Java regex text for the complement of the BMP part; SimpleCharClass notes it must not be called if containsBmp == ALL
0610:
0611:                int getSingleChar() {
                         // Default answer is -1; SingleChar and WideSingleChar
                         // override this to return the character they hold.
0612:                    return -1;
0613:                }
0614:
0615:                void addNonBmpRanges(List ranges) {
                         // Default adds nothing to the list; subclasses such as
                         // WideSingleChar and CharRange override this to add Range objects.
0616:                }
0617:
0618:                static void sortRangeList(List ranges) {
0619:                    Collections.sort(ranges);
0620:                    int toIndex = 0;
0621:                    int fromIndex = 0;
0622:                    int len = ranges.size();
0623:                    while (fromIndex < len) {
0624:                        Range r = (Range) ranges.get(fromIndex);
0625:                        int min = r.getMin();
0626:                        int max = r.getMax();
0627:                        while (++fromIndex < len) {
0628:                            Range r2 = (Range) ranges.get(fromIndex);
0629:                            if (r2.getMin() > max + 1)
0630:                                break;
0631:                            if (r2.getMax() > max)
0632:                                max = r2.getMax();
0633:                        }
0634:                        if (max != r.getMax())
0635:                            r = new Range(min, max);
0636:                        ranges.set(toIndex++, r);
0637:                    }
0638:                    while (len > toIndex)
0639:                        ranges.remove(--len);
0640:                }
0641:
0642:            }
0643:
0644:            static abstract class SimpleCharClass extends CharClass {
0645:                SimpleCharClass(int containsBmp, int containsNonBmp) {
0646:                    super (containsBmp, containsNonBmp);
0647:                }
0648:
0649:                void outputBmp(FastStringBuffer buf) {
0650:                    buf.append('[');
0651:                    inClassOutputBmp(buf);
0652:                    buf.append(']');
0653:                }
0654:
0655:                // must not call if containsBmp == ALL
0656:                void outputComplementBmp(FastStringBuffer buf) {
0657:                    if (getContainsBmp() == NONE)
0658:                        buf.append("[\u0000-\uFFFF]");
0659:                    else {
0660:                        buf.append("[^");
0661:                        inClassOutputBmp(buf);
0662:                        buf.append(']');
0663:                    }
0664:                }
0665:
0666:                abstract void inClassOutputBmp(FastStringBuffer buf);
0667:            }
0668:
0669:            static class SingleChar extends SimpleCharClass {
0670:                private final char c;
0671:
0672:                SingleChar(char c) {
0673:                    super (SOME, NONE);
0674:                    this .c = c;
0675:                }
0676:
0677:                int getSingleChar() {
0678:                    return c;
0679:                }
0680:
0681:                void outputBmp(FastStringBuffer buf) {
0682:                    inClassOutputBmp(buf);
0683:                }
0684:
0685:                void inClassOutputBmp(FastStringBuffer buf) {
0686:                    if (isJavaMetaChar(c)) {
0687:                        buf.append('\\');
0688:                        buf.append(c);
0689:                    } else {
0690:                        switch (c) {
0691:                        case '\r':
0692:                            buf.append("\\r");
0693:                            break;
0694:                        case '\n':
0695:                            buf.append("\\n");
0696:                            break;
0697:                        case '\t':
0698:                            buf.append("\\t");
0699:                            break;
0700:                        case ' ':
0701:                            buf.append("\\x20");
0702:                            break;
0703:                        default:
0704:                            buf.append(c);
0705:                        }
0706:                    }
0707:                    return;
0708:                }
0709:
0710:            }
0711:
0712:            static class WideSingleChar extends SimpleCharClass {
0713:                private final int c;
0714:
0715:                WideSingleChar(int c) {
0716:                    super (NONE, SOME);
0717:                    this .c = c;
0718:                }
0719:
0720:                void inClassOutputBmp(FastStringBuffer buf) {
0721:                    throw new RuntimeException("BMP output botch");
0722:                }
0723:
0724:                int getSingleChar() {
0725:                    return c;
0726:                }
0727:
0728:                void addNonBmpRanges(List ranges) {
0729:                    ranges.add(new Range(c, c));
0730:                }
0731:            }
0732:
0733:            static class Empty extends SimpleCharClass {
0734:                private static final Empty instance = new Empty();
0735:
0736:                private Empty() {
0737:                    super (NONE, NONE);
0738:                }
0739:
0740:                static Empty getInstance() {
0741:                    return instance;
0742:                }
0743:
0744:                void inClassOutputBmp(FastStringBuffer buf) {
0745:                    throw new RuntimeException("BMP output botch");
0746:                }
0747:
0748:            }
0749:
0750:            static class CharRange extends SimpleCharClass {
0751:                private final int lower;
0752:                private final int upper;
0753:
0754:                CharRange(int lower, int upper) {
0755:                    super (lower < NONBMP_MIN ? SOME : NONE,
0756:                    // don't use ALL here, because that requires that the BMP class contains high surrogates
0757:                            upper >= NONBMP_MIN ? SOME : NONE);
0758:                    this .lower = lower;
0759:                    this .upper = upper;
0760:                }
0761:
0762:                void inClassOutputBmp(FastStringBuffer buf) {
0763:                    if (lower >= NONBMP_MIN)
0764:                        throw new RuntimeException("BMP output botch");
0765:                    if (isJavaMetaChar((char) lower))
0766:                        buf.append('\\');
0767:                    buf.append((char) lower);
0768:                    buf.append('-');
0769:                    if (upper < NONBMP_MIN) {
0770:                        if (isJavaMetaChar((char) upper))
0771:                            buf.append('\\');
0772:                        buf.append((char) upper);
0773:                    } else
0774:                        buf.append('\uFFFF');
0775:                }
0776:
0777:                void addNonBmpRanges(List ranges) {
0778:                    if (upper >= NONBMP_MIN)
0779:                        ranges.add(new Range(lower < NONBMP_MIN ? NONBMP_MIN
0780:                                : lower, upper));
0781:                }
0782:            }
0783:
0784:            static class Property extends SimpleCharClass {
0785:                private final String name;
0786:
0787:                Property(String name) {
0788:                    super (SOME, NONE);
0789:                    this .name = name;
0790:                }
0791:
0792:                void outputBmp(FastStringBuffer buf) {
0793:                    inClassOutputBmp(buf);
0794:                }
0795:
0796:                void inClassOutputBmp(FastStringBuffer buf) {
0797:                    buf.append("\\p{");
0798:                    buf.append(name);
0799:                    buf.append('}');
0800:                }
0801:
0802:                void outputComplementBmp(FastStringBuffer buf) {
0803:                    buf.append("\\P{");
0804:                    buf.append(name);
0805:                    buf.append('}');
0806:                }
0807:            }
0808:
0809:            static class Subtraction extends CharClass {
0810:                private final CharClass cc1;
0811:                private final CharClass cc2;
0812:
0813:                Subtraction(CharClass cc1, CharClass cc2) {
0814:                    // min corresponds to intersection
0815:                    // complement corresponds to negation
0816:                    super (
0817:                            Math.min(cc1.getContainsBmp(), -cc2
0818:                                    .getContainsBmp()), Math.min(cc1
0819:                                    .getContainsNonBmp(), -cc2
0820:                                    .getContainsNonBmp()));
0821:                    this .cc1 = cc1;
0822:                    this .cc2 = cc2;
0823:                }
0824:
0825:                void outputBmp(FastStringBuffer buf) {
0826:                    buf.append('[');
0827:                    cc1.outputBmp(buf);
0828:                    buf.append("&&");
0829:                    cc2.outputComplementBmp(buf);
0830:                    buf.append(']');
0831:                }
0832:
0833:                void outputComplementBmp(FastStringBuffer buf) {
0834:                    buf.append('[');
0835:                    cc1.outputComplementBmp(buf);
0836:                    cc2.outputBmp(buf);
0837:                    buf.append(']');
0838:                }
0839:
0840:                void addNonBmpRanges(List ranges) {
0841:                    List posList = new Vector();
0842:                    cc1.addNonBmpRanges(posList);
0843:                    List negList = new Vector();
0844:                    cc2.addNonBmpRanges(negList);
0845:                    sortRangeList(posList);
0846:                    sortRangeList(negList);
0847:                    Iterator negIter = negList.iterator();
0848:                    Range negRange;
0849:                    if (negIter.hasNext())
0850:                        negRange = (Range) negIter.next();
0851:                    else
0852:                        negRange = null;
0853:                    for (int i = 0, len = posList.size(); i < len; i++) {
0854:                        Range posRange = (Range) posList.get(i);
0855:                        while (negRange != null
0856:                                && negRange.getMax() < posRange.getMin()) {
0857:                            if (negIter.hasNext())
0858:                                negRange = (Range) negIter.next();
0859:                            else
0860:                                negRange = null;
0861:                        }
0862:                        // if negRange != null, negRange.max >= posRange.min
0863:                        int min = posRange.getMin();
0864:                        while (negRange != null
0865:                                && negRange.getMin() <= posRange.getMax()) {
0866:                            if (min < negRange.getMin()) {
0867:                                ranges
0868:                                        .add(new Range(min,
0869:                                                negRange.getMin() - 1));
0870:                            }
0871:                            min = negRange.getMax() + 1;
0872:                            if (min > posRange.getMax())
0873:                                break;
0874:                            if (negIter.hasNext())
0875:                                negRange = (Range) negIter.next();
0876:                            else
0877:                                negRange = null;
0878:                        }
0879:                        if (min <= posRange.getMax())
0880:                            ranges.add(new Range(min, posRange.getMax()));
0881:                    }
0882:                }
0883:            }
0884:
0885:            static class Union extends CharClass {
0886:                private final List members;
0887:
0888:                Union(CharClass[] v) {
0889:                    this (toList(v));
0890:                }
0891:
0892:                private static List toList(CharClass[] v) {
0893:                    List members = new Vector();
0894:                    for (int i = 0; i < v.length; i++)
0895:                        members.add(v[i]);
0896:                    return members;
0897:                }
0898:
0899:                Union(List members) {
0900:                    super (computeContainsBmp(members),
0901:                            computeContainsNonBmp(members));
0902:                    this .members = members;
0903:                }
0904:
0905:                void outputBmp(FastStringBuffer buf) {
0906:                    buf.append('[');
0907:                    for (int i = 0, len = members.size(); i < len; i++) {
0908:                        CharClass cc = (CharClass) members.get(i);
0909:                        if (cc.getContainsBmp() != NONE) {
0910:                            if (cc instanceof  SimpleCharClass)
0911:                                ((SimpleCharClass) cc).inClassOutputBmp(buf);
0912:                            else
0913:                                cc.outputBmp(buf);
0914:                        }
0915:                    }
0916:                    buf.append(']');
0917:                }
0918:
0919:                void outputComplementBmp(FastStringBuffer buf) {
0920:                    boolean first = true;
0921:                    int len = members.size();
0922:                    for (int i = 0; i < len; i++) {
0923:                        CharClass cc = (CharClass) members.get(i);
0924:                        if (cc.getContainsBmp() != NONE
0925:                                && cc instanceof  SimpleCharClass) {
0926:                            if (first) {
0927:                                buf.append("[^");
0928:                                first = false;
0929:                            }
0930:                            ((SimpleCharClass) cc).inClassOutputBmp(buf);
0931:                        }
0932:                    }
0933:                    for (int i = 0; i < len; i++) {
0934:                        CharClass cc = (CharClass) members.get(i);
0935:                        if (cc.getContainsBmp() != NONE
0936:                                && !(cc instanceof  SimpleCharClass)) {
0937:                            if (first) {
0938:                                buf.append('[');
0939:                                first = false;
0940:                            } else
0941:                                buf.append("&&");
0942:                            // can't have any members that are ALL, because that would make this ALL, which violates
0943:                            // the precondition for outputComplementBmp
0944:                            cc.outputComplementBmp(buf);
0945:                        }
0946:                    }
0947:                    if (first == true)
0948:                        // all members are NONE, so this is NONE, so complement is everything
0949:                        buf.append("[\u0000-\uFFFF]");
0950:                    else
0951:                        buf.append(']');
0952:                }
0953:
0954:                void addNonBmpRanges(List ranges) {
0955:                    for (int i = 0, len = members.size(); i < len; i++)
0956:                        ((CharClass) members.get(i)).addNonBmpRanges(ranges);
0957:                }
0958:
0959:                private static int computeContainsBmp(List members) {
0960:                    int ret = NONE;
0961:                    for (int i = 0, len = members.size(); i < len; i++)
0962:                        ret = Math.max(ret, ((CharClass) members.get(i))
0963:                                .getContainsBmp());
0964:                    return ret;
0965:                }
0966:
0967:                private static int computeContainsNonBmp(List members) {
0968:                    int ret = NONE;
0969:                    for (int i = 0, len = members.size(); i < len; i++)
0970:                        ret = Math.max(ret, ((CharClass) members.get(i))
0971:                                .getContainsNonBmp());
0972:                    return ret;
0973:                }
0974:            }
0975:
0976:            static class BackReference extends CharClass {
0977:                private final int i;
0978:
0979:                BackReference(int i) {
0980:                    super (SOME, NONE);
0981:                    this .i = i;
0982:                }
0983:
0984:                void outputBmp(FastStringBuffer buf) {
0985:                    inClassOutputBmp(buf);
0986:                }
0987:
0988:                void outputComplementBmp(FastStringBuffer buf) {
0989:                    inClassOutputBmp(buf);
0990:                }
0991:
0992:                void inClassOutputBmp(FastStringBuffer buf) {
0993:                    buf.append("\\" + i);
0994:                }
0995:            }
0996:
0997:            /**
0998:             * Thrown when a syntactically incorrect regular expression is detected.
0999:             */
1000:            public static class RegexSyntaxException extends Exception {
                     // Index reported by getPosition(); UNKNOWN_POSITION when not supplied.
1001:                private final int position;
1002:
1003:                /**
1004:                 * Represents an unknown position within a string containing a regular expression.
1005:                 */
1006:                public static final int UNKNOWN_POSITION = -1;
1007:
                     /**
                      * Creates an exception with the given detail message and an unknown position.
                      */
1008:                public RegexSyntaxException(String detail) {
1009:                    this (detail, UNKNOWN_POSITION);
1010:                }
1011:
                     /**
                      * Creates an exception with the given detail message and error position.
                      */
1012:                public RegexSyntaxException(String detail, int position) {
1013:                    super (detail);
1014:                    this .position = position;
1015:                }
1016:
1017:                /**
1018:                 * Returns the index into the regular expression where the error was detected
1019:                 * or <code>UNKNOWN_POSITION</code> if this is unknown.
1020:                 *
1021:                 * @return the index into the regular expression where the error was detected,
1022:                 * or <code>UNKNOWN_POSITION</code> if this is unknown
1023:                 */
1024:                public int getPosition() {
1025:                    return position;
1026:                }
1027:            }
1028:
1029:            //    public static class Localizer {
1030:            //        private final Class cls;
1031:            //        private ResourceBundle bundle;
1032:            //
1033:            //        public Localizer(Class cls) {
1034:            //            this.cls = cls;
1035:            //        }
1036:            //
1037:            //        public String message(String key) {
1038:            //            return MessageFormat.format(getBundle().getString(key), new Object[]{});
1039:            //        }
1040:            //
1041:            //        public String message(String key, Object arg) {
1042:            //            return MessageFormat.format(getBundle().getString(key),
1043:            //                                        new Object[]{arg});
1044:            //        }
1045:            //
1046:            //        public String message(String key, Object arg1, Object arg2) {
1047:            //            return MessageFormat.format(getBundle().getString(key),
1048:            //                                        new Object[]{arg1, arg2});
1049:            //        }
1050:            //
1051:            //        public String message(String key, Object[] args) {
1052:            //            return MessageFormat.format(getBundle().getString(key), args);
1053:            //        }
1054:            //
1055:            //        private ResourceBundle getBundle() {
1056:            //            if (bundle == null) {
1057:            //                String s = cls.getName();
1058:            //                int i = s.lastIndexOf('.');
1059:            //                if (i > 0)
1060:            //                    s = s.substring(0, i + 1);
1061:            //                else
1062:            //                    s = "";
1063:            //                bundle = ResourceBundle.getBundle(s + "resources.Messages");
1064:            //            }
1065:            //            return bundle;
1066:            //        }
1067:            //    }
1068:
1069:            static class Complement extends CharClass {
1070:                private final CharClass cc;
1071:
1072:                Complement(CharClass cc) {
1073:                    super (-cc.getContainsBmp(), -cc.getContainsNonBmp());
1074:                    this .cc = cc;
1075:                }
1076:
1077:                void outputBmp(FastStringBuffer buf) {
1078:                    cc.outputComplementBmp(buf);
1079:                }
1080:
1081:                void outputComplementBmp(FastStringBuffer buf) {
1082:                    cc.outputBmp(buf);
1083:                }
1084:
1085:                void addNonBmpRanges(List ranges) {
1086:                    List tem = new Vector();
1087:                    cc.addNonBmpRanges(tem);
1088:                    sortRangeList(tem);
1089:                    int c = NONBMP_MIN;
1090:                    for (int i = 0, len = tem.size(); i < len; i++) {
1091:                        Range r = (Range) tem.get(i);
1092:                        if (r.getMin() > c)
1093:                            ranges.add(new Range(c, r.getMin() - 1));
1094:                        c = r.getMax() + 1;
1095:                    }
1096:                    if (c != NONBMP_MAX + 1)
1097:                        ranges.add(new Range(c, NONBMP_MAX));
1098:                }
1099:            }
1100:
1101:            private boolean translateAtom() throws RegexSyntaxException {
1102:                switch (curChar) {
1103:                case EOS:
1104:                    if (!eos)
1105:                        break;
1106:                    // fall through
1107:                case '?':
1108:                case '*':
1109:                case '+':
1110:                case ')':
1111:                case '{':
1112:                case '}':
1113:                case '|':
1114:                case ']':
1115:                    return false;
1116:                case '(':
1117:                    copyCurChar();
1118:                    translateRegExp();
1119:                    expect(')');
1120:                    copyCurChar();
1121:                    return true;
1122:                case '\\':
1123:                    advance();
1124:                    parseEsc().output(result);
1125:                    return true;
1126:                case '[':
1127:                    advance();
1128:                    parseCharClassExpr().output(result);
1129:                    return true;
1130:                case '.':
1131:                    if (isXPath) {
1132:                        // Note: "." matches a surrogate pair under JDK 1.5, but not under JDK 1.4
1133:                        // We'll live with this problem until 1.4 goes away...
1134:                        break;
1135:                    } else {
1136:                        DOT.output(result);
1137:                        advance();
1138:                        return true;
1139:                    }
1140:                case '$':
1141:                case '^':
1142:                    if (isXPath) {
1143:                        copyCurChar();
1144:                        return true;
1145:                    }
1146:                    result.append('\\');
1147:                    break;
1148:                }
1149:                copyCurChar();
1150:                return true;
1151:            }
1152:
1153:            static private CharClass makeCharClass(String categories,
1154:                    String includes, String excludeRanges) {
1155:                List includeList = new Vector();
1156:                for (int i = 0, len = categories.length(); i < len; i += 2)
1157:                    includeList
1158:                            .add(new Property(categories.substring(i, i + 2)));
1159:                for (int i = 0, len = includes.length(); i < len; i++) {
1160:                    int j = i + 1;
1161:                    for (; j < len
1162:                            && includes.charAt(j) - includes.charAt(i) == j - i; j++)
1163:                        ;
1164:                    --j;
1165:                    if (i == j - 1)
1166:                        --j;
1167:                    if (i == j)
1168:                        includeList.add(new SingleChar(includes.charAt(i)));
1169:                    else
1170:                        includeList.add(new CharRange(includes.charAt(i),
1171:                                includes.charAt(j)));
1172:                    i = j;
1173:                }
1174:                List excludeList = new Vector();
1175:                for (int i = 0, len = excludeRanges.length(); i < len; i += 2) {
1176:                    char min = excludeRanges.charAt(i);
1177:                    char max = excludeRanges.charAt(i + 1);
1178:                    if (min == max)
1179:                        excludeList.add(new SingleChar(min));
1180:                    else if (min == max - 1) {
1181:                        excludeList.add(new SingleChar(min));
1182:                        excludeList.add(new SingleChar(max));
1183:                    } else
1184:                        excludeList.add(new CharRange(min, max));
1185:                }
1186:                return new Subtraction(new Union(includeList), new Union(
1187:                        excludeList));
1188:            }
1189:
1190:            private CharClass parseEsc() throws RegexSyntaxException {
1191:                switch (curChar) {
1192:                case 'n':
1193:                    advance();
1194:                    return new SingleChar('\n');
1195:                case 'r':
1196:                    advance();
1197:                    return new SingleChar('\r');
1198:                case 't':
1199:                    advance();
1200:                    return new SingleChar('\t');
1201:                case '\\':
1202:                case '|':
1203:                case '.':
1204:                case '-':
1205:                case '^':
1206:                case '?':
1207:                case '*':
1208:                case '+':
1209:                case '(':
1210:                case ')':
1211:                case '{':
1212:                case '}':
1213:                case '[':
1214:                case ']':
1215:                    break;
1216:                case 's':
1217:                    advance();
1218:                    return ESC_s;
1219:                case 'S':
1220:                    advance();
1221:                    return ESC_S;
1222:                case 'i':
1223:                    advance();
1224:                    return ESC_i;
1225:                case 'I':
1226:                    advance();
1227:                    return ESC_I;
1228:                case 'c':
1229:                    advance();
1230:                    return ESC_c;
1231:                case 'C':
1232:                    advance();
1233:                    return ESC_C;
1234:                case 'd':
1235:                    advance();
1236:                    return ESC_d;
1237:                case 'D':
1238:                    advance();
1239:                    return ESC_D;
1240:                case 'w':
1241:                    advance();
1242:                    return ESC_w;
1243:                case 'W':
1244:                    advance();
1245:                    return ESC_W;
1246:                case 'p':
1247:                    advance();
1248:                    return parseProp();
1249:                case 'P':
1250:                    advance();
1251:                    return new Complement(parseProp());
1252:                case '0':
1253:                case '1':
1254:                case '2':
1255:                case '3':
1256:                case '4':
1257:                case '5':
1258:                case '6':
1259:                case '7':
1260:                case '8':
1261:                case '9':
1262:                    if (isXPath) {
1263:                        char c = curChar;
1264:                        advance();
1265:                        return new BackReference(c - '0');
1266:                    } else {
1267:                        throw makeException("digit not allowed after \\");
1268:                    }
1269:                case '$':
1270:                    if (isXPath) {
1271:                        break;
1272:                    }
1273:                    // otherwise fall through
1274:                default:
1275:                    throw makeException("invalid escape sequence");
1276:                }
1277:                CharClass tem = new SingleChar(curChar);
1278:                advance();
1279:                return tem;
1280:            }
1281:
1282:            private CharClass parseProp() throws RegexSyntaxException {
1283:                expect('{');
1284:                int start = pos;
1285:                for (;;) {
1286:                    advance();
1287:                    if (curChar == '}')
1288:                        break;
1289:                    if (!isAsciiAlnum(curChar) && curChar != '-')
1290:                        expect('}');
1291:                }
1292:                String propertyName = regExp.subSequence(start, pos - 1)
1293:                        .toString();
1294:                advance();
1295:                switch (propertyName.length()) {
1296:                case 0:
1297:                    throw makeException("empty property name");
1298:                case 2:
1299:                    int sci = subCategories.indexOf(propertyName);
1300:                    if (sci < 0 || sci % 2 == 1)
1301:                        throw makeException("unknown category");
1302:                    return getSubCategoryCharClass(sci / 2);
1303:                case 1:
1304:                    int ci = categories.indexOf(propertyName.charAt(0));
1305:                    if (ci < 0)
1306:                        throw makeException("unknown category", propertyName);
1307:                    return getCategoryCharClass(ci);
1308:                default:
1309:                    if (!propertyName.startsWith("Is"))
1310:                        break;
1311:                    String blockName = propertyName.substring(2);
1312:                    for (int i = 0; i < specialBlockNames.length; i++)
1313:                        if (blockName.equals(specialBlockNames[i]))
1314:                            return specialBlockCharClasses[i];
1315:                    if (!isBlock(blockName))
1316:                        throw makeException("invalid block name", blockName);
1317:                    return new Property("In" + blockName);
1318:                }
1319:                throw makeException("invalid property name", propertyName);
1320:            }
1321:
1322:            private static boolean isBlock(String name) {
1323:                for (int i = 0; i < blockNames.length; i++)
1324:                    if (name.equals(blockNames[i]))
1325:                        return true;
1326:                return false;
1327:            }
1328:
1329:            private static boolean isAsciiAlnum(char c) {
1330:                if ('a' <= c && c <= 'z')
1331:                    return true;
1332:                if ('A' <= c && c <= 'Z')
1333:                    return true;
1334:                if ('0' <= c && c <= '9')
1335:                    return true;
1336:                return false;
1337:            }
1338:
1339:            private void expect(char c) throws RegexSyntaxException {
1340:                if (curChar != c)
1341:                    throw makeException("expected",
1342:                            new String(new char[] { c }));
1343:            }
1344:
1345:            private CharClass parseCharClassExpr() throws RegexSyntaxException {
1346:                boolean compl;
1347:                if (curChar == '^') {
1348:                    advance();
1349:                    compl = true;
1350:                } else
1351:                    compl = false;
1352:                List members = new ArrayList(10);
1353:                boolean first = true;
1354:                do {
1355:                    CharClass lower = parseCharClassEscOrXmlChar(first);
1356:                    first = false;
1357:                    members.add(lower);
1358:                    if (curChar == '-') {
1359:                        advance();
1360:                        if (curChar == ']') { // MHK: [+-] is reallowed by Schema Oct 2004 2nd edition
1361:                            break;
1362:                        }
1363:                        if (curChar == '[') {
1364:                            break;
1365:                        }
1366:                        CharClass upper = parseCharClassEscOrXmlChar(first);
1367:                        if (lower.getSingleChar() < 0
1368:                                || upper.getSingleChar() < 0)
1369:                            throw makeException("multi_range");
1370:                        if (lower.getSingleChar() > upper.getSingleChar())
1371:                            throw makeException("invalid range (start > end)");
1372:                        members.set(members.size() - 1, new CharRange(lower
1373:                                .getSingleChar(), upper.getSingleChar()));
1374:                        if (curChar == '-') {
1375:                            advance();
1376:                            expect('[');
1377:                            break;
1378:                        }
1379:                    }
1380:                } while (curChar != ']');
1381:                CharClass result;
1382:                if (members.size() == 1)
1383:                    result = (CharClass) members.get(0);
1384:                else
1385:                    result = new Union(members);
1386:                if (compl)
1387:                    result = new Complement(result);
1388:                if (curChar == '[') {
1389:                    advance();
1390:                    result = new Subtraction(result, parseCharClassExpr());
1391:                    expect(']');
1392:                }
1393:                advance();
1394:                return result;
1395:            }
1396:
1397:            private CharClass parseCharClassEscOrXmlChar(boolean first)
1398:                    throws RegexSyntaxException {
1399:                switch (curChar) {
1400:                case EOS:
1401:                    if (eos)
1402:                        expect(']');
1403:                    break;
1404:                case '\\':
1405:                    advance();
1406:                    return parseEsc();
1407:                case '[':
1408:                case ']':
1409:                    throw makeException("character must be escaped",
1410:                            new String(new char[] { curChar }));
1411:                case '-':
1412:                    if (!first) {
1413:                        throw makeException("character must be escaped",
1414:                                new String(new char[] { curChar }));
1415:                    }
1416:                    break;
1417:                }
1418:                CharClass tem;
1419:                if (XMLChar.isSurrogate(curChar)) {
1420:                    if (!XMLChar.isHighSurrogate(curChar))
1421:                        throw makeException("invalid surrogate pair");
1422:                    char c1 = curChar;
1423:                    advance();
1424:                    if (!XMLChar.isLowSurrogate(curChar))
1425:                        throw makeException("invalid surrogate pair");
1426:                    tem = new WideSingleChar(XMLChar.supplemental(c1, curChar));
1427:                } else
1428:                    tem = new SingleChar(curChar);
1429:                advance();
1430:                return tem;
1431:            }
1432:
1433:            private RegexSyntaxException makeException(String key) {
1434:                return new RegexSyntaxException("Error at character "
1435:                        + (pos - 1) + " in regular expression: " + key);
1436:            }
1437:
1438:            private RegexSyntaxException makeException(String key, String arg) {
1439:                return new RegexSyntaxException("Error at character "
1440:                        + (pos - 1) + " in regular expression: " + key + " ("
1441:                        + arg + ')');
1442:            }
1443:
1444:            private static boolean isJavaMetaChar(char c) {
1445:                switch (c) {
1446:                case '\\':
1447:                case '^':
1448:                case '?':
1449:                case '*':
1450:                case '+':
1451:                case '(':
1452:                case ')':
1453:                case '{':
1454:                case '}':
1455:                case '|':
1456:                case '[':
1457:                case ']':
1458:                case '-':
1459:                case '&':
1460:                case '$':
1461:                case '.':
1462:                    return true;
1463:                }
1464:                return false;
1465:            }
1466:
1467:            private static synchronized CharClass getCategoryCharClass(int ci) {
1468:                if (categoryCharClasses[ci] == null)
1469:                    categoryCharClasses[ci] = computeCategoryCharClass(categories
1470:                            .charAt(ci));
1471:                return categoryCharClasses[ci];
1472:            }
1473:
1474:            private static synchronized CharClass getSubCategoryCharClass(
1475:                    int sci) {
1476:                if (subCategoryCharClasses[sci] == null)
1477:                    subCategoryCharClasses[sci] = computeSubCategoryCharClass(subCategories
1478:                            .substring(sci * 2, (sci + 1) * 2));
1479:                return subCategoryCharClasses[sci];
1480:            }
1481:
            // Adjustments that computeCategoryCharClass and
            // computeSubCategoryCharClass apply on top of the classes obtained
            // from Property(...).

            // Single characters added explicitly to categories Lu and Ll (and so
            // to L), and excluded from Cn.
            private static final char UNICODE_3_1_ADD_Lu = '\u03F4'; // added in 3.1
            private static final char UNICODE_3_1_ADD_Ll = '\u03F5'; // added in 3.1
            // 3 characters changed from No to Nl between 3.0 and 3.1
            // (inclusive range: added to Nl and subtracted from No)
            private static final char UNICODE_3_1_CHANGE_No_to_Nl_MIN = '\u16EE';
            private static final char UNICODE_3_1_CHANGE_No_to_Nl_MAX = '\u16F0';
            // Explicit member lists for the two punctuation sub-categories below;
            // both are also added to category P.
            private static final String CATEGORY_Pi = "\u00AB\u2018\u201B\u201C\u201F\u2039"; // Java doesn't know about category Pi
            private static final String CATEGORY_Pf = "\u00BB\u2019\u201D\u203A"; // Java doesn't know about category Pf
1489:
1490:            private static CharClass computeCategoryCharClass(char code) {
1491:                List classes = new Vector();
1492:                classes.add(new Property(new String(new char[] { code })));
1493:                for (int ci = CATEGORY_NAMES.indexOf(code); ci >= 0; ci = CATEGORY_NAMES
1494:                        .indexOf(code, ci + 1)) {
1495:                    int[] addRanges = CATEGORY_RANGES[ci / 2];
1496:                    for (int i = 0; i < addRanges.length; i += 2)
1497:                        classes.add(new CharRange(addRanges[i],
1498:                                addRanges[i + 1]));
1499:                }
1500:                if (code == 'P')
1501:                    classes.add(makeCharClass(CATEGORY_Pi + CATEGORY_Pf));
1502:                if (code == 'L') {
1503:                    classes.add(new SingleChar(UNICODE_3_1_ADD_Ll));
1504:                    classes.add(new SingleChar(UNICODE_3_1_ADD_Lu));
1505:                }
1506:                if (code == 'C') {
1507:                    // JDK 1.4 leaves Cn out of C?
1508:                    classes.add(new Subtraction(new Property("Cn"), new Union(
1509:                            new CharClass[] {
1510:                                    new SingleChar(UNICODE_3_1_ADD_Lu),
1511:                                    new SingleChar(UNICODE_3_1_ADD_Ll) })));
1512:                    List assignedRanges = new Vector();
1513:                    for (int i = 0; i < CATEGORY_RANGES.length; i++)
1514:                        for (int j = 0; j < CATEGORY_RANGES[i].length; j += 2)
1515:                            assignedRanges.add(new CharRange(
1516:                                    CATEGORY_RANGES[i][j],
1517:                                    CATEGORY_RANGES[i][j + 1]));
1518:                    classes.add(new Subtraction(new CharRange(NONBMP_MIN,
1519:                            NONBMP_MAX), new Union(assignedRanges)));
1520:                }
1521:                if (classes.size() == 1)
1522:                    return (CharClass) classes.get(0);
1523:                return new Union(classes);
1524:            }
1525:
1526:            private static CharClass computeSubCategoryCharClass(String name) {
1527:                CharClass base = new Property(name);
1528:                int sci = CATEGORY_NAMES.indexOf(name);
1529:                if (sci < 0) {
1530:                    if (name.equals("Cn")) {
1531:                        // Unassigned
1532:                        List assignedRanges = new Vector();
1533:                        assignedRanges.add(new SingleChar(UNICODE_3_1_ADD_Lu));
1534:                        assignedRanges.add(new SingleChar(UNICODE_3_1_ADD_Ll));
1535:                        for (int i = 0; i < CATEGORY_RANGES.length; i++)
1536:                            for (int j = 0; j < CATEGORY_RANGES[i].length; j += 2)
1537:                                assignedRanges.add(new CharRange(
1538:                                        CATEGORY_RANGES[i][j],
1539:                                        CATEGORY_RANGES[i][j + 1]));
1540:                        return new Subtraction(new Union(new CharClass[] {
1541:                                base, new CharRange(NONBMP_MIN, NONBMP_MAX) }),
1542:                                new Union(assignedRanges));
1543:                    }
1544:                    if (name.equals("Pi"))
1545:                        return makeCharClass(CATEGORY_Pi);
1546:                    if (name.equals("Pf"))
1547:                        return makeCharClass(CATEGORY_Pf);
1548:                    return base;
1549:                }
1550:                List classes = new Vector();
1551:                classes.add(base);
1552:                int[] addRanges = CATEGORY_RANGES[sci / 2];
1553:                for (int i = 0; i < addRanges.length; i += 2)
1554:                    classes.add(new CharRange(addRanges[i], addRanges[i + 1]));
1555:                if (name.equals("Lu"))
1556:                    classes.add(new SingleChar(UNICODE_3_1_ADD_Lu));
1557:                else if (name.equals("Ll"))
1558:                    classes.add(new SingleChar(UNICODE_3_1_ADD_Ll));
1559:                else if (name.equals("Nl"))
1560:                    classes.add(new CharRange(UNICODE_3_1_CHANGE_No_to_Nl_MIN,
1561:                            UNICODE_3_1_CHANGE_No_to_Nl_MAX));
1562:                else if (name.equals("No"))
1563:                    return new Subtraction(new Union(classes), new CharRange(
1564:                            UNICODE_3_1_CHANGE_No_to_Nl_MIN,
1565:                            UNICODE_3_1_CHANGE_No_to_Nl_MAX));
1566:                return new Union(classes);
1567:            }
1568:
1569:            private static CharClass makeCharClass(String members) {
1570:                List list = new Vector();
1571:                for (int i = 0, len = members.length(); i < len; i++)
1572:                    list.add(new SingleChar(members.charAt(i)));
1573:                return new Union(list);
1574:            }
1575:
1576:            public static void main(String[] args) throws RegexSyntaxException {
1577:                String s = translate(args[0], args[1].equals("xpath"));
1578:                for (int i = 0, len = s.length(); i < len; i++) {
1579:                    char c = s.charAt(i);
1580:                    if (c >= 0x20 && c <= 0x7e)
1581:                        System.err.print(c);
1582:                    else {
1583:                        System.err.print("\\u");
1584:                        for (int shift = 12; shift >= 0; shift -= 4)
1585:                            System.err.print("0123456789ABCDEF"
1586:                                    .charAt((c >> shift) & 0xF));
1587:                    }
1588:                }
1589:                System.err.println();
1590:            }
1591:
1592:            //}
1593:
1594:        }
1595:
1596:        //
1597:        // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
1598:        // you may not use this file except in compliance with the License. You may obtain a copy of the
1599:        // License at http://www.mozilla.org/MPL/
1600:        //
1601:        // Software distributed under the License is distributed on an "AS IS" basis,
1602:        // WITHOUT WARRANTY OF ANY KIND, either express or implied.
1603:        // See the License for the specific language governing rights and limitations under the License.
1604:        //
1605:        // The Original Code is: all this file except changes marked.
1606:        //
1607:        // The Initial Developer of the Original Code is James Clark
1608:        //
1609:        // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
1610:        //
1611:        // Contributor(s): Michael Kay
1612:        //
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.