001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.xerces.impl.xpath.regex;
019:
020: import java.text.CharacterIterator;
021:
022: /**
023: * @xerces.internal
024: *
025: * @version $Id: REUtil.java 446721 2006-09-15 20:35:34Z mrglavas $
026: */
027: public final class REUtil {
/** Not instantiable: every member of this class is static. */
private REUtil() {
    // Intentionally empty.
}
030:
031: static final int composeFromSurrogates(int high, int low) {
032: return 0x10000 + ((high - 0xd800) << 10) + low - 0xdc00;
033: }
034:
035: static final boolean isLowSurrogate(int ch) {
036: return (ch & 0xfc00) == 0xdc00;
037: }
038:
039: static final boolean isHighSurrogate(int ch) {
040: return (ch & 0xfc00) == 0xd800;
041: }
042:
043: static final String decomposeToSurrogates(int ch) {
044: char[] chs = new char[2];
045: ch -= 0x10000;
046: chs[0] = (char) ((ch >> 10) + 0xd800);
047: chs[1] = (char) ((ch & 0x3ff) + 0xdc00);
048: return new String(chs);
049: }
050:
051: static final String substring(CharacterIterator iterator,
052: int begin, int end) {
053: char[] src = new char[end - begin];
054: for (int i = 0; i < src.length; i++)
055: src[i] = iterator.setIndex(i + begin);
056: return new String(src);
057: }
058:
059: // ================================================================
060:
061: static final int getOptionValue(int ch) {
062: int ret = 0;
063: switch (ch) {
064: case 'i':
065: ret = RegularExpression.IGNORE_CASE;
066: break;
067: case 'm':
068: ret = RegularExpression.MULTIPLE_LINES;
069: break;
070: case 's':
071: ret = RegularExpression.SINGLE_LINE;
072: break;
073: case 'x':
074: ret = RegularExpression.EXTENDED_COMMENT;
075: break;
076: case 'u':
077: ret = RegularExpression.USE_UNICODE_CATEGORY;
078: break;
079: case 'w':
080: ret = RegularExpression.UNICODE_WORD_BOUNDARY;
081: break;
082: case 'F':
083: ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
084: break;
085: case 'H':
086: ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
087: break;
088: case 'X':
089: ret = RegularExpression.XMLSCHEMA_MODE;
090: break;
091: case ',':
092: ret = RegularExpression.SPECIAL_COMMA;
093: break;
094: default:
095: }
096: return ret;
097: }
098:
099: static final int parseOptions(String opts) throws ParseException {
100: if (opts == null)
101: return 0;
102: int options = 0;
103: for (int i = 0; i < opts.length(); i++) {
104: int v = getOptionValue(opts.charAt(i));
105: if (v == 0)
106: throw new ParseException("Unknown Option: "
107: + opts.substring(i), -1);
108: options |= v;
109: }
110: return options;
111: }
112:
113: static final String createOptionString(int options) {
114: StringBuffer sb = new StringBuffer(9);
115: if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
116: sb.append((char) 'F');
117: if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
118: sb.append((char) 'H');
119: if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
120: sb.append((char) 'X');
121: if ((options & RegularExpression.IGNORE_CASE) != 0)
122: sb.append((char) 'i');
123: if ((options & RegularExpression.MULTIPLE_LINES) != 0)
124: sb.append((char) 'm');
125: if ((options & RegularExpression.SINGLE_LINE) != 0)
126: sb.append((char) 's');
127: if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
128: sb.append((char) 'u');
129: if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
130: sb.append((char) 'w');
131: if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
132: sb.append((char) 'x');
133: if ((options & RegularExpression.SPECIAL_COMMA) != 0)
134: sb.append((char) ',');
135: return sb.toString().intern();
136: }
137:
138: // ================================================================
139:
140: static String stripExtendedComment(String regex) {
141: int len = regex.length();
142: StringBuffer buffer = new StringBuffer(len);
143: int offset = 0;
144: while (offset < len) {
145: int ch = regex.charAt(offset++);
146: // Skips a white space.
147: if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r'
148: || ch == ' ')
149: continue;
150:
151: if (ch == '#') { // Skips chracters between '#' and a line end.
152: while (offset < len) {
153: ch = regex.charAt(offset++);
154: if (ch == '\r' || ch == '\n')
155: break;
156: }
157: continue;
158: }
159:
160: int next; // Strips an escaped white space.
161: if (ch == '\\' && offset < len) {
162: if ((next = regex.charAt(offset)) == '#'
163: || next == '\t' || next == '\n' || next == '\f'
164: || next == '\r' || next == ' ') {
165: buffer.append((char) next);
166: offset++;
167: } else { // Other escaped character.
168: buffer.append((char) '\\');
169: buffer.append((char) next);
170: offset++;
171: }
172: } else
173: // As is.
174: buffer.append((char) ch);
175: }
176: return buffer.toString();
177: }
178:
179: // ================================================================
180:
181: /**
182: * Sample entry.
183: * <div>Usage: <KBD>org.apache.xerces.utils.regex.REUtil <regex> <string></KBD></div>
184: */
185: public static void main(String[] argv) {
186: String pattern = null;
187: try {
188: String options = "";
189: String target = null;
190: if (argv.length == 0) {
191: System.out
192: .println("Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String");
193: System.exit(0);
194: }
195: for (int i = 0; i < argv.length; i++) {
196: if (argv[i].length() == 0 || argv[i].charAt(0) != '-') {
197: if (pattern == null)
198: pattern = argv[i];
199: else if (target == null)
200: target = argv[i];
201: else
202: System.err.println("Unnecessary: " + argv[i]);
203: } else if (argv[i].equals("-i")) {
204: options += "i";
205: } else if (argv[i].equals("-m")) {
206: options += "m";
207: } else if (argv[i].equals("-s")) {
208: options += "s";
209: } else if (argv[i].equals("-u")) {
210: options += "u";
211: } else if (argv[i].equals("-w")) {
212: options += "w";
213: } else if (argv[i].equals("-X")) {
214: options += "X";
215: } else {
216: System.err.println("Unknown option: " + argv[i]);
217: }
218: }
219: RegularExpression reg = new RegularExpression(pattern,
220: options);
221: System.out.println("RegularExpression: " + reg);
222: Match match = new Match();
223: reg.matches(target, match);
224: for (int i = 0; i < match.getNumberOfGroups(); i++) {
225: if (i == 0)
226: System.out
227: .print("Matched range for the whole pattern: ");
228: else
229: System.out.print("[" + i + "]: ");
230: if (match.getBeginning(i) < 0)
231: System.out.println("-1");
232: else {
233: System.out.print(match.getBeginning(i) + ", "
234: + match.getEnd(i) + ", ");
235: System.out.println("\"" + match.getCapturedText(i)
236: + "\"");
237: }
238: }
239: } catch (ParseException pe) {
240: if (pattern == null) {
241: pe.printStackTrace();
242: } else {
243: System.err
244: .println("org.apache.xerces.utils.regex.ParseException: "
245: + pe.getMessage());
246: String indent = " ";
247: System.err.println(indent + pattern);
248: int loc = pe.getLocation();
249: if (loc >= 0) {
250: System.err.print(indent);
251: for (int i = 0; i < loc; i++)
252: System.err.print("-");
253: System.err.println("^");
254: }
255: }
256: } catch (Exception e) {
257: e.printStackTrace();
258: }
259: }
260:
261: static final int CACHESIZE = 20;
262: static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];
263:
264: /**
265: * Creates a RegularExpression instance.
266: * This method caches created instances.
267: *
268: * @see RegularExpression#RegularExpression(java.lang.String, java.lang.String)
269: */
270: public static RegularExpression createRegex(String pattern,
271: String options) throws ParseException {
272: RegularExpression re = null;
273: int intOptions = REUtil.parseOptions(options);
274: synchronized (REUtil.regexCache) {
275: int i;
276: for (i = 0; i < REUtil.CACHESIZE; i++) {
277: RegularExpression cached = REUtil.regexCache[i];
278: if (cached == null) {
279: i = -1;
280: break;
281: }
282: if (cached.equals(pattern, intOptions)) {
283: re = cached;
284: break;
285: }
286: }
287: if (re != null) {
288: if (i != 0) {
289: System.arraycopy(REUtil.regexCache, 0,
290: REUtil.regexCache, 1, i);
291: REUtil.regexCache[0] = re;
292: }
293: } else {
294: re = new RegularExpression(pattern, options);
295: System.arraycopy(REUtil.regexCache, 0,
296: REUtil.regexCache, 1, REUtil.CACHESIZE - 1);
297: REUtil.regexCache[0] = re;
298: }
299: }
300: return re;
301: }
302:
303: /**
304: *
305: * @see RegularExpression#matches(java.lang.String)
306: */
307: public static boolean matches(String regex, String target)
308: throws ParseException {
309: return REUtil.createRegex(regex, null).matches(target);
310: }
311:
312: /**
313: *
314: * @see RegularExpression#matches(java.lang.String)
315: */
316: public static boolean matches(String regex, String options,
317: String target) throws ParseException {
318: return REUtil.createRegex(regex, options).matches(target);
319: }
320:
321: // ================================================================
322:
323: /**
324: *
325: */
326: public static String quoteMeta(String literal) {
327: int len = literal.length();
328: StringBuffer buffer = null;
329: for (int i = 0; i < len; i++) {
330: int ch = literal.charAt(i);
331: if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
332: if (buffer == null) {
333: buffer = new StringBuffer(i + (len - i) * 2);
334: if (i > 0)
335: buffer.append(literal.substring(0, i));
336: }
337: buffer.append((char) '\\');
338: buffer.append((char) ch);
339: } else if (buffer != null)
340: buffer.append((char) ch);
341: }
342: return buffer != null ? buffer.toString() : literal;
343: }
344:
345: // ================================================================
346:
347: static void dumpString(String v) {
348: for (int i = 0; i < v.length(); i++) {
349: System.out.print(Integer.toHexString(v.charAt(i)));
350: System.out.print(" ");
351: }
352: System.out.println();
353: }
354: }
|