001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package java.util.regex;
019:
020: import java.util.ArrayList;
021:
022: import org.apache.harmony.regex.internal.nls.Messages;
023:
024: /**
025: * Note: main functionality of this class is hidden into nodes match methods.
026: */
027: public final class Matcher implements MatchResult {
028:
029: static int MODE_FIND = 1 << 0;
030:
031: static int MODE_MATCH = 1 << 1;
032:
033: private Pattern pat = null;
034:
035: private AbstractSet start = null;
036:
037: private CharSequence string = null;
038:
039: private MatchResultImpl matchResult = null;
040:
041: // bounds
042: private int leftBound = -1;
043:
044: private int rightBound = -1;
045:
046: // replacements
047: private int appendPos = 0;
048:
049: private String replacement = null;
050:
051: private String processedRepl = null;
052:
053: private ArrayList replacementParts = null;
054:
055: /**
056: * @com.intel.drl.spec_ref
057: */
058: public Matcher appendReplacement(StringBuffer sb, String replacement) {
059: processedRepl = processReplacement(replacement);
060: sb.append(string.subSequence(appendPos, start()));
061: sb.append(processedRepl);
062: appendPos = end();
063: return this ;
064: }
065:
066: /**
067: * Parses replacement string and creates pattern
068: */
069: private String processReplacement(String replacement) {
070: if (this .replacement != null
071: && this .replacement.equals(replacement)) {
072: if (replacementParts == null) {
073: return processedRepl;
074: } else {
075: StringBuffer sb = new StringBuffer();
076: for (int i = 0; i < replacementParts.size(); i++) {
077: sb.append(replacementParts.get(i));
078: }
079:
080: return sb.toString();
081: }
082: } else {
083: this .replacement = replacement;
084: char[] repl = replacement.toCharArray();
085: StringBuffer res = new StringBuffer();
086: replacementParts = null;
087:
088: int index = 0;
089: int replacementPos = 0;
090: boolean nextBackSlashed = false;
091:
092: while (index < repl.length) {
093:
094: if (repl[index] == '\\' && !nextBackSlashed) {
095: nextBackSlashed = true;
096: index++;
097: }
098:
099: if (nextBackSlashed) {
100: res.append(repl[index]);
101: nextBackSlashed = false;
102: } else {
103: if (repl[index] == '$') {
104: if (replacementParts == null) {
105: replacementParts = new ArrayList();
106: }
107: try {
108: final int gr = Integer.parseInt(new String(
109: repl, ++index, 1));
110:
111: if (replacementPos != res.length()) {
112: replacementParts.add(res.subSequence(
113: replacementPos, res.length()));
114: replacementPos = res.length();
115: }
116:
117: replacementParts.add(new Object() { //$NON-LOCK-1$
118: private final int grN = gr;
119:
120: public String toString() {
121: return group(grN);
122: }
123: });
124: String group = group(gr);
125: replacementPos += group.length();
126: res.append(group);
127:
128: } catch (IndexOutOfBoundsException iob) {
129: throw iob;
130: } catch (Exception e) {
131: throw new IllegalArgumentException(Messages
132: .getString("regex.00")); //$NON-NLS-1$
133: }
134: } else {
135: res.append(repl[index]);
136: }
137: }
138:
139: index++;
140: }
141:
142: if (replacementParts != null
143: && replacementPos != res.length()) {
144: replacementParts.add(res.subSequence(replacementPos,
145: res.length()));
146: }
147: return res.toString();
148: }
149: }
150:
151: /**
152: * @com.intel.drl.spec_ref
153: */
154: public Matcher reset(CharSequence newSequence) {
155: if (newSequence == null) {
156: throw new NullPointerException(Messages
157: .getString("regex.01")); //$NON-NLS-1$
158: }
159: this .string = newSequence;
160: return reset();
161: }
162:
163: /**
164: * @com.intel.drl.spec_ref
165: */
166: public Matcher reset() {
167: this .leftBound = 0;
168: this .rightBound = string.length();
169: matchResult.reset(string, leftBound, rightBound);
170: appendPos = 0;
171: replacement = null;
172: matchResult.previousMatch = -1;
173: return this ;
174: }
175:
176: /**
177: * @com.intel.drl.spec_ref
178: */
179: public Matcher region(int leftBound, int rightBound) {
180:
181: if (leftBound > rightBound || leftBound < 0 || rightBound < 0
182: || leftBound > string.length()
183: || rightBound > string.length()) {
184: throw new IndexOutOfBoundsException(Messages.getString(
185: "regex.02", //$NON-NLS-1$
186: Integer.toString(leftBound), Integer
187: .toString(rightBound)));
188: }
189:
190: this .leftBound = leftBound;
191: this .rightBound = rightBound;
192: matchResult.reset(null, leftBound, rightBound);
193: appendPos = 0;
194: replacement = null;
195:
196: return this ;
197: }
198:
199: /**
200: * TODO: appendTail(StringBuffer) javadoc
201: *
202: * @param sb
203: * @return
204: */
205: public StringBuffer appendTail(StringBuffer sb) {
206: return sb
207: .append(string.subSequence(appendPos, string.length()));
208: }
209:
210: /**
211: * This is very similar to replaceAll except only the first occurrence of a
212: * sequence matching the pattern is replaced.
213: *
214: * @param replacement
215: * A string to replace occurrences of character sequences
216: * matching the pattern.
217: * @return A new string with replacements inserted
218: */
219: public String replaceFirst(String replacement) {
220: reset();
221: if (find()) {
222: StringBuffer sb = new StringBuffer();
223: appendReplacement(sb, replacement);
224: return appendTail(sb).toString();
225: }
226:
227: return string.toString();
228:
229: }
230:
231: /**
232: * Replace all occurrences of character sequences which match the pattern
233: * with the given replacement string. The replacement string may refer to
234: * capturing groups using the syntax "$<group number>".
235: *
236: * @param replacement
237: * A string to replace occurrences of character sequences
238: * matching the pattern.
239: * @return A new string with replacements inserted
240: */
241: public String replaceAll(String replacement) {
242: StringBuffer sb = new StringBuffer();
243: reset();
244: while (find()) {
245: appendReplacement(sb, replacement);
246: }
247:
248: return appendTail(sb).toString();
249: }
250:
251: /**
252: * Return a reference to the pattern used by this Matcher.
253: *
254: * @return A reference to the pattern used by this Matcher.
255: */
256: public Pattern pattern() {
257: return pat;
258: }
259:
260: /**
261: * @com.intel.drl.spec_ref
262: */
263: public String group(int groupIndex) {
264: return matchResult.group(groupIndex);
265: }
266:
267: /**
268: * @com.intel.drl.spec_ref
269: */
270: public String group() {
271: return group(0);
272: }
273:
274: /**
275: * @com.intel.drl.spec_ref
276: */
277: public boolean find(int startIndex) {
278: int stringLength = string.length();
279: if (startIndex < 0 || startIndex > stringLength)
280: throw new IndexOutOfBoundsException(Messages.getString(
281: "regex.03", //$NON-NLS-1$
282: new Integer(startIndex)));
283:
284: startIndex = findAt(startIndex);
285: if (startIndex >= 0 && matchResult.isValid()) {
286: matchResult.finalizeMatch();
287: return true;
288: }
289: matchResult.startIndex = -1;
290: return false;
291: }
292:
293: private int findAt(int startIndex) {
294: matchResult.reset();
295: matchResult.setMode(Matcher.MODE_FIND);
296: matchResult.setStartIndex(startIndex);
297: int foundIndex = start.find(startIndex, string, matchResult);
298: if (foundIndex == -1) {
299: matchResult.hitEnd = true;
300: }
301: return foundIndex;
302: }
303:
304: /**
305: * The find() method matches the pattern against the character sequence
306: * beginning at the character after the last match or at the beginning of
307: * the sequence if called immediately after reset(). The method returns true
308: * if and only if a match is found.
309: *
310: * @return A boolean indicating if the pattern was matched.
311: */
312: public boolean find() {
313: int length = string.length();
314: if (!hasTransparentBounds())
315: length = rightBound;
316: if (matchResult.startIndex >= 0
317: && matchResult.mode() == Matcher.MODE_FIND) {
318: matchResult.startIndex = matchResult.end();
319: if (matchResult.end() == matchResult.start()) {
320: matchResult.startIndex++;
321: }
322:
323: return matchResult.startIndex <= length ? find(matchResult.startIndex)
324: : false;
325: } else {
326: return find(leftBound);
327: }
328: }
329:
330: /**
331: * @com.intel.drl.spec_ref
332: */
333: public int start(int groupIndex) {
334: return matchResult.start(groupIndex);
335: }
336:
337: /**
338: * @com.intel.drl.spec_ref
339: */
340: public int end(int groupIndex) {
341: return matchResult.end(groupIndex);
342: }
343:
344: /**
345: * This method is identical in function to the Pattern.matches() method. It
346: * returns true if and only if the regular expression pattern matches the
347: * entire input character sequence.
348: *
349: * @return A boolean indicating if the pattern matches the entire input
350: * character sequence.
351: */
352: public boolean matches() {
353: return lookingAt(leftBound, Matcher.MODE_MATCH);
354: }
355:
356: /**
357: * @com.intel.drl.spec_ref
358: */
359: public static String quoteReplacement(String string) {
360: // first check whether we have smth to quote
361: if (string.indexOf('\\') < 0 && string.indexOf('$') < 0)
362: return string;
363: StringBuffer res = new StringBuffer(string.length() * 2);
364: char ch;
365: int len = string.length();
366:
367: for (int i = 0; i < len; i++) {
368:
369: switch (ch = string.charAt(i)) {
370: case '$':
371: res.append('\\');
372: res.append('$');
373: break;
374: case '\\':
375: res.append('\\');
376: res.append('\\');
377: break;
378: default:
379: res.append(ch);
380: }
381: }
382:
383: return res.toString();
384: }
385:
386: /**
387: * Runs match starting from <code>set</code> specified against input
388: * sequence starting at <code>index</code> specified; Result of the match
389: * will be stored into matchResult instance;
390: */
391: private boolean runMatch(AbstractSet set, int index,
392: MatchResultImpl matchResult) {
393:
394: if (set.matches(index, string, matchResult) >= 0) {
395: matchResult.finalizeMatch();
396: return true;
397: }
398:
399: return false;
400: }
401:
402: /**
403: * This method attempts to match the pattern against the character sequence
404: * starting at the beginning. If the pattern matches even a prefix of the
405: * input character sequence, lookingAt() will return true. Otherwise it will
406: * return false.
407: *
408: * @return A boolean indicating if the pattern matches a prefix of the input
409: * character sequence.
410: */
411: public boolean lookingAt() {
412: return lookingAt(leftBound, Matcher.MODE_FIND);
413: }
414:
415: private boolean lookingAt(int startIndex, int mode) {
416: matchResult.reset();
417: matchResult.setMode(mode);
418: matchResult.setStartIndex(startIndex);
419: return runMatch(start, startIndex, matchResult);
420: }
421:
422: /**
423: * @com.intel.drl.spec_ref
424: */
425: public int start() {
426: return start(0);
427: }
428:
429: /**
430: * Return the number of capturing groups in the pattern.
431: *
432: * @return The number of capturing groups in the pattern.
433: */
434: public int groupCount() {
435: return matchResult.groupCount();
436: }
437:
438: /**
439: * @com.intel.drl.spec_ref
440: */
441: public int end() {
442: return end(0);
443: }
444:
445: /**
446: * @com.intel.drl.spec_ref
447: */
448: public MatchResult toMatchResult() {
449: return this .matchResult.cloneImpl();
450: }
451:
452: /**
453: * @com.intel.drl.spec_ref
454: */
455: public Matcher useAnchoringBounds(boolean value) {
456: matchResult.useAnchoringBounds(value);
457: return this ;
458: }
459:
460: /**
461: * @com.intel.drl.spec_ref
462: */
463: public boolean hasAnchoringBounds() {
464: return matchResult.hasAnchoringBounds();
465: }
466:
467: /**
468: * @com.intel.drl.spec_ref
469: */
470: public Matcher useTransparentBounds(boolean value) {
471: matchResult.useTransparentBounds(value);
472: return this ;
473: }
474:
475: /**
476: * @com.intel.drl.spec_ref
477: */
478: public boolean hasTransparentBounds() {
479: return matchResult.hasTransparentBounds();
480: }
481:
482: /**
483: * @com.intel.drl.spec_ref
484: */
485: public int regionStart() {
486: return matchResult.getLeftBound();
487: }
488:
489: /**
490: * @com.intel.drl.spec_ref
491: */
492: public int regionEnd() {
493: return matchResult.getRightBound();
494: }
495:
496: /**
497: * @com.intel.drl.spec_ref
498: */
499: public boolean requireEnd() {
500: return matchResult.requireEnd;
501: }
502:
503: /**
504: * @com.intel.drl.spec_ref
505: */
506: public boolean hitEnd() {
507: return matchResult.hitEnd;
508: }
509:
510: /**
511: * @com.intel.drl.spec_ref
512: */
513: public Matcher usePattern(Pattern pat) {
514: if (pat == null) {
515: throw new IllegalArgumentException(Messages
516: .getString("regex.1B"));
517: }
518: int startIndex = matchResult.getPreviousMatchEnd();
519: int mode = matchResult.mode();
520: this .pat = pat;
521: this .start = pat.start;
522: matchResult = new MatchResultImpl(this .string, leftBound,
523: rightBound, pat.groupCount(), pat.compCount(), pat
524: .consCount());
525: matchResult.setStartIndex(startIndex);
526: matchResult.setMode(mode);
527: return this ;
528: }
529:
530: Matcher(Pattern pat, CharSequence cs) {
531: this .pat = pat;
532: this .start = pat.start;
533: this .string = cs;
534: this .leftBound = 0;
535: this .rightBound = string.length();
536: matchResult = new MatchResultImpl(cs, leftBound, rightBound,
537: pat.groupCount(), pat.compCount(), pat.consCount());
538: }
539: }
|