001: /*******************************************************************************
002: * Copyright (c) 2000, 2005 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.jdt.internal.ui.util;
011:
012: import java.util.*;
013:
014: /**
015: * A string pattern matcher. Supports '*' and '?' wildcards.
016: */
017: public class StringMatcher {
018: protected String fPattern;
019: protected int fLength; // pattern length
020: protected boolean fIgnoreWildCards;
021: protected boolean fIgnoreCase;
022: protected boolean fHasLeadingStar;
023: protected boolean fHasTrailingStar;
024: protected String fSegments[]; //the given pattern is split into * separated segments
025:
026: /* boundary value beyond which we don't need to search in the text */
027: protected int fBound = 0;
028:
029: protected static final char fSingleWildCard = '\u0000';
030:
031: public static class Position {
032: int start; //inclusive
033: int end; //exclusive
034:
035: public Position(int start, int end) {
036: this .start = start;
037: this .end = end;
038: }
039:
040: public int getStart() {
041: return start;
042: }
043:
044: public int getEnd() {
045: return end;
046: }
047: }
048:
049: /**
050: * StringMatcher constructor takes in a String object that is a simple
051: * pattern. The pattern may contain '*' for 0 and many characters and
052: * '?' for exactly one character.
053: *
054: * Literal '*' and '?' characters must be escaped in the pattern
055: * e.g., "\*" means literal "*", etc.
056: *
057: * Escaping any other character (including the escape character itself),
058: * just results in that character in the pattern.
059: * e.g., "\a" means "a" and "\\" means "\"
060: *
061: * If invoking the StringMatcher with string literals in Java, don't forget
062: * escape characters are represented by "\\".
063: *
064: * @param pattern the pattern to match text against
065: * @param ignoreCase if true, case is ignored
066: * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
067: * (everything is taken literally).
068: */
069: public StringMatcher(String pattern, boolean ignoreCase,
070: boolean ignoreWildCards) {
071: if (pattern == null)
072: throw new IllegalArgumentException();
073: fIgnoreCase = ignoreCase;
074: fIgnoreWildCards = ignoreWildCards;
075: fPattern = pattern;
076: fLength = pattern.length();
077:
078: if (fIgnoreWildCards) {
079: parseNoWildCards();
080: } else {
081: parseWildCards();
082: }
083: }
084:
085: /**
086: * Find the first occurrence of the pattern between <code>start</code)(inclusive)
087: * and <code>end</code>(exclusive).
088: * @param text the String object to search in
089: * @param start the starting index of the search range, inclusive
090: * @param end the ending index of the search range, exclusive
091: * @return an <code>StringMatcher.Position</code> object that keeps the starting
092: * (inclusive) and ending positions (exclusive) of the first occurrence of the
093: * pattern in the specified range of the text; return null if not found or subtext
094: * is empty (start==end). A pair of zeros is returned if pattern is empty string
095: * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
096: * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
097: */
098: public StringMatcher.Position find(String text, int start, int end) {
099: if (text == null)
100: throw new IllegalArgumentException();
101:
102: int tlen = text.length();
103: if (start < 0)
104: start = 0;
105: if (end > tlen)
106: end = tlen;
107: if (end < 0 || start >= end)
108: return null;
109: if (fLength == 0)
110: return new Position(start, start);
111: if (fIgnoreWildCards) {
112: int x = posIn(text, start, end);
113: if (x < 0)
114: return null;
115: return new Position(x, x + fLength);
116: }
117:
118: int segCount = fSegments.length;
119: if (segCount == 0)//pattern contains only '*'(s)
120: return new Position(start, end);
121:
122: int curPos = start;
123: int matchStart = -1;
124: int i;
125: for (i = 0; i < segCount && curPos < end; ++i) {
126: String current = fSegments[i];
127: int nextMatch = regExpPosIn(text, curPos, end, current);
128: if (nextMatch < 0)
129: return null;
130: if (i == 0)
131: matchStart = nextMatch;
132: curPos = nextMatch + current.length();
133: }
134: if (i < segCount)
135: return null;
136: return new Position(matchStart, curPos);
137: }
138:
139: /**
140: * match the given <code>text</code> with the pattern
141: * @return true if matched eitherwise false
142: * @param text a String object
143: */
144: public boolean match(String text) {
145: return match(text, 0, text.length());
146: }
147:
148: /**
149: * Given the starting (inclusive) and the ending (exclusive) positions in the
150: * <code>text</code>, determine if the given substring matches with aPattern
151: * @return true if the specified portion of the text matches the pattern
152: * @param text a String object that contains the substring to match
153: * @param start marks the starting position (inclusive) of the substring
154: * @param end marks the ending index (exclusive) of the substring
155: */
156: public boolean match(String text, int start, int end) {
157: if (null == text)
158: throw new IllegalArgumentException();
159:
160: if (start > end)
161: return false;
162:
163: if (fIgnoreWildCards)
164: return (end - start == fLength)
165: && fPattern.regionMatches(fIgnoreCase, 0, text,
166: start, fLength);
167: int segCount = fSegments.length;
168: if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern contains only '*'(s)
169: return true;
170: if (start == end)
171: return fLength == 0;
172: if (fLength == 0)
173: return start == end;
174:
175: int tlen = text.length();
176: if (start < 0)
177: start = 0;
178: if (end > tlen)
179: end = tlen;
180:
181: int tCurPos = start;
182: int bound = end - fBound;
183: if (bound < 0)
184: return false;
185: int i = 0;
186: String current = fSegments[i];
187: int segLength = current.length();
188:
189: /* process first segment */
190: if (!fHasLeadingStar) {
191: if (!regExpRegionMatches(text, start, current, 0, segLength)) {
192: return false;
193: } else {
194: ++i;
195: tCurPos = tCurPos + segLength;
196: }
197: }
198: if ((fSegments.length == 1) && (!fHasLeadingStar)
199: && (!fHasTrailingStar)) {
200: // only one segment to match, no wildcards specified
201: return tCurPos == end;
202: }
203: /* process middle segments */
204: while (i < segCount) {
205: current = fSegments[i];
206: int currentMatch;
207: int k = current.indexOf(fSingleWildCard);
208: if (k < 0) {
209: currentMatch = textPosIn(text, tCurPos, end, current);
210: if (currentMatch < 0)
211: return false;
212: } else {
213: currentMatch = regExpPosIn(text, tCurPos, end, current);
214: if (currentMatch < 0)
215: return false;
216: }
217: tCurPos = currentMatch + current.length();
218: i++;
219: }
220:
221: /* process final segment */
222: if (!fHasTrailingStar && tCurPos != end) {
223: int clen = current.length();
224: return regExpRegionMatches(text, end - clen, current, 0,
225: clen);
226: }
227: return i == segCount;
228: }
229:
230: /**
231: * This method parses the given pattern into segments seperated by wildcard '*' characters.
232: * Since wildcards are not being used in this case, the pattern consists of a single segment.
233: */
234: private void parseNoWildCards() {
235: fSegments = new String[1];
236: fSegments[0] = fPattern;
237: fBound = fLength;
238: }
239:
240: /**
241: * Parses the given pattern into segments seperated by wildcard '*' characters.
242: */
243: private void parseWildCards() {
244: if (fPattern.startsWith("*"))//$NON-NLS-1$
245: fHasLeadingStar = true;
246: if (fPattern.endsWith("*")) {//$NON-NLS-1$
247: /* make sure it's not an escaped wildcard */
248: if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
249: fHasTrailingStar = true;
250: }
251: }
252:
253: Vector temp = new Vector();
254:
255: int pos = 0;
256: StringBuffer buf = new StringBuffer();
257: while (pos < fLength) {
258: char c = fPattern.charAt(pos++);
259: switch (c) {
260: case '\\':
261: if (pos >= fLength) {
262: buf.append(c);
263: } else {
264: char next = fPattern.charAt(pos++);
265: /* if it's an escape sequence */
266: if (next == '*' || next == '?' || next == '\\') {
267: buf.append(next);
268: } else {
269: /* not an escape sequence, just insert literally */
270: buf.append(c);
271: buf.append(next);
272: }
273: }
274: break;
275: case '*':
276: if (buf.length() > 0) {
277: /* new segment */
278: temp.addElement(buf.toString());
279: fBound += buf.length();
280: buf.setLength(0);
281: }
282: break;
283: case '?':
284: /* append special character representing single match wildcard */
285: buf.append(fSingleWildCard);
286: break;
287: default:
288: buf.append(c);
289: }
290: }
291:
292: /* add last buffer to segment list */
293: if (buf.length() > 0) {
294: temp.addElement(buf.toString());
295: fBound += buf.length();
296: }
297:
298: fSegments = new String[temp.size()];
299: temp.copyInto(fSegments);
300: }
301:
302: /**
303: * @param text a string which contains no wildcard
304: * @param start the starting index in the text for search, inclusive
305: * @param end the stopping point of search, exclusive
306: * @return the starting index in the text of the pattern , or -1 if not found
307: */
308: protected int posIn(String text, int start, int end) {//no wild card in pattern
309: int max = end - fLength;
310:
311: if (!fIgnoreCase) {
312: int i = text.indexOf(fPattern, start);
313: if (i == -1 || i > max)
314: return -1;
315: return i;
316: }
317:
318: for (int i = start; i <= max; ++i) {
319: if (text.regionMatches(true, i, fPattern, 0, fLength))
320: return i;
321: }
322:
323: return -1;
324: }
325:
326: /**
327: * @param text a simple regular expression that may only contain '?'(s)
328: * @param start the starting index in the text for search, inclusive
329: * @param end the stopping point of search, exclusive
330: * @param p a simple regular expression that may contains '?'
331: * @return the starting index in the text of the pattern , or -1 if not found
332: */
333: protected int regExpPosIn(String text, int start, int end, String p) {
334: int plen = p.length();
335:
336: int max = end - plen;
337: for (int i = start; i <= max; ++i) {
338: if (regExpRegionMatches(text, i, p, 0, plen))
339: return i;
340: }
341: return -1;
342: }
343:
344: protected boolean regExpRegionMatches(String text, int tStart,
345: String p, int pStart, int plen) {
346: while (plen-- > 0) {
347: char tchar = text.charAt(tStart++);
348: char pchar = p.charAt(pStart++);
349:
350: /* process wild cards */
351: if (!fIgnoreWildCards) {
352: /* skip single wild cards */
353: if (pchar == fSingleWildCard) {
354: continue;
355: }
356: }
357: if (pchar == tchar)
358: continue;
359: if (fIgnoreCase) {
360: if (Character.toUpperCase(tchar) == Character
361: .toUpperCase(pchar))
362: continue;
363: // comparing after converting to upper case doesn't handle all cases;
364: // also compare after converting to lower case
365: if (Character.toLowerCase(tchar) == Character
366: .toLowerCase(pchar))
367: continue;
368: }
369: return false;
370: }
371: return true;
372: }
373:
374: /**
375: * @param text the string to match
376: * @param start the starting index in the text for search, inclusive
377: * @param end the stopping point of search, exclusive
378: * @param p a string that has no wildcard
379: * @return the starting index in the text of the pattern , or -1 if not found
380: */
381: protected int textPosIn(String text, int start, int end, String p) {
382:
383: int plen = p.length();
384: int max = end - plen;
385:
386: if (!fIgnoreCase) {
387: int i = text.indexOf(p, start);
388: if (i == -1 || i > max)
389: return -1;
390: return i;
391: }
392:
393: for (int i = start; i <= max; ++i) {
394: if (text.regionMatches(true, i, p, 0, plen))
395: return i;
396: }
397:
398: return -1;
399: }
400: }
|