001: /*******************************************************************************
002: * Copyright (c) 2000, 2006 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.pde.internal.ui.util;
011:
012: import java.util.Vector;
013:
014: /**
015: * Copied from JDT UI
016: * A string pattern matcher. Supports '*' and '?' wildcards.
017: */
018: public class StringMatcher {
019: protected String fPattern;
020: protected int fLength; // pattern length
021: protected boolean fIgnoreWildCards;
022: protected boolean fIgnoreCase;
023: protected boolean fHasLeadingStar;
024: protected boolean fHasTrailingStar;
025: protected String fSegments[]; //the given pattern is split into * separated segments
026:
027: /* boundary value beyond which we don't need to search in the text */
028: protected int fBound = 0;
029:
030: protected static final char fSingleWildCard = '\u0000';
031:
032: public static class Position {
033: int start; //inclusive
034: int end; //exclusive
035:
036: public Position(int start, int end) {
037: this .start = start;
038: this .end = end;
039: }
040:
041: public int getStart() {
042: return start;
043: }
044:
045: public int getEnd() {
046: return end;
047: }
048: }
049:
050: /**
051: * StringMatcher constructor takes in a String object that is a simple
052: * pattern. The pattern may contain '*' for 0 and many characters and
053: * '?' for exactly one character.
054: *
055: * Literal '*' and '?' characters must be escaped in the pattern
056: * e.g., "\*" means literal "*", etc.
057: *
058: * Escaping any other character (including the escape character itself),
059: * just results in that character in the pattern.
060: * e.g., "\a" means "a" and "\\" means "\"
061: *
062: * If invoking the StringMatcher with string literals in Java, don't forget
063: * escape characters are represented by "\\".
064: *
065: * @param pattern the pattern to match text against
066: * @param ignoreCase if true, case is ignored
067: * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
068: * (everything is taken literally).
069: */
070: public StringMatcher(String pattern, boolean ignoreCase,
071: boolean ignoreWildCards) {
072: if (pattern == null)
073: throw new IllegalArgumentException();
074: fIgnoreCase = ignoreCase;
075: fIgnoreWildCards = ignoreWildCards;
076: fPattern = pattern;
077: fLength = pattern.length();
078:
079: if (fIgnoreWildCards) {
080: parseNoWildCards();
081: } else {
082: parseWildCards();
083: }
084: }
085:
086: /**
087: * Find the first occurrence of the pattern between <code>start</code)(inclusive)
088: * and <code>end</code>(exclusive).
089: * @param text the String object to search in
090: * @param start the starting index of the search range, inclusive
091: * @param end the ending index of the search range, exclusive
092: * @return an <code>StringMatcher.Position</code> object that keeps the starting
093: * (inclusive) and ending positions (exclusive) of the first occurrence of the
094: * pattern in the specified range of the text; return null if not found or subtext
095: * is empty (start==end). A pair of zeros is returned if pattern is empty string
096: * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
097: * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
098: */
099: public StringMatcher.Position find(String text, int start, int end) {
100: if (text == null)
101: throw new IllegalArgumentException();
102:
103: int tlen = text.length();
104: if (start < 0)
105: start = 0;
106: if (end > tlen)
107: end = tlen;
108: if (end < 0 || start >= end)
109: return null;
110: if (fLength == 0)
111: return new Position(start, start);
112: if (fIgnoreWildCards) {
113: int x = posIn(text, start, end);
114: if (x < 0)
115: return null;
116: return new Position(x, x + fLength);
117: }
118:
119: int segCount = fSegments.length;
120: if (segCount == 0)//pattern contains only '*'(s)
121: return new Position(start, end);
122:
123: int curPos = start;
124: int matchStart = -1;
125: int i;
126: for (i = 0; i < segCount && curPos < end; ++i) {
127: String current = fSegments[i];
128: int nextMatch = regExpPosIn(text, curPos, end, current);
129: if (nextMatch < 0)
130: return null;
131: if (i == 0)
132: matchStart = nextMatch;
133: curPos = nextMatch + current.length();
134: }
135: if (i < segCount)
136: return null;
137: return new Position(matchStart, curPos);
138: }
139:
140: /**
141: * match the given <code>text</code> with the pattern
142: * @return true if matched eitherwise false
143: * @param text a String object
144: */
145: public boolean match(String text) {
146: return match(text, 0, text.length());
147: }
148:
149: /**
150: * Given the starting (inclusive) and the ending (exclusive) positions in the
151: * <code>text</code>, determine if the given substring matches with aPattern
152: * @return true if the specified portion of the text matches the pattern
153: * @param text a String object that contains the substring to match
154: * @param start marks the starting position (inclusive) of the substring
155: * @param end marks the ending index (exclusive) of the substring
156: */
157: public boolean match(String text, int start, int end) {
158: if (null == text)
159: throw new IllegalArgumentException();
160:
161: if (start > end)
162: return false;
163:
164: if (fIgnoreWildCards)
165: return (end - start == fLength)
166: && fPattern.regionMatches(fIgnoreCase, 0, text,
167: start, fLength);
168: int segCount = fSegments.length;
169: if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern contains only '*'(s)
170: return true;
171: if (start == end)
172: return fLength == 0;
173: if (fLength == 0)
174: return start == end;
175:
176: int tlen = text.length();
177: if (start < 0)
178: start = 0;
179: if (end > tlen)
180: end = tlen;
181:
182: int tCurPos = start;
183: int bound = end - fBound;
184: if (bound < 0)
185: return false;
186: int i = 0;
187: String current = fSegments[i];
188: int segLength = current.length();
189:
190: /* process first segment */
191: if (!fHasLeadingStar) {
192: if (!regExpRegionMatches(text, start, current, 0, segLength)) {
193: return false;
194: }
195: ++i;
196: tCurPos = tCurPos + segLength;
197: }
198: if ((fSegments.length == 1) && (!fHasLeadingStar)
199: && (!fHasTrailingStar)) {
200: // only one segment to match, no wildcards specified
201: return tCurPos == end;
202: }
203: /* process middle segments */
204: while (i < segCount) {
205: current = fSegments[i];
206: int currentMatch;
207: int k = current.indexOf(fSingleWildCard);
208: if (k < 0) {
209: currentMatch = textPosIn(text, tCurPos, end, current);
210: if (currentMatch < 0)
211: return false;
212: } else {
213: currentMatch = regExpPosIn(text, tCurPos, end, current);
214: if (currentMatch < 0)
215: return false;
216: }
217: tCurPos = currentMatch + current.length();
218: i++;
219: }
220:
221: /* process final segment */
222: if (!fHasTrailingStar && tCurPos != end) {
223: int clen = current.length();
224: return regExpRegionMatches(text, end - clen, current, 0,
225: clen);
226: }
227: return i == segCount;
228: }
229:
230: /**
231: * This method parses the given pattern into segments seperated by wildcard '*' characters.
232: * Since wildcards are not being used in this case, the pattern consists of a single segment.
233: */
234: private void parseNoWildCards() {
235: fSegments = new String[1];
236: fSegments[0] = fPattern;
237: fBound = fLength;
238: }
239:
240: /**
241: * Parses the given pattern into segments seperated by wildcard '*' characters.
242: */
243: private void parseWildCards() {
244: if (fPattern.startsWith("*"))//$NON-NLS-1$
245: fHasLeadingStar = true;
246: if (fPattern.endsWith("*")) {//$NON-NLS-1$
247: /* make sure it's not an escaped wildcard */
248: if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
249: fHasTrailingStar = true;
250: }
251: }
252:
253: Vector temp = new Vector();
254:
255: int pos = 0;
256: StringBuffer buf = new StringBuffer();
257: while (pos < fLength) {
258: char c = fPattern.charAt(pos++);
259: switch (c) {
260: case '\\':
261: if (pos >= fLength) {
262: buf.append(c);
263: } else {
264: char next = fPattern.charAt(pos++);
265: /* if it's an escape sequence */
266: if (next == '*' || next == '?' || next == '\\') {
267: buf.append(next);
268: } else {
269: /* not an escape sequence, just insert literally */
270: buf.append(c);
271: buf.append(next);
272: }
273: }
274: break;
275: case '*':
276: if (buf.length() > 0) {
277: /* new segment */
278: temp.addElement(buf.toString());
279: fBound += buf.length();
280: buf.setLength(0);
281: }
282: break;
283: case '?':
284: /* append special character representing single match wildcard */
285: buf.append(fSingleWildCard);
286: break;
287: default:
288: buf.append(c);
289: }
290: }
291:
292: /* add last buffer to segment list */
293: if (buf.length() > 0) {
294: temp.addElement(buf.toString());
295: fBound += buf.length();
296: }
297:
298: fSegments = new String[temp.size()];
299: temp.copyInto(fSegments);
300: }
301:
302: /**
303: * @param text a string which contains no wildcard
304: * @param start the starting index in the text for search, inclusive
305: * @param end the stopping point of search, exclusive
306: * @return the starting index in the text of the pattern , or -1 if not found
307: */
308: protected int posIn(String text, int start, int end) {//no wild card in pattern
309: int max = end - fLength;
310:
311: if (!fIgnoreCase) {
312: int i = text.indexOf(fPattern, start);
313: if (i == -1 || i > max)
314: return -1;
315: return i;
316: }
317:
318: for (int i = start; i <= max; ++i) {
319: if (text.regionMatches(true, i, fPattern, 0, fLength))
320: return i;
321: }
322:
323: return -1;
324: }
325:
326: /**
327: * @param text a simple regular expression that may only contain '?'(s)
328: * @param start the starting index in the text for search, inclusive
329: * @param end the stopping point of search, exclusive
330: * @param p a simple regular expression that may contains '?'
331: * @return the starting index in the text of the pattern , or -1 if not found
332: */
333: protected int regExpPosIn(String text, int start, int end, String p) {
334: int plen = p.length();
335:
336: int max = end - plen;
337: for (int i = start; i <= max; ++i) {
338: if (regExpRegionMatches(text, i, p, 0, plen))
339: return i;
340: }
341: return -1;
342: }
343:
344: protected boolean regExpRegionMatches(String text, int tStart,
345: String p, int pStart, int plen) {
346: while (plen-- > 0) {
347: char tchar = text.charAt(tStart++);
348: char pchar = p.charAt(pStart++);
349:
350: /* process wild cards */
351: if (!fIgnoreWildCards) {
352: /* skip single wild cards */
353: if (pchar == fSingleWildCard) {
354: continue;
355: }
356: }
357: if (pchar == tchar)
358: continue;
359: if (fIgnoreCase) {
360: if (Character.toUpperCase(tchar) == Character
361: .toUpperCase(pchar))
362: continue;
363: // comparing after converting to upper case doesn't handle all cases;
364: // also compare after converting to lower case
365: if (Character.toLowerCase(tchar) == Character
366: .toLowerCase(pchar))
367: continue;
368: }
369: return false;
370: }
371: return true;
372: }
373:
374: /**
375: * @param text the string to match
376: * @param start the starting index in the text for search, inclusive
377: * @param end the stopping point of search, exclusive
378: * @param p a string that has no wildcard
379: * @return the starting index in the text of the pattern , or -1 if not found
380: */
381: protected int textPosIn(String text, int start, int end, String p) {
382:
383: int plen = p.length();
384: int max = end - plen;
385:
386: if (!fIgnoreCase) {
387: int i = text.indexOf(p, start);
388: if (i == -1 || i > max)
389: return -1;
390: return i;
391: }
392:
393: for (int i = start; i <= max; ++i) {
394: if (text.regionMatches(true, i, p, 0, plen))
395: return i;
396: }
397:
398: return -1;
399: }
400: }
|