001: /*******************************************************************************
002: * Copyright (c) 2000, 2006 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.ui.views.navigator;
011:
012: import java.util.Vector;
013:
/**
 * A string pattern matcher supporting the '*' and '?' wildcards.
 * <p>
 * '*' stands for any run of characters (including none) and '?' stands for
 * exactly one character. The pattern is parsed once, in the constructor,
 * into the literal segments found between the '*' wildcards.
 * </p>
 */
/* package */class StringMatcher {
    /** The pattern exactly as passed to the constructor. */
    protected String fPattern;

    /** Length of {@link #fPattern}. */
    protected int fLength; // pattern length

    /** If true, '*', '?' and the escape character are ordinary characters. */
    protected boolean fIgnoreWildCards;

    /** If true, characters are compared without regard to case. */
    protected boolean fIgnoreCase;

    /** True if the pattern starts with a '*' wildcard. */
    protected boolean fHasLeadingStar;

    /** True if the pattern ends with an unescaped '*' wildcard. */
    protected boolean fHasTrailingStar;

    /**
     * The pattern split into '*' separated segments. Inside a segment every
     * '?' wildcard is represented by {@link #fSingleWildCard}.
     */
    protected String fSegments[];

    /*
     * Sum of the lengths of all segments, i.e. the minimum number of
     * characters a text must contain to be able to match the pattern.
     */
    protected int fBound = 0;

    /*
     * Internal stand-in (the NUL character) for the '?' wildcard inside a
     * parsed segment. A literal NUL character in a pattern is therefore
     * treated like '?' when wildcards are enabled.
     */
    protected static final char fSingleWildCard = '\u0000';

    /**
     * A half-open range <code>[start, end)</code> inside a text.
     */
    public static class Position {
        int start; //inclusive

        int end; //exclusive

        /**
         * @param start the first index of the range, inclusive
         * @param end the index just past the range, exclusive
         */
        public Position(int start, int end) {
            this.start = start;
            this.end = end;
        }

        /**
         * @return the first index of the range, inclusive
         */
        public int getStart() {
            return start;
        }

        /**
         * @return the index just past the range, exclusive
         */
        public int getEnd() {
            return end;
        }
    }

    /**
     * StringMatcher constructor takes in a String object that is a simple
     * pattern which may contain '*' for 0 and many characters and
     * '?' for exactly one character.
     *
     * Literal '*' and '?' characters must be escaped in the pattern
     * e.g., "\*" means literal "*", etc. An escaped escape character,
     * "\\", means a single literal "\".
     *
     * A backslash in front of any other character is not an escape sequence:
     * both characters are kept literally, e.g., "\a" matches the two
     * characters "\a". A single backslash at the very end of the pattern is
     * a literal backslash as well.
     *
     * If invoking the StringMatcher with string literals in Java, don't forget
     * escape characters are represented by "\\".
     *
     * @param pattern the pattern to match text against
     * @param ignoreCase if true, case is ignored
     * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
     *                        (everything is taken literally).
     * @throws IllegalArgumentException if <code>pattern</code> is null
     */
    public StringMatcher(String pattern, boolean ignoreCase,
            boolean ignoreWildCards) {
        if (pattern == null) {
            throw new IllegalArgumentException();
        }
        fIgnoreCase = ignoreCase;
        fIgnoreWildCards = ignoreWildCards;
        fPattern = pattern;
        fLength = pattern.length();

        if (fIgnoreWildCards) {
            parseNoWildCards();
        } else {
            parseWildCards();
        }
    }

    /**
     * Finds the first occurrence of the pattern between <code>start</code> (inclusive)
     * and <code>end</code> (exclusive). Both indices are clamped to the bounds of
     * the text before searching.
     * <p>
     * Leading and trailing '*' wildcards do not contribute to the reported range:
     * for a pattern like "*abc*" the position of "abc" is returned, and for the
     * pattern "*??*" in the text "abcdf" the range (0,2) is returned.
     * </p>
     *
     * @param text the String object to search in
     * @param start the starting index of the search range, inclusive
     * @param end the ending index of the search range, exclusive
     * @return a <code>StringMatcher.Position</code> object that keeps the starting
     *         (inclusive) and ending (exclusive) positions of the first occurrence of the
     *         pattern in the specified range of the text; null if the pattern is not found
     *         or the range is empty (start &gt;= end). For an empty pattern the empty range
     *         (start, start) is returned; for a pattern made only of '*' wildcards the
     *         whole range (start, end) is returned.
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public StringMatcher.Position find(String text, int start, int end) {
        if (text == null) {
            throw new IllegalArgumentException();
        }

        int tlen = text.length();
        if (start < 0) {
            start = 0;
        }
        if (end > tlen) {
            end = tlen;
        }
        if (end < 0 || start >= end) {
            return null;
        }
        if (fLength == 0) {
            return new Position(start, start);
        }
        if (fIgnoreWildCards) {
            int x = posIn(text, start, end);
            if (x < 0) {
                return null;
            }
            return new Position(x, x + fLength);
        }

        int segCount = fSegments.length;
        if (segCount == 0) {
            // the pattern contains nothing but '*' wildcards
            return new Position(start, end);
        }

        int curPos = start;
        int matchStart = -1;
        int i;
        for (i = 0; i < segCount && curPos < end; ++i) {
            String current = fSegments[i];
            int nextMatch = regExpPosIn(text, curPos, end, current);
            if (nextMatch < 0) {
                return null;
            }
            if (i == 0) {
                matchStart = nextMatch;
            }
            curPos = nextMatch + current.length();
        }
        if (i < segCount) {
            // the range was used up before every segment was located
            return null;
        }
        return new Position(matchStart, curPos);
    }

    /**
     * Matches the whole of the given <code>text</code> against the pattern.
     *
     * @param text a String object; must not be null
     * @return true if matched, otherwise false
     * @throws NullPointerException if <code>text</code> is null
     */
    public boolean match(String text) {
        return match(text, 0, text.length());
    }

    /**
     * Given the starting (inclusive) and the ending (exclusive) positions in the
     * <code>text</code>, determines if the given substring matches the pattern.
     * When wildcards are enabled the indices are clamped to the bounds of the text.
     *
     * @param text a String object that contains the substring to match
     * @param start marks the starting position (inclusive) of the substring
     * @param end marks the ending index (exclusive) of the substring
     * @return true if the specified portion of the text matches the pattern
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text, int start, int end) {
        if (null == text) {
            throw new IllegalArgumentException();
        }

        if (start > end) {
            return false;
        }

        if (fIgnoreWildCards) {
            return (end - start == fLength)
                    && fPattern.regionMatches(fIgnoreCase, 0, text,
                            start, fLength);
        }
        int segCount = fSegments.length;
        if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) {
            // the pattern contains only '*'(s)
            return true;
        }
        if (start == end) {
            return fLength == 0;
        }
        if (fLength == 0) {
            return start == end;
        }

        int tlen = text.length();
        if (start < 0) {
            start = 0;
        }
        if (end > tlen) {
            end = tlen;
        }

        // The range must be able to hold every segment. Measuring from start
        // (not from 0) also guarantees that the region comparisons below never
        // read outside [start, end).
        if (end - start < fBound) {
            return false;
        }

        int tCurPos = start;
        int i = 0;
        String current = fSegments[i];
        int segLength = current.length();

        /* process first segment: without a leading star it is anchored at start */
        if (!fHasLeadingStar) {
            if (!regExpRegionMatches(text, start, current, 0, segLength)) {
                return false;
            }
            ++i;
            tCurPos = tCurPos + segLength;
        }
        if ((segCount == 1) && (!fHasLeadingStar) && (!fHasTrailingStar)) {
            // only one segment to match, no '*' wildcards specified
            return tCurPos == end;
        }

        /* process remaining segments: each one is located leftmost after the previous one */
        while (i < segCount) {
            current = fSegments[i];
            int currentMatch;
            if (current.indexOf(fSingleWildCard) < 0) {
                currentMatch = textPosIn(text, tCurPos, end, current);
            } else {
                currentMatch = regExpPosIn(text, tCurPos, end, current);
            }
            if (currentMatch < 0) {
                return false;
            }
            tCurPos = currentMatch + current.length();
            ++i;
        }

        /*
         * process final segment: without a trailing star the last segment has to
         * sit at the very end of the range, even if it was first found earlier
         */
        if (!fHasTrailingStar && tCurPos != end) {
            int clen = current.length();
            return regExpRegionMatches(text, end - clen, current, 0,
                    clen);
        }
        return true;
    }

    /**
     * Sets up the segments for the case where wildcards are ignored: the whole
     * pattern is taken literally and therefore forms a single segment.
     */
    private void parseNoWildCards() {
        fSegments = new String[1];
        fSegments[0] = fPattern;
        fBound = fLength;
    }

    /**
     * Parses the pattern into segments separated by '*' wildcards, resolving
     * escape sequences and replacing every '?' wildcard by
     * {@link #fSingleWildCard}. Also records whether the pattern starts and
     * ends with a '*' wildcard and accumulates the total segment length in
     * {@link #fBound}.
     */
    private void parseWildCards() {
        if (fPattern.startsWith("*")) { //$NON-NLS-1$
            fHasLeadingStar = true;
        }

        Vector temp = new Vector();

        // Tracks whether the most recently parsed token was an unescaped '*'.
        // Deciding this while parsing (rather than by peeking at the character
        // in front of a final '*') handles an escaped backslash followed by a
        // real wildcard correctly.
        boolean lastTokenWasStar = false;

        int pos = 0;
        StringBuffer buf = new StringBuffer();
        while (pos < fLength) {
            char c = fPattern.charAt(pos++);
            lastTokenWasStar = false;
            switch (c) {
            case '\\':
                if (pos >= fLength) {
                    /* lone backslash at the end of the pattern is literal */
                    buf.append(c);
                } else {
                    char next = fPattern.charAt(pos++);
                    /* if it's an escape sequence */
                    if (next == '*' || next == '?' || next == '\\') {
                        buf.append(next);
                    } else {
                        /* not an escape sequence, just insert literally */
                        buf.append(c);
                        buf.append(next);
                    }
                }
                break;
            case '*':
                lastTokenWasStar = true;
                if (buf.length() > 0) {
                    /* new segment */
                    temp.addElement(buf.toString());
                    fBound += buf.length();
                    buf.setLength(0);
                }
                break;
            case '?':
                /* append special character representing single match wildcard */
                buf.append(fSingleWildCard);
                break;
            default:
                buf.append(c);
            }
        }
        fHasTrailingStar = lastTokenWasStar;

        /* add last buffer to segment list */
        if (buf.length() > 0) {
            temp.addElement(buf.toString());
            fBound += buf.length();
        }

        fSegments = new String[temp.size()];
        temp.copyInto(fSegments);
    }

    /**
     * Searches the text for the whole pattern taken literally (no wildcards).
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @return the starting index in the text of the pattern, or -1 if the pattern
     *         does not occur completely inside the range
     */
    protected int posIn(String text, int start, int end) {//no wild card in pattern
        int max = end - fLength;

        if (!fIgnoreCase) {
            int i = text.indexOf(fPattern, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, fPattern, 0, fLength)) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Searches the text for a segment that may contain single-character wildcards.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a parsed segment that may contain single-character wildcards
     * @return the starting index in the text of the segment, or -1 if the segment
     *         does not occur completely inside the range
     */
    protected int regExpPosIn(String text, int start, int end, String p) {
        int plen = p.length();

        int max = end - plen;
        for (int i = start; i <= max; ++i) {
            if (regExpRegionMatches(text, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compares a region of the text with a region of a parsed segment,
     * honoring single-character wildcards (unless wildcards are ignored) and
     * the case sensitivity of this matcher. The caller must make sure both
     * regions lie inside their strings; no bounds checks are made here.
     *
     * @param text a String to match
     * @param tStart the index in the text at which the comparison starts
     * @param p a parsed segment that may contain single-character wildcards
     * @param pStart the index in <code>p</code> at which the comparison starts
     * @param plen the number of characters to compare
     * @return true if the two regions match
     */
    protected boolean regExpRegionMatches(String text, int tStart,
            String p, int pStart, int plen) {
        while (plen-- > 0) {
            char tchar = text.charAt(tStart++);
            char pchar = p.charAt(pStart++);

            /* process wild cards */
            if (!fIgnoreWildCards) {
                /* skip single wild cards */
                if (pchar == fSingleWildCard) {
                    continue;
                }
            }
            if (pchar == tchar) {
                continue;
            }
            if (fIgnoreCase) {
                if (Character.toUpperCase(tchar) == Character
                        .toUpperCase(pchar)) {
                    continue;
                }
                // comparing after converting to upper case doesn't handle all cases;
                // also compare after converting to lower case
                if (Character.toLowerCase(tchar) == Character
                        .toLowerCase(pchar)) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }

    /**
     * Searches the text for a segment that contains no wildcards.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a string that has no wildcard
     * @return the starting index in the text of the segment, or -1 if the segment
     *         does not occur completely inside the range
     */
    protected int textPosIn(String text, int start, int end, String p) {

        int plen = p.length();
        int max = end - plen;

        if (!fIgnoreCase) {
            int i = text.indexOf(p, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        // scan from start, not from 0: a hit in front of the search range
        // would let segments match out of order
        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, p, 0, plen)) {
                return i;
            }
        }

        return -1;
    }
}
|