001: package net.refractions.udig.style.sld.editor.internal;
002:
003: /*******************************************************************************
004: * Copyright (c) 2000, 2004 IBM Corporation and others.
005: * All rights reserved. This program and the accompanying materials
006: * are made available under the terms of the Eclipse Public License v1.0
007: * which accompanies this distribution, and is available at
008: * http://www.eclipse.org/legal/epl-v10.html
009: *
010: * Contributors:
011: * IBM Corporation - initial API and implementation
012: *******************************************************************************/
013:
014: import java.util.Vector;
015:
016: /**
017: * A string pattern matcher, suppporting "*" and "?" wildcards.
018: */
019: public class StringMatcher {
020: protected String fPattern;
021:
022: protected int fLength; // pattern length
023:
024: protected boolean fIgnoreWildCards;
025:
026: protected boolean fIgnoreCase;
027:
028: protected boolean fHasLeadingStar;
029:
030: protected boolean fHasTrailingStar;
031:
032: protected String fSegments[]; //the given pattern is split into * separated segments
033:
034: /* boundary value beyond which we don't need to search in the text */
035: protected int fBound = 0;
036:
037: protected static final char fSingleWildCard = '\u0000';
038:
039: public static class Position {
040: int start; //inclusive
041:
042: int end; //exclusive
043:
044: public Position(int start, int end) {
045: this .start = start;
046: this .end = end;
047: }
048:
049: public int getStart() {
050: return start;
051: }
052:
053: public int getEnd() {
054: return end;
055: }
056: }
057:
058: /**
059: * StringMatcher constructor takes in a String object that is a simple
060: * pattern which may contain '*' for 0 and many characters and
061: * '?' for exactly one character.
062: *
063: * Literal '*' and '?' characters must be escaped in the pattern
064: * e.g., "\*" means literal "*", etc.
065: *
066: * Escaping any other character (including the escape character itself),
067: * just results in that character in the pattern.
068: * e.g., "\a" means "a" and "\\" means "\"
069: *
070: * If invoking the StringMatcher with string literals in Java, don't forget
071: * escape characters are represented by "\\".
072: *
073: * @param pattern the pattern to match text against
074: * @param ignoreCase if true, case is ignored
075: * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
076: * (everything is taken literally).
077: */
078: public StringMatcher(String pattern, boolean ignoreCase,
079: boolean ignoreWildCards) {
080: if (pattern == null)
081: throw new IllegalArgumentException();
082: fIgnoreCase = ignoreCase;
083: fIgnoreWildCards = ignoreWildCards;
084: fPattern = pattern;
085: fLength = pattern.length();
086:
087: if (fIgnoreWildCards) {
088: parseNoWildCards();
089: } else {
090: parseWildCards();
091: }
092: }
093:
094: /**
095: * Find the first occurrence of the pattern between <code>start</code)(inclusive)
096: * and <code>end</code>(exclusive).
097: * @param <code>text</code>, the String object to search in
098: * @param <code>start</code>, the starting index of the search range, inclusive
099: * @param <code>end</code>, the ending index of the search range, exclusive
100: * @return an <code>StringMatcher.Position</code> object that keeps the starting
101: * (inclusive) and ending positions (exclusive) of the first occurrence of the
102: * pattern in the specified range of the text; return null if not found or subtext
103: * is empty (start==end). A pair of zeros is returned if pattern is empty string
104: * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
105: * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
106: */
107: public StringMatcher.Position find(String text, int start, int end) {
108: if (text == null)
109: throw new IllegalArgumentException();
110:
111: int tlen = text.length();
112: if (start < 0)
113: start = 0;
114: if (end > tlen)
115: end = tlen;
116: if (end < 0 || start >= end)
117: return null;
118: if (fLength == 0)
119: return new Position(start, start);
120: if (fIgnoreWildCards) {
121: int x = posIn(text, start, end);
122: if (x < 0)
123: return null;
124: return new Position(x, x + fLength);
125: }
126:
127: int segCount = fSegments.length;
128: if (segCount == 0)//pattern contains only '*'(s)
129: return new Position(start, end);
130:
131: int curPos = start;
132: int matchStart = -1;
133: int i;
134: for (i = 0; i < segCount && curPos < end; ++i) {
135: String current = fSegments[i];
136: int nextMatch = regExpPosIn(text, curPos, end, current);
137: if (nextMatch < 0)
138: return null;
139: if (i == 0)
140: matchStart = nextMatch;
141: curPos = nextMatch + current.length();
142: }
143: if (i < segCount)
144: return null;
145: return new Position(matchStart, curPos);
146: }
147:
148: /**
149: * match the given <code>text</code> with the pattern
150: * @return true if matched eitherwise false
151: * @param <code>text</code>, a String object
152: */
153: public boolean match(String text) {
154: if (text == null)
155: return false;
156: return match(text, 0, text.length());
157: }
158:
159: /**
160: * Given the starting (inclusive) and the ending (exclusive) positions in the
161: * <code>text</code>, determine if the given substring matches with aPattern
162: * @return true if the specified portion of the text matches the pattern
163: * @param String <code>text</code>, a String object that contains the substring to match
164: * @param int <code>start<code> marks the starting position (inclusive) of the substring
165: * @param int <code>end<code> marks the ending index (exclusive) of the substring
166: */
167: public boolean match(String text, int start, int end) {
168: if (null == text)
169: throw new IllegalArgumentException();
170:
171: if (start > end)
172: return false;
173:
174: if (fIgnoreWildCards)
175: return (end - start == fLength)
176: && fPattern.regionMatches(fIgnoreCase, 0, text,
177: start, fLength);
178: int segCount = fSegments.length;
179: if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern contains only '*'(s)
180: return true;
181: if (start == end)
182: return fLength == 0;
183: if (fLength == 0)
184: return start == end;
185:
186: int tlen = text.length();
187: if (start < 0)
188: start = 0;
189: if (end > tlen)
190: end = tlen;
191:
192: int tCurPos = start;
193: int bound = end - fBound;
194: if (bound < 0)
195: return false;
196: int i = 0;
197: String current = fSegments[i];
198: int segLength = current.length();
199:
200: /* process first segment */
201: if (!fHasLeadingStar) {
202: if (!regExpRegionMatches(text, start, current, 0, segLength)) {
203: return false;
204: } else {
205: ++i;
206: tCurPos = tCurPos + segLength;
207: }
208: }
209: if ((fSegments.length == 1) && (!fHasLeadingStar)
210: && (!fHasTrailingStar)) {
211: // only one segment to match, no wildcards specified
212: return tCurPos == end;
213: }
214: /* process middle segments */
215: while (i < segCount) {
216: current = fSegments[i];
217: int currentMatch;
218: int k = current.indexOf(fSingleWildCard);
219: if (k < 0) {
220: currentMatch = textPosIn(text, tCurPos, end, current);
221: if (currentMatch < 0)
222: return false;
223: } else {
224: currentMatch = regExpPosIn(text, tCurPos, end, current);
225: if (currentMatch < 0)
226: return false;
227: }
228: tCurPos = currentMatch + current.length();
229: i++;
230: }
231:
232: /* process final segment */
233: if (!fHasTrailingStar && tCurPos != end) {
234: int clen = current.length();
235: return regExpRegionMatches(text, end - clen, current, 0,
236: clen);
237: }
238: return i == segCount;
239: }
240:
241: /**
242: * This method parses the given pattern into segments seperated by wildcard '*' characters.
243: * Since wildcards are not being used in this case, the pattern consists of a single segment.
244: */
245: private void parseNoWildCards() {
246: fSegments = new String[1];
247: fSegments[0] = fPattern;
248: fBound = fLength;
249: }
250:
251: /**
252: * Parses the given pattern into segments seperated by wildcard '*' characters.
253: * @param p, a String object that is a simple regular expression with '*' and/or '?'
254: */
255: private void parseWildCards() {
256: if (fPattern.startsWith("*"))//$NON-NLS-1$
257: fHasLeadingStar = true;
258: if (fPattern.endsWith("*")) {//$NON-NLS-1$
259: /* make sure it's not an escaped wildcard */
260: if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
261: fHasTrailingStar = true;
262: }
263: }
264:
265: Vector temp = new Vector();
266:
267: int pos = 0;
268: StringBuffer buf = new StringBuffer();
269: while (pos < fLength) {
270: char c = fPattern.charAt(pos++);
271: switch (c) {
272: case '\\':
273: if (pos >= fLength) {
274: buf.append(c);
275: } else {
276: char next = fPattern.charAt(pos++);
277: /* if it's an escape sequence */
278: if (next == '*' || next == '?' || next == '\\') {
279: buf.append(next);
280: } else {
281: /* not an escape sequence, just insert literally */
282: buf.append(c);
283: buf.append(next);
284: }
285: }
286: break;
287: case '*':
288: if (buf.length() > 0) {
289: /* new segment */
290: temp.addElement(buf.toString());
291: fBound += buf.length();
292: buf.setLength(0);
293: }
294: break;
295: case '?':
296: /* append special character representing single match wildcard */
297: buf.append(fSingleWildCard);
298: break;
299: default:
300: buf.append(c);
301: }
302: }
303:
304: /* add last buffer to segment list */
305: if (buf.length() > 0) {
306: temp.addElement(buf.toString());
307: fBound += buf.length();
308: }
309:
310: fSegments = new String[temp.size()];
311: temp.copyInto(fSegments);
312: }
313:
314: /**
315: * @param <code>text</code>, a string which contains no wildcard
316: * @param <code>start</code>, the starting index in the text for search, inclusive
317: * @param <code>end</code>, the stopping point of search, exclusive
318: * @return the starting index in the text of the pattern , or -1 if not found
319: */
320: protected int posIn(String text, int start, int end) {//no wild card in pattern
321: int max = end - fLength;
322:
323: if (!fIgnoreCase) {
324: int i = text.indexOf(fPattern, start);
325: if (i == -1 || i > max)
326: return -1;
327: return i;
328: }
329:
330: for (int i = start; i <= max; ++i) {
331: if (text.regionMatches(true, i, fPattern, 0, fLength))
332: return i;
333: }
334:
335: return -1;
336: }
337:
338: /**
339: * @param <code>text</code>, a simple regular expression that may only contain '?'(s)
340: * @param <code>start</code>, the starting index in the text for search, inclusive
341: * @param <code>end</code>, the stopping point of search, exclusive
342: * @param <code>p</code>, a simple regular expression that may contains '?'
343: * @param <code>caseIgnored</code>, wether the pattern is not casesensitive
344: * @return the starting index in the text of the pattern , or -1 if not found
345: */
346: protected int regExpPosIn(String text, int start, int end, String p) {
347: int plen = p.length();
348:
349: int max = end - plen;
350: for (int i = start; i <= max; ++i) {
351: if (regExpRegionMatches(text, i, p, 0, plen))
352: return i;
353: }
354: return -1;
355: }
356:
357: /**
358: *
359: * @return boolean
360: * @param <code>text</code>, a String to match
361: * @param <code>start</code>, int that indicates the starting index of match, inclusive
362: * @param <code>end</code> int that indicates the ending index of match, exclusive
363: * @param <code>p</code>, String, String, a simple regular expression that may contain '?'
364: * @param <code>ignoreCase</code>, boolean indicating wether code>p</code> is case sensitive
365: */
366: protected boolean regExpRegionMatches(String text, int tStart,
367: String p, int pStart, int plen) {
368: while (plen-- > 0) {
369: char tchar = text.charAt(tStart++);
370: char pchar = p.charAt(pStart++);
371:
372: /* process wild cards */
373: if (!fIgnoreWildCards) {
374: /* skip single wild cards */
375: if (pchar == fSingleWildCard) {
376: continue;
377: }
378: }
379: if (pchar == tchar)
380: continue;
381: if (fIgnoreCase) {
382: if (Character.toUpperCase(tchar) == Character
383: .toUpperCase(pchar))
384: continue;
385: // comparing after converting to upper case doesn't handle all cases;
386: // also compare after converting to lower case
387: if (Character.toLowerCase(tchar) == Character
388: .toLowerCase(pchar))
389: continue;
390: }
391: return false;
392: }
393: return true;
394: }
395:
396: /**
397: * @param <code>text</code>, the string to match
398: * @param <code>start</code>, the starting index in the text for search, inclusive
399: * @param <code>end</code>, the stopping point of search, exclusive
400: * @param code>p</code>, a string that has no wildcard
401: * @param <code>ignoreCase</code>, boolean indicating wether code>p</code> is case sensitive
402: * @return the starting index in the text of the pattern , or -1 if not found
403: */
404: protected int textPosIn(String text, int start, int end, String p) {
405:
406: int plen = p.length();
407: int max = end - plen;
408:
409: if (!fIgnoreCase) {
410: int i = text.indexOf(p, start);
411: if (i == -1 || i > max)
412: return -1;
413: return i;
414: }
415:
416: for (int i = start; i <= max; ++i) {
417: if (text.regionMatches(true, i, p, 0, plen))
418: return i;
419: }
420:
421: return -1;
422: }
423: }
|