001: /*
002: * <copyright>
003: *
004: * Copyright 2003-2004 BBNT Solutions, LLC
005: * under sponsorship of the Defense Advanced Research Projects
006: * Agency (DARPA).
007: *
008: * You can redistribute this software and/or modify it under the
009: * terms of the Cougaar Open Source License as published on the
010: * Cougaar Open Source Website (www.cougaar.org).
011: *
012: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
013: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
014: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
015: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
016: * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
017: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
018: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
019: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
020: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
021: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
022: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
023: *
024: * </copyright>
025: */
026:
027: package org.cougaar.community;
028:
029: import java.util.ArrayList;
030: import java.util.HashMap;
031: import java.util.List;
032: import java.util.Map;
033:
034: /**
035: * Matches unix-style glob patterns
036: **/
037: public class Glob {
038: /** The parsed segments of the pattern **/
039: private Segment[] segments;
040:
041: /** A cache of already parsed patterns **/
042: private static Map globs = new HashMap();
043:
044: /**
045: * Parse a glob pattern or find the previously parsed result. In
046: * glob patterns:
047: * <dl>
048: * <dt>*</dt><dd>matches any number of any character</dd>
049: * <dt>?</dt><dd>matches a single character</dd>
050: * <dt>[...]</dt><dd> Matches any single character in the enclosed
051: * list(s) or range(s). A list is a string of characters. A range
052: * is two characters separated by a dash (-), and includes all the
053: * characters in between in the collating sequence of the default
054: * charset. To include a ']' in the set, put it first. To include
055: * a '-', put it first or last.</dd>
056: * </dl>
057: * @param pattern the pattern to parse.
058: * @return the parsed result. The match method of the returned
059: * object should be used to check if any given string matches the
060: * pattern that was parsed.
061: **/
062: public static synchronized Glob parse(String pattern) {
063: Glob result = (Glob) globs.get(pattern);
064: if (result == null) {
065: result = new Glob(pattern);
066: globs.put(pattern, result);
067: }
068: return result;
069: }
070:
071: /**
072: * Construct a new Glob to parse a pattern. The constructor is
073: * private. Use the parse method to create or reuse a Glob. The
074: * parsed result consists of an array of Segments where each
075: * Segment corresponds to a portion of the pattern. There are
076: * four kinds of Segments corresponding "*", "?", "[...]" and
077: * ordinary characters.
078: * @param s Parse pattern
079: **/
080: private Glob(String s) {
081: List segmentList = new ArrayList();
082: int startPos = 0;
083: for (int i = 0, n = s.length(); i < n; i++) {
084: char c = s.charAt(i);
085: switch (c) {
086: case '*':
087: if (startPos < i) {
088: segmentList
089: .add(new Match(s.substring(startPos, i)));
090: }
091: segmentList.add(new Star());
092: startPos = i + 1;
093: break;
094: case '?':
095: if (startPos < i) {
096: segmentList
097: .add(new Match(s.substring(startPos, i)));
098: }
099: segmentList.add(new AnyOne());
100: startPos = i + 1;
101: break;
102: case '[':
103: if (startPos < i) {
104: segmentList
105: .add(new Match(s.substring(startPos, i)));
106: }
107: CharSet charSet = new CharSet();
108: c = s.charAt(++i);
109: if (c == '!') {
110: charSet.negate = true;
111: c = s.charAt(++i);
112: }
113: boolean first = true;
114: boolean dash = false;
115: char pc = c;
116: while (first || c != ']') { // ] must be first if at all
117: if (!first && c == '-') {
118: c = s.charAt(++i);
119: if (c == ']') {
120: charSet.add('-');
121: break;
122: }
123: while (++pc <= c)
124: charSet.add(pc);
125: } else {
126: charSet.add(c);
127: pc = c;
128: }
129: c = s.charAt(++i);
130: first = false;
131: }
132: segmentList.add(charSet.finish());
133: startPos = i + 1;
134: }
135: }
136: if (startPos < s.length()) {
137: segmentList.add(new Match(s.substring(startPos)));
138: }
139: segments = (Segment[]) segmentList
140: .toArray(new Segment[segmentList.size()]);
141: int off = 0; // The min chars needed for all remaining segments
142: for (int i = segments.length; --i >= 0;) {
143: off += segments[i].getMin();
144: segments[i].setOff(off);
145: }
146: // for (int i =0; i < segments.length; i++) {
147: // System.out.println(segments[i].getClass().getName()
148: // + ": "
149: // + segments[i]);
150: // }
151: }
152:
153: /**
154: * Check if a string matches the pattern. The procedure is to step
155: * through the segments in sequence and see if the segment matches
156: * the beginning of the current string. If it doesn't return false
157: * (failure). If it does, trim off the matched characters and
158: * repeat for the next segment. Each segment specifies the minimum
159: * and maximum characters that it can match. If the maximum
160: * exceeds the minimum, all possibilities are tested starting with
161: * the longest (maximum number of character) until a match is
162: * achieved. If all the possibilities fail, then this method
163: * fails.
164: * @param s the string to test
165: * @return true if a match
166: **/
167: public boolean match(String s) {
168: return match(s, 0);
169: }
170:
171: /**
172: * Check if a string matches the pattern starting with a
173: * particular segment.
174: * @see match(String s)
175: * @param s the string to test
176: * @param ix the index of the segment to start with (recursive
177: * call).
178: * @return true if a match
179: **/
180: private boolean match(String s, int ix) {
181: for (int n = segments.length; ix < n;) {
182: Segment seg = segments[ix];
183: int off = seg.getOff();
184: if (off > s.length()) {
185: return false; // Not enough chars left
186: }
187: if (!seg.match(s))
188: return false;
189: int min = seg.getMin();
190: int nextOff = off - min;
191: int max = Math.min(seg.getMax(), s.length() - nextOff);
192: for (int pos = max; pos > min; pos--) {
193: if (match(s.substring(pos), ix + 1)) {
194: return true;
195: }
196: }
197: s = s.substring(min); // Tail recursion
198: ix = ix + 1;
199: }
200: return s.length() == 0;
201: }
202:
203: /**
204: * Convert the parsed pattern to a string
205: * @return String representation of parsed pattern
206: **/
207: public String toString() {
208: return appendString(new StringBuffer()).toString();
209: }
210:
211: public StringBuffer appendString(StringBuffer buf) {
212: for (int i = 0; i < segments.length; i++) {
213: buf.append(segments[i].toString());
214: }
215: return buf;
216: }
217:
218: public interface Segment {
219: void setOff(int newOff);
220:
221: int getOff();
222:
223: int getMin(); // The minimum length of this segment
224:
225: int getMax(); // The maximum length of this segment
226:
227: boolean match(String s);
228: }
229:
230: private static class SegmentBase {
231: private int min;
232: private int max;
233: private int off;
234:
235: protected SegmentBase(int min, int max) {
236: this .min = min;
237: this .max = max;
238: }
239:
240: public final void setOff(int newOff) {
241: off = newOff;
242: }
243:
244: public final int getOff() {
245: return off;
246: }
247:
248: public final int getMax() {
249: return max;
250: }
251:
252: public final int getMin() {
253: return min;
254: }
255: }
256:
257: private static class Match extends SegmentBase implements Segment {
258: String chars;
259:
260: public Match(String s) {
261: super (s.length(), s.length());
262: chars = s;
263: }
264:
265: public boolean match(String s) {
266: return s.startsWith(chars);
267: }
268:
269: public String toString() {
270: return chars;
271: }
272: }
273:
274: private static class Star extends SegmentBase implements Segment {
275: public Star() {
276: super (0, Integer.MAX_VALUE);
277: }
278:
279: public boolean match(String s) {
280: return true;
281: }
282:
283: public String toString() {
284: return "*";
285: }
286: }
287:
288: private static class One extends SegmentBase {
289: public One() {
290: super (1, 1);
291: }
292: }
293:
294: private static class AnyOne extends One implements Segment {
295: public boolean match(String s) {
296: return true;
297: }
298:
299: public String toString() {
300: return "?";
301: }
302: }
303:
304: private static class CharSet extends One implements Segment {
305: public boolean negate = false;
306: private StringBuffer buf = new StringBuffer();
307: private String chars = null;
308:
309: public void add(char c) {
310: buf.append(c);
311: }
312:
313: public CharSet finish() {
314: chars = buf.substring(0);
315: buf = null;
316: return this ;
317: }
318:
319: public boolean match(String s) {
320: char c = s.charAt(0);
321: return (chars.indexOf(c) >= 0) != negate;
322: }
323:
324: public String toString() {
325: StringBuffer b = new StringBuffer();
326: b.append("[");
327: if (negate)
328: b.append("!");
329: if (chars.indexOf(']') >= 0)
330: b.append(']');
331: int charsInRange = 0;
332: int nextInRange = -1;
333: for (int i = 0, n = chars.length(); i <= n; i++) {
334: int c;
335: if (i < n) {
336: c = chars.charAt(i);
337: } else {
338: c = -1;
339: }
340: if (c == '-')
341: continue;
342: if (c == ']')
343: continue;
344: if (charsInRange > 0 && c == nextInRange) {
345: charsInRange++;
346: } else {
347: if (charsInRange > 2)
348: b.append('-');
349: if (charsInRange > 1)
350: b.append((char) (nextInRange - 1));
351: if (i < n) {
352: b.append((char) c);
353: charsInRange = 1;
354: }
355: }
356: nextInRange = c + 1;
357: }
358: if (chars.indexOf('-') >= 0)
359: b.append('-');
360: b.append("]");
361: return b.toString();
362: }
363: }
364: }
|