001: /*
002: * $Id: GlobCompiler.java,v 1.8 2003/11/07 20:16:24 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.oro.text;
059:
060: import org.apache.oro.text.regex.*;
061:
062: /**
063: * The GlobCompiler class will compile a glob expression into a Perl5Pattern
064: * that may be used to match patterns in conjunction with Perl5Matcher.
065: * Rather than create extra GlobMatcher and GlobPattern classes tailored
066: * to the task of matching glob expressions, we have simply reused the
067: * Perl5 regular expression classes from org.apache.oro.text.regex by
068: * making GlobCompiler translate a glob expression into a Perl5 expression
069: * that is compiled by a Perl5Compiler instance internal to the GlobCompiler.
070: * <p>
071: * Because there are various similar glob expression syntaxes, GlobCompiler
072: * tries to provide a small amount of customization by providing the
073: * {@link #STAR_CANNOT_MATCH_NULL_MASK}
074: * and {@link #QUESTION_MATCHES_ZERO_OR_ONE_MASK} compilation options.
075: * <p>
076: * The GlobCompiler expression syntax is based on Unix shell glob expressions
077: * but should be usable to simulate Win32 wildcards. The following syntax
078: * is supported:
079: * <ul>
080: * <li> <b>*</b> - Matches zero or more instances of any character. If the
081: * STAR_CANNOT_MATCH_NULL_MASK option is used, <b>*</b> matches
082: * one or more instances of any character.
083: * <li> <b>?</b> - Matches one instance of any character. If the
084: * QUESTION_MATCHES_ZERO_OR_ONE_MASK option is used, <b>?</b>
085: * matches zero or one instances of any character.
086: * <li> <b>[...]</b> - Matches any of characters enclosed by the brackets.
087: * <b> * </b> and <b>?</b> lose their special meanings within a
088: * character class. Additionaly if the first character following
089: * the opening bracket is a <b>!</b> or a <b>^</b>, then any
090: * character not in the character class is matched. A <b>-</b>
091: * between two characters can be used to denote a range. A
092: * <b>-</b> at the beginning or end of the character class matches
093: * itself rather than referring to a range. A <b>]</b> immediately
094: * following the opening <b>[</b> matches itself rather than
095: * indicating the end of the character class, otherwise it must be
096: * escaped with a backslash to refer to itself.
097: * <li> <b>\</b> - A backslash matches itself in most situations. But
098: * when a special character such as a <b>*</b> follows it, a
099: * backslash <em> escapes </em> the character, indicating that
100: * the special chracter should be interpreted as a normal character
101: * instead of its special meaning.
102: * <li> All other characters match themselves.
103: * </ul>
104: * <p>
105: * Please remember that the when you construct a Java string in Java code,
106: * the backslash character is itself a special Java character, and it must
107: * be double backslashed to represent single backslash in a regular
108: * expression.
109: *
110: * @version @version@
111: * @since 1.0
112: * @see org.apache.oro.text.regex.PatternCompiler
113: * @see org.apache.oro.text.regex.Perl5Matcher
114: */
115: public final class GlobCompiler implements PatternCompiler {
116: /**
117: * The default mask for the {@link #compile compile} methods.
118: * It is equal to 0. The default behavior is for a glob expression to
119: * be case sensitive unless it is compiled with the CASE_INSENSITIVE_MASK
120: * option.
121: */
122: public static final int DEFAULT_MASK = 0;
123:
124: /**
125: * A mask passed as an option to the {@link #compile compile} methods
126: * to indicate a compiled glob expression should be case insensitive.
127: */
128: public static final int CASE_INSENSITIVE_MASK = 0x0001;
129:
130: /**
131: * A mask passed as an option to the {@link #compile compile} methods
132: * to indicate that a * should not be allowed to match the null string.
133: * The normal behavior of the * metacharacter is that it may match any
134: * 0 or more characters. This mask causes it to match 1 or more
135: * characters of anything.
136: */
137: public static final int STAR_CANNOT_MATCH_NULL_MASK = 0x0002;
138:
139: /**
140: * A mask passed as an option to the {@link #compile compile} methods
141: * to indicate that a ? should not be allowed to match the null string.
142: * The normal behavior of the ? metacharacter is that it may match any 1
143: * character. This mask causes it to match 0 or 1 characters.
144: */
145: public static final int QUESTION_MATCHES_ZERO_OR_ONE_MASK = 0x0004;
146:
147: /**
148: * A mask passed as an option to the {@link #compile compile} methods
149: * to indicate that the resulting Perl5Pattern should be treated as a
150: * read only data structure by Perl5Matcher, making it safe to share
151: * a single Perl5Pattern instance among multiple threads without needing
152: * synchronization. Without this option, Perl5Matcher reserves the right
153: * to store heuristic or other information in Perl5Pattern that might
154: * accelerate future matches. When you use this option, Perl5Matcher will
155: * not store or modify any information in a Perl5Pattern. Use this option
156: * when you want to share a Perl5Pattern instance among multiple threads
157: * using different Perl5Matcher instances.
158: */
159: public static final int READ_ONLY_MASK = 0x0008;
160:
161: private Perl5Compiler __perl5Compiler;
162:
163: private static boolean __isPerl5MetaCharacter(char ch) {
164: return (ch == '*' || ch == '?' || ch == '+' || ch == '['
165: || ch == ']' || ch == '(' || ch == ')' || ch == '|'
166: || ch == '^' || ch == '$' || ch == '.' || ch == '{'
167: || ch == '}' || ch == '\\');
168: }
169:
170: private static boolean __isGlobMetaCharacter(char ch) {
171: return (ch == '*' || ch == '?' || ch == '[' || ch == ']');
172: }
173:
174: /**
175: * This static method is the basic engine of the Glob PatternCompiler
176: * implementation. It takes a glob expression in the form of a character
177: * array and converts it into a String representation of a Perl5 pattern.
178: * The method is made public so that programmers may use it for their
179: * own purposes. However, the GlobCompiler compile methods work by
180: * converting the glob pattern to a Perl5 pattern using this method, and
181: * then invoking the compile() method of an internally stored Perl5Compiler
182: * instance.
183: * <p>
184: * @param pattern A character array representation of a Glob pattern.
185: * @return A String representation of a Perl5 pattern equivalent to the
186: * Glob pattern.
187: */
188: public static String globToPerl5(char[] pattern, int options) {
189: boolean inCharSet, starCannotMatchNull = false, questionMatchesZero;
190: int ch;
191: StringBuffer buffer;
192:
193: buffer = new StringBuffer(2 * pattern.length);
194: inCharSet = false;
195:
196: questionMatchesZero = ((options & QUESTION_MATCHES_ZERO_OR_ONE_MASK) != 0);
197: starCannotMatchNull = ((options & STAR_CANNOT_MATCH_NULL_MASK) != 0);
198:
199: for (ch = 0; ch < pattern.length; ch++) {
200: switch (pattern[ch]) {
201: case '*':
202: if (inCharSet)
203: buffer.append('*');
204: else {
205: if (starCannotMatchNull)
206: buffer.append(".+");
207: else
208: buffer.append(".*");
209: }
210: break;
211: case '?':
212: if (inCharSet)
213: buffer.append('?');
214: else {
215: if (questionMatchesZero)
216: buffer.append(".?");
217: else
218: buffer.append('.');
219: }
220: break;
221: case '[':
222: inCharSet = true;
223: buffer.append(pattern[ch]);
224:
225: if (ch + 1 < pattern.length) {
226: switch (pattern[ch + 1]) {
227: case '!':
228: case '^':
229: buffer.append('^');
230: ++ch;
231: continue;
232: case ']':
233: buffer.append(']');
234: ++ch;
235: continue;
236: }
237: }
238: break;
239: case ']':
240: inCharSet = false;
241: buffer.append(pattern[ch]);
242: break;
243: case '\\':
244: buffer.append('\\');
245: if (ch == pattern.length - 1) {
246: buffer.append('\\');
247: } else if (__isGlobMetaCharacter(pattern[ch + 1]))
248: buffer.append(pattern[++ch]);
249: else
250: buffer.append('\\');
251: break;
252: default:
253: if (!inCharSet && __isPerl5MetaCharacter(pattern[ch]))
254: buffer.append('\\');
255: buffer.append(pattern[ch]);
256: break;
257: }
258: }
259:
260: return buffer.toString();
261: }
262:
263: /**
264: * The default GlobCompiler constructor. It initializes an internal
265: * Perl5Compiler instance to compile translated glob expressions.
266: */
267: public GlobCompiler() {
268: __perl5Compiler = new Perl5Compiler();
269: }
270:
271: /**
272: * Compiles a Glob expression into a Perl5Pattern instance that
273: * can be used by a Perl5Matcher object to perform pattern matching.
274: * <p>
275: * @param pattern A Glob expression to compile.
276: * @param options A set of flags giving the compiler instructions on
277: * how to treat the glob expression. The flags
278: * are a logical OR of any number of the 3 <b>MASK</b>
279: * constants. For example:
280: * <pre>
281: * regex =
282: * compiler.compile(pattern, GlobCompiler.
283: * CASE_INSENSITIVE_MASK |
284: * GlobCompiler.STAR_CANNOT_MATCH_NULL_MASK);
285: * </pre>
286: * This says to compile the pattern so that *
287: * cannot match the null string and to perform
288: * matches in a case insensitive manner.
289: * @return A Pattern instance constituting the compiled expression.
290: * This instance will always be a Perl5Pattern and can be reliably
291: * casted to a Perl5Pattern.
292: * @exception MalformedPatternException If the compiled expression
293: * is not a valid Glob expression.
294: */
295: public Pattern compile(char[] pattern, int options)
296: throws MalformedPatternException {
297: int perlOptions = 0;
298: if ((options & CASE_INSENSITIVE_MASK) != 0)
299: perlOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK;
300: if ((options & READ_ONLY_MASK) != 0)
301: perlOptions |= Perl5Compiler.READ_ONLY_MASK;
302: return __perl5Compiler.compile(globToPerl5(pattern, options),
303: perlOptions);
304: }
305:
306: /**
307: * Same as calling <b>compile(pattern, GlobCompiler.DEFAULT_MASK);</b>
308: * <p>
309: * @param pattern A regular expression to compile.
310: * @return A Pattern instance constituting the compiled regular expression.
311: * This instance will always be a Perl5Pattern and can be reliably
312: * casted to a Perl5Pattern.
313: * @exception MalformedPatternException If the compiled expression
314: * is not a valid Glob expression.
315: */
316: public Pattern compile(char[] pattern)
317: throws MalformedPatternException {
318: return compile(pattern, DEFAULT_MASK);
319: }
320:
321: /**
322: * Same as calling <b>compile(pattern, GlobCompiler.DEFAULT_MASK);</b>
323: * <p>
324: * @param pattern A regular expression to compile.
325: * @return A Pattern instance constituting the compiled regular expression.
326: * This instance will always be a Perl5Pattern and can be reliably
327: * casted to a Perl5Pattern.
328: * @exception MalformedPatternException If the compiled expression
329: * is not a valid Glob expression.
330: */
331: public Pattern compile(String pattern)
332: throws MalformedPatternException {
333: return compile(pattern.toCharArray(), DEFAULT_MASK);
334: }
335:
336: /**
337: * Compiles a Glob expression into a Perl5Pattern instance that
338: * can be used by a Perl5Matcher object to perform pattern matching.
339: * <p>
340: * @param pattern A Glob expression to compile.
341: * @param options A set of flags giving the compiler instructions on
342: * how to treat the glob expression. The flags
343: * are a logical OR of any number of the 3 <b>MASK</b>
344: * constants. For example:
345: * <pre>
346: * regex =
347: * compiler.compile("*.*", GlobCompiler.
348: * CASE_INSENSITIVE_MASK |
349: * GlobCompiler.STAR_CANNOT_MATCH_NULL_MASK);
350: * </pre>
351: * This says to compile the pattern so that *
352: * cannot match the null string and to perform
353: * matches in a case insensitive manner.
354: * @return A Pattern instance constituting the compiled expression.
355: * This instance will always be a Perl5Pattern and can be reliably
356: * casted to a Perl5Pattern.
357: * @exception MalformedPatternException If the compiled expression
358: * is not a valid Glob expression.
359: */
360: public Pattern compile(String pattern, int options)
361: throws MalformedPatternException {
362: return compile(pattern.toCharArray(), options);
363: }
364:
365: }
|