001: /*
002: * $Id: MetaPattern.java 505472 2007-02-09 20:01:57Z ehillenius $
003: * $Revision: 505472 $ $Date: 2007-02-09 21:01:57 +0100 (Fri, 09 Feb 2007) $
004: *
005: * ==============================================================================
006: * Licensed under the Apache License, Version 2.0 (the "License"); you may not
007: * use this file except in compliance with the License. You may obtain a copy of
008: * the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
014: * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
015: * License for the specific language governing permissions and limitations under
016: * the License.
017: */
018: package wicket.util.parse.metapattern;
019:
020: import java.util.Arrays;
021: import java.util.List;
022: import java.util.regex.Matcher;
023: import java.util.regex.Pattern;
024:
025: /**
026: * Useful class for constructing readable and reusable regular expressions.
027: * <p>
028: * MetaPatterns can be contructed from a simple regular expression String, from
029: * other MetaPatterns (copy constructor), from a list of MetaPatterns or from an
030: * array of MetaPatterns. In this way, it is easy to build up larger patterns
031: * while transparently binding the capturing groups of each MetaPattern for easy
032: * object oriented access to capturing group matches.
033: * <p>
034: * A given MetaPattern can be converted to a Matcher or Pattern. Groups within
035: * the MetaPattern can be used to automatically reference capturing group values
036: * when a match is made with a Matcher object.
037: * <p>
038: * A variety of static constants are provided for use in constructing compound
039: * MetaPatterns. Also, a number of simple parsers have been constructed using
040: * MetaPatterns in the parsers subpackage.
041: *
042: * @author Jonathan Locke
043: */
044: public class MetaPattern {
045: /**
046: * Compiled regular expression pattern, or null if patterns variable is
047: * valid instead
048: */
049: private Pattern pattern;
050:
051: /** List of patterns, or null if pattern variable is valid instead */
052: private List patterns;
053:
054: /** The compiled MetaPattern */
055: private Pattern compiledPattern;
056:
057: // Regexps that are used multiple times in defining meta patterns
058: private static final String _DOUBLE_QUOTED_STRING = "\"[^\"]*?\"";
059: private static final String _SINGLE_QUOTED_STRING = "'[^']*?\'";
060: private static final String _STRING = "(?:[\\w\\-\\.]+|"
061: + _DOUBLE_QUOTED_STRING + "|" + _SINGLE_QUOTED_STRING + ")";
062: private static final String _OPTIONAL_STRING = _STRING + "?";
063: private static final String _VARIABLE_NAME = "[A-Za-z_][A-Za-z0-9_]*";
064: private static final String _XML_ATTRIBUTE_NAME = "[A-Za-z_][A-Za-z0-9_-]*";
065:
066: // Delimiters and punctuation
067: /** Constant for whitespace. */
068: public static final MetaPattern WHITESPACE = new MetaPattern("\\s+");
069:
070: /** Constant for optional whitespace. */
071: public static final MetaPattern OPTIONAL_WHITESPACE = new MetaPattern(
072: "\\s*");
073:
074: /** Constant for non-word. */
075: public static final MetaPattern NON_WORD = new MetaPattern("\\W+");
076:
077: /** Constant for comma. */
078: public static final MetaPattern COMMA = new MetaPattern(",");
079:
080: /** Constant for colon. */
081: public static final MetaPattern COLON = new MetaPattern(":");
082:
083: /** Constant for semicolon. */
084: public static final MetaPattern SEMICOLON = new MetaPattern(";");
085:
086: /** Constant for slash. */
087: public static final MetaPattern SLASH = new MetaPattern("/");
088:
089: /** Constant for backslash. */
090: public static final MetaPattern BACKSLASH = new MetaPattern("\\\\");
091:
092: /** Constant for dot. */
093: public static final MetaPattern DOT = new MetaPattern("\\.");
094:
095: /** Constant for plus. */
096: public static final MetaPattern PLUS = new MetaPattern("\\+");
097:
098: /** Constant for minus. */
099: public static final MetaPattern MINUS = new MetaPattern("-");
100:
101: /** Constant for dash. */
102: public static final MetaPattern DASH = new MetaPattern("-");
103:
104: /** Constant for underscore. */
105: public static final MetaPattern UNDERSCORE = new MetaPattern("_");
106:
107: /** Constant for ampersand. */
108: public static final MetaPattern AMPERSAND = new MetaPattern("&");
109:
110: /** Constant for percent. */
111: public static final MetaPattern PERCENT = new MetaPattern("");
112:
113: /** Constant for dollar. */
114: public static final MetaPattern DOLLAR_SIGN = new MetaPattern("$");
115:
116: /** Constant for pound. */
117: public static final MetaPattern POUND_SIGN = new MetaPattern("#");
118:
119: /** Constant for at. */
120: public static final MetaPattern AT_SIGN = new MetaPattern("@");
121:
122: /** Constant for excl. */
123: public static final MetaPattern EXCLAMATION_POINT = new MetaPattern(
124: "!");
125:
126: /** Constant for tilde. */
127: public static final MetaPattern TILDE = new MetaPattern("~");
128:
129: /** Constant for equals. */
130: public static final MetaPattern EQUALS = new MetaPattern("=");
131:
132: /** Constant for star. */
133: public static final MetaPattern STAR = new MetaPattern("\\*");
134:
135: /** Constant for pipe. */
136: public static final MetaPattern PIPE = new MetaPattern("\\|");
137:
138: /** Constant for left paren. */
139: public static final MetaPattern LEFT_PAREN = new MetaPattern("\\(");
140:
141: /** Constant for right paren. */
142: public static final MetaPattern RIGHT_PAREN = new MetaPattern("\\)");
143:
144: /** Constant for left curly braces. */
145: public static final MetaPattern LEFT_CURLY = new MetaPattern("\\{");
146:
147: /** Constant for right curly braces. */
148: public static final MetaPattern RIGHT_CURLY = new MetaPattern("\\}");
149:
150: /** Constant for left square bracket. */
151: public static final MetaPattern LEFT_SQUARE = new MetaPattern("\\[");
152:
153: /** Constant for right square bracket. */
154: public static final MetaPattern RIGHT_SQUARE = new MetaPattern(
155: "\\]");
156:
157: /** Constant for digit. */
158: public static final MetaPattern DIGIT = new MetaPattern("\\d");
159:
160: /** Constant for digits. */
161: public static final MetaPattern DIGITS = new MetaPattern("\\d+");
162:
163: /** Constant for an integer (of any size). */
164: public static final MetaPattern INTEGER = new MetaPattern("-?\\d+");
165:
166: /** Constant for a floating point number. */
167: public static final MetaPattern FLOATING_POINT_NUMBER = new MetaPattern(
168: "-?\\d+\\.?\\d*|-?\\.\\d+");
169:
170: /** Constant for a positive integer. */
171: public static final MetaPattern POSITIVE_INTEGER = new MetaPattern(
172: "\\d+");
173:
174: /** Constant for hex digit. */
175: public static final MetaPattern HEXADECIMAL_DIGIT = new MetaPattern(
176: "[0-9a-fA-F]");
177:
178: /** Constant for hex digits. */
179: public static final MetaPattern HEXADECIMAL_DIGITS = new MetaPattern(
180: "[0-9a-fA-F]+");
181:
182: /** Constant for anything (string). */
183: public static final MetaPattern ANYTHING = new MetaPattern(".*");
184:
185: /** Constant for anything non-empty (string). */
186: public static final MetaPattern ANYTHING_NON_EMPTY = new MetaPattern(
187: ".+");
188:
189: /** Constant for a word. */
190: public static final MetaPattern WORD = new MetaPattern("\\w+");
191:
192: /** Constant for an optional word. */
193: public static final MetaPattern OPTIONAL_WORD = new MetaPattern(
194: "\\w*");
195:
196: /** Constant for a variable name. */
197: public static final MetaPattern VARIABLE_NAME = new MetaPattern(
198: _VARIABLE_NAME);
199:
200: /** Constant for a xml attribute name. */
201: public static final MetaPattern XML_ATTRIBUTE_NAME = new MetaPattern(
202: _XML_ATTRIBUTE_NAME);
203:
204: /** Constant for perl interpolation. */
205: public static final MetaPattern PERL_INTERPOLATION = new MetaPattern(
206: "$\\{" + _VARIABLE_NAME + "\\}");
207:
208: /** Constant for a double quoted string. */
209: public static final MetaPattern DOUBLE_QUOTED_STRING = new MetaPattern(
210: _DOUBLE_QUOTED_STRING);
211:
212: /** Constant for a string. */
213: public static final MetaPattern STRING = new MetaPattern(_STRING);
214:
215: /** Constant for an optional string. */
216: public static final MetaPattern OPTIONAL_STRING = new MetaPattern(
217: _OPTIONAL_STRING);
218:
219: /**
220: * Constructor for a simple pattern.
221: *
222: * @param pattern
223: * The regular expression pattern to compile
224: */
225: public MetaPattern(final String pattern) {
226: this .pattern = Pattern.compile(pattern);
227: }
228:
229: /**
230: * Copy constructor.
231: *
232: * @param pattern
233: * The meta pattern to copy
234: */
235: public MetaPattern(final MetaPattern pattern) {
236: this .pattern = pattern.pattern;
237: this .patterns = pattern.patterns;
238: this .compiledPattern = pattern.compiledPattern;
239: }
240:
241: /**
242: * Constructs from an array of MetaPatterns.
243: *
244: * @param patterns
245: * Array of MetaPatterns
246: */
247: public MetaPattern(final MetaPattern[] patterns) {
248: this (Arrays.asList(patterns));
249: }
250:
251: /**
252: * Constructs from a list of MetaPatterns
253: *
254: * @param patterns
255: * List of MetaPatterns
256: */
257: public MetaPattern(final List patterns) {
258: this .patterns = patterns;
259: }
260:
261: /**
262: * Creates a matcher against a given input character sequence.
263: *
264: * @param input
265: * The input to match against
266: * @return The matcher
267: */
268: public final Matcher matcher(final CharSequence input) {
269: return matcher(input, 0);
270: }
271:
272: /**
273: * Creates a matcher with the given regexp compile flags. Once you call this
274: * method with a given regexp compile flag value, the pattern will be
275: * compiled. Calling it again with a different value for flags will not
276: * recompile the pattern.
277: *
278: * @param input
279: * The input to match
280: * @param flags
281: * One or more of the standard Java regular expression compile
282: * flags (see {@link Pattern#compile(String, int)})
283: * @return The matcher
284: */
285: public final Matcher matcher(final CharSequence input,
286: final int flags) {
287: compile(flags);
288: return compiledPattern.matcher(input);
289: }
290:
291: /**
292: * Gets the regular expression Pattern for this MetaPattern by compiling it.
293: *
294: * @return Pattern compiled with default Java regular expression compile
295: * flags
296: */
297: public final Pattern pattern() {
298: return pattern(0);
299: }
300:
301: /**
302: * Gets the regular expression Pattern for this MetaPattern by compiling it
303: * using the given flags.
304: *
305: * @param flags
306: * One or more of the standard Java regular expression compile
307: * flags (see {@link Pattern#compile(String, int)})
308: * @return Equivalent Java regular expression Pattern compiled with the
309: * given flags
310: */
311: public final Pattern pattern(final int flags) {
312: compile(flags);
313: return compiledPattern;
314: }
315:
316: /**
317: * Converts this MetaPattern to a String.
318: *
319: * @return A String representing this MetaPattern
320: * @see java.lang.Object#toString()
321: */
322: public String toString() {
323: if (pattern != null) {
324: return pattern.pattern();
325: } else {
326: final StringBuffer buffer = new StringBuffer();
327: for (int i = 0; i < patterns.size(); i++) {
328: buffer.append(patterns.get(i));
329: }
330: return buffer.toString();
331: }
332: }
333:
334: /**
335: * Compiles this MetaPattern with the given Java regular expression flags.
336: *
337: * @param flags
338: * One or more of the standard Java regular expression compile
339: * flags (see {@link Pattern#compile(String, int)})
340: */
341: private synchronized void compile(final int flags) {
342: if (compiledPattern == null) {
343: bind(1);
344: compiledPattern = Pattern.compile(toString(), flags);
345: }
346: }
347:
348: /**
349: * Binds this MetaPattern to one or more capturing groups. Since
350: * MetaPatterns can nest, the binding process can recurse.
351: *
352: * @param group
353: * The initial capturing group number
354: * @return The final capturing group (for use in recursion)
355: */
356: private int bind(int group) {
357: if (this instanceof Group) {
358: ((Group) this ).bind(group++);
359: }
360:
361: if (patterns != null) {
362: for (int i = 0; i < patterns.size(); i++) {
363: group = ((MetaPattern) patterns.get(i)).bind(group);
364: }
365: }
366:
367: return group;
368: }
369: }
|