001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.commons.lang.text;
018:
019: import java.util.Arrays;
020:
021: /**
022: * A matcher class that can be queried to determine if a character array
023: * portion matches.
024: * <p>
025: * This class comes complete with various factory methods.
026: * If these do not suffice, you can subclass and implement your own matcher.
027: *
028: * @author Stephen Colebourne
029: * @since 2.2
030: * @version $Id: StrMatcher.java 437554 2006-08-28 06:21:41Z bayard $
031: */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(
            " \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher(
            '\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher(
            '"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher(
            "'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Factory method that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Factory method that creates a matcher from a set of characters.
     * <p>
     * The array is copied, so later changes to it do not affect the matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(char[] chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            // A single character does not need the sorted-array lookup.
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Factory method that creates a matcher from a string representing a set of characters.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(String chars) {
        if (chars == null || chars.length() == 0) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            // A single character does not need the sorted-array lookup.
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Factory method that creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(String str) {
        if (str == null || str.length() == 0) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter <code>pos</code> represents the current position to be
     * checked in the string <code>buffer</code> (a character array which must
     * not be changed).
     * The API guarantees that <code>pos</code> is a valid index for <code>buffer</code>.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding <code>pos</code> as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos,
            int bufferStart, int bufferEnd);

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(char[] chars) {
            super();
            // Defensive copy: the caller's array is neither retained nor reordered.
            this.chars = (char[]) chars.clone();
            // Arrays.binarySearch in isMatch requires sorted input.
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        public int isMatch(char[] buffer, int pos, int bufferStart,
                int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(char ch) {
            super();
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        public int isMatch(char[] buffer, int pos, int bufferStart,
                int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (a fixed sequence of characters)
     * for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(String str) {
            super();
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at
         *  <code>pos</code>, zero for no match
         */
        public int isMatch(char[] buffer, int pos, int bufferStart,
                int bufferEnd) {
            int len = chars.length;
            // The whole string must fit inside the active area of the buffer.
            if (pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always returns zero, as nothing ever matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always zero
         */
        public int isMatch(char[] buffer, int pos, int bufferStart,
                int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the number of matching characters, zero for no match
         */
        public int isMatch(char[] buffer, int pos, int bufferStart,
                int bufferEnd) {
            // Any code up to and including the space character (32) matches.
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}
|