001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.commons.lang;
018:
019: import java.io.Serializable;
020: import java.util.HashMap;
021: import java.util.HashSet;
022: import java.util.Iterator;
023: import java.util.Map;
024: import java.util.Set;
025:
026: /**
027: * <p>A set of characters.</p>
028: *
029: * <p>Instances are immutable, but instances of subclasses may not be.</p>
030: *
031: * @author Stephen Colebourne
032: * @author Phil Steitz
033: * @author Pete Gieser
034: * @author Gary Gregory
035: * @since 1.0
036: * @version $Id: CharSet.java 471626 2006-11-06 04:02:09Z bayard $
037: */
038: public class CharSet implements Serializable {
039:
040: /**
041: * Required for serialization support. Lang version 2.0.
042: *
043: * @see java.io.Serializable
044: */
045: private static final long serialVersionUID = 5947847346149275958L;
046:
047: /**
048: * A CharSet defining no characters.
049: * @since 2.0
050: */
051: public static final CharSet EMPTY = new CharSet((String) null);
052:
053: /**
054: * A CharSet defining ASCII alphabetic characters "a-zA-Z".
055: * @since 2.0
056: */
057: public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
058:
059: /**
060: * A CharSet defining ASCII alphabetic characters "a-z".
061: * @since 2.0
062: */
063: public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
064:
065: /**
066: * A CharSet defining ASCII alphabetic characters "A-Z".
067: * @since 2.0
068: */
069: public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
070:
071: /**
072: * A CharSet defining ASCII alphabetic characters "0-9".
073: * @since 2.0
074: */
075: public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
076:
077: /**
078: * A Map of the common cases used in the factory.
079: * Subclasses can add more common patterns if desired.
080: * @since 2.0
081: */
082: protected static final Map COMMON = new HashMap();
083:
084: static {
085: COMMON.put(null, EMPTY);
086: COMMON.put("", EMPTY);
087: COMMON.put("a-zA-Z", ASCII_ALPHA);
088: COMMON.put("A-Za-z", ASCII_ALPHA);
089: COMMON.put("a-z", ASCII_ALPHA_LOWER);
090: COMMON.put("A-Z", ASCII_ALPHA_UPPER);
091: COMMON.put("0-9", ASCII_NUMERIC);
092: }
093:
094: /** The set of CharRange objects. */
095: private Set set = new HashSet();
096:
097: //-----------------------------------------------------------------------
098: /**
099: * <p>Factory method to create a new CharSet using a special syntax.</p>
100: *
101: * <ul>
102: * <li><code>null</code> or empty string ("")
103: * - set containing no characters</li>
104: * <li>Single character, such as "a"
105: * - set containing just that character</li>
106: * <li>Multi character, such as "a-e"
107: * - set containing characters from one character to the other</li>
108: * <li>Negated, such as "^a" or "^a-e"
109: * - set containing all characters except those defined</li>
110: * <li>Combinations, such as "abe-g"
111: * - set containing all the characters from the individual sets</li>
112: * </ul>
113: *
114: * <p>The matching order is:</p>
115: * <ol>
116: * <li>Negated multi character range, such as "^a-e"
117: * <li>Ordinary multi character range, such as "a-e"
118: * <li>Negated single character, such as "^a"
119: * <li>Ordinary single character, such as "a"
120: * </ol>
121: * <p>Matching works left to right. Once a match is found the
122: * search starts again from the next character.</p>
123: *
124: * <p>If the same range is defined twice using the same syntax, only
125: * one range will be kept.
126: * Thus, "a-ca-c" creates only one range of "a-c".</p>
127: *
128: * <p>If the start and end of a range are in the wrong order,
129: * they are reversed. Thus "a-e" is the same as "e-a".
130: * As a result, "a-ee-a" would create only one range,
131: * as the "a-e" and "e-a" are the same.</p>
132: *
133: * <p>The set of characters represented is the union of the specified ranges.</p>
134: *
135: * <p>All CharSet objects returned by this method will be immutable.</p>
136: *
137: * @param setStr the String describing the set, may be null
138: * @return a CharSet instance
139: * @since 2.0
140: */
141: public static CharSet getInstance(String setStr) {
142: Object set = COMMON.get(setStr);
143: if (set != null) {
144: return (CharSet) set;
145: }
146: return new CharSet(setStr);
147: }
148:
149: //-----------------------------------------------------------------------
150: /**
151: * <p>Constructs a new CharSet using the set syntax.</p>
152: *
153: * @param setStr the String describing the set, may be null
154: * @since 2.0
155: */
156: protected CharSet(String setStr) {
157: super ();
158: add(setStr);
159: }
160:
161: /**
162: * <p>Constructs a new CharSet using the set syntax.
163: * Each string is merged in with the set.</p>
164: *
165: * @param set Strings to merge into the initial set
166: * @throws NullPointerException if set is <code>null</code>
167: */
168: protected CharSet(String[] set) {
169: super ();
170: int sz = set.length;
171: for (int i = 0; i < sz; i++) {
172: add(set[i]);
173: }
174: }
175:
176: //-----------------------------------------------------------------------
177: /**
178: * <p>Add a set definition string to the <code>CharSet</code>.</p>
179: *
180: * @param str set definition string
181: */
182: protected void add(String str) {
183: if (str == null) {
184: return;
185: }
186:
187: int len = str.length();
188: int pos = 0;
189: while (pos < len) {
190: int remainder = (len - pos);
191: if (remainder >= 4 && str.charAt(pos) == '^'
192: && str.charAt(pos + 2) == '-') {
193: // negated range
194: set.add(new CharRange(str.charAt(pos + 1), str
195: .charAt(pos + 3), true));
196: pos += 4;
197: } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
198: // range
199: set.add(new CharRange(str.charAt(pos), str
200: .charAt(pos + 2)));
201: pos += 3;
202: } else if (remainder >= 2 && str.charAt(pos) == '^') {
203: // negated char
204: set.add(new CharRange(str.charAt(pos + 1), true));
205: pos += 2;
206: } else {
207: // char
208: set.add(new CharRange(str.charAt(pos)));
209: pos += 1;
210: }
211: }
212: }
213:
214: //-----------------------------------------------------------------------
215: /**
216: * <p>Gets the internal set as an array of CharRange objects.</p>
217: *
218: * @return an array of immutable CharRange objects
219: * @since 2.0
220: */
221: public CharRange[] getCharRanges() {
222: return (CharRange[]) set.toArray(new CharRange[set.size()]);
223: }
224:
225: //-----------------------------------------------------------------------
226: /**
227: * <p>Does the <code>CharSet</code> contain the specified
228: * character <code>ch</code>.</p>
229: *
230: * @param ch the character to check for
231: * @return <code>true</code> if the set contains the characters
232: */
233: public boolean contains(char ch) {
234: for (Iterator it = set.iterator(); it.hasNext();) {
235: CharRange range = (CharRange) it.next();
236: if (range.contains(ch)) {
237: return true;
238: }
239: }
240: return false;
241: }
242:
243: // Basics
244: //-----------------------------------------------------------------------
245: /**
246: * <p>Compares two CharSet objects, returning true if they represent
247: * exactly the same set of characters defined in the same way.</p>
248: *
249: * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i>
250: * equal according to this method.</p>
251: *
252: * @param obj the object to compare to
253: * @return true if equal
254: * @since 2.0
255: */
256: public boolean equals(Object obj) {
257: if (obj == this ) {
258: return true;
259: }
260: if (obj instanceof CharSet == false) {
261: return false;
262: }
263: CharSet other = (CharSet) obj;
264: return set.equals(other.set);
265: }
266:
267: /**
268: * <p>Gets a hashCode compatible with the equals method.</p>
269: *
270: * @return a suitable hashCode
271: * @since 2.0
272: */
273: public int hashCode() {
274: return 89 + set.hashCode();
275: }
276:
277: /**
278: * <p>Gets a string representation of the set.</p>
279: *
280: * @return string representation of the set
281: */
282: public String toString() {
283: return set.toString();
284: }
285:
286: }
|