001: /*
002: * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
003: * Copyright (C) 2006 - Javolution (http://javolution.org/)
004: * All rights reserved.
005: *
006: * Permission to use, copy, modify, and distribute this software is
007: * freely granted, provided that this notice is preserved.
008: */
009: package javolution.text;
010:
011: import javolution.lang.Immutable;
012: import javolution.lang.MathLib;
013:
014: /**
015: * <p> This class represents a set of characters.</p>
016: * <p> Instances of this class are typically used for parsing purposes
017: * (faster than regular expressions for simple patterns). For example:[code]
018: * // Integration with Text.
019: * Text number;
020: * int exponentIndex = number.indexOfAny(CharSet.valueOf('e', 'E'));
021: *
022: * // Integration with TextFormat.
023: * public List<Integer> parse(CharSequence csq, TextFormat.Cursor cursor) {
024: * FastTable<Integer> numbers = FastTable.newInstance();
025: * while (cursor.skip(CharSet.WHITESPACES, csq)) {
026: * numbers.add(TypeFormat.parseInt(csq, cursor));
027: * }
028: * return numbers;
029: * }
030: * [/code]
031: *
032: * @author <a href="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
033: * @version 3.7, January 1, 2006
034: */
035: public final class CharSet implements Immutable {
036:
037: /**
038: * Represents an empty character set.
039: */
040: public static final CharSet EMPTY = new CharSet(new long[0]);
041:
042: /**
043: * Represents white spaces characters according to Java
044: * (see {@link Character#isWhitespace(char)}).
045: */
046: public static final CharSet WHITESPACES = CharSet
047: .valueOf(new char[] { 0x9, 0xA, 0xB, 0xC, 0xD, 0x1C, 0x1D,
048: 0x1E, 0x1F, 0x20, 0x1680, 0x180E, 0x2000, 0x2001,
049: 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2008,
050: 0x2009, 0x200A, 0x200B, 0x2028, 0x2029, 0x205F,
051: 0x3000 });
052:
053: /**
054: * Represents spaces characters according to Java
055: * (see {@link Character#isSpaceChar(char)}).
056: */
057: public static final CharSet SPACES = CharSet.valueOf(new char[] {
058: 0x20, 0xA0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003,
059: 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
060: 0x200B, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000 });
061:
062: /**
063: * Represents ISO control characters according to Java
064: * (see {@link Character#isISOControl(char)}).
065: */
066: public static final CharSet ISO_CONTROLS = CharSet
067: .valueOf(new char[] { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6,
068: 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
069: 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
070: 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x7F,
071: 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
072: 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
073: 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
074: 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F });
075:
076: /**
077: * Holds the containment mapping.
078: */
079: private final long[] _mapping;
080:
081: /**
082: * Creates a character set with the specified mapping.
083: *
084: * @param mapping the character set mapping.
085: */
086: private CharSet(long[] mapping) {
087: _mapping = mapping;
088: }
089:
090: /**
091: * Returns the character set holding the specified characters.
092: *
093: * @param chars the characters contained by this character set.
094: * @return the corresponding character set.
095: */
096: public static CharSet valueOf(char[]/*...*/chars) {
097: int maxChar = 0;
098: for (int i = chars.length; --i >= 0;) {
099: if (chars[i] > maxChar) {
100: maxChar = chars[i];
101: }
102: }
103: CharSet charSet = new CharSet(new long[(maxChar >> 6) + 1]);
104: for (int i = chars.length; --i >= 0;) {
105: char c = chars[i];
106: charSet._mapping[c >> 6] |= 1L << (c & 63);
107: }
108: return charSet;
109: }
110:
111: /**
112: * Returns the character set holding the characters in the specified
113: * range.
114: *
115: * @param first the first character.
116: * @param last the last character.
117: * @return the corresponding character set.
118: * @throws IllegalArgumentException if <code>first > last</code>
119: */
120: public static CharSet rangeOf(char first, char last) {
121: if (first > last)
122: throw new IllegalArgumentException(
123: "first should be less or equal to last");
124: CharSet charSet = new CharSet(new long[(last >> 6) + 1]);
125: for (char c = first; c <= last; c++) {
126: charSet._mapping[c >> 6] |= 1L << (c & 63);
127: }
128: return charSet;
129:
130: }
131:
132: /**
133: * Indicates if the specified character is contained by this character set.
134: *
135: * @param c the character to test.
136: * @return <code>true</code> if this character set contains the specified
137: * character; <code>false</code> otherwise.
138: */
139: public boolean contains(char c) {
140: final int i = c >> 6;
141: return i < _mapping.length ? (_mapping[i] & (1L << (c & 63))) != 0
142: : false;
143: }
144:
145: /**
146: * Returns the character set containing the characters from this
147: * character set plus the characters from the character set specified.
148: *
149: * @param that the set containing the characters to be added.
150: * @return <code>this + that</code>
151: */
152: public CharSet plus(CharSet that) {
153: if (that._mapping.length > this ._mapping.length)
154: return that.plus(this );
155: CharSet result = this .copy();
156: for (int i = that._mapping.length; --i >= 0;) {
157: result._mapping[i] |= that._mapping[i];
158: }
159: return result;
160: }
161:
162: /**
163: * Returns the character set containing the characters from this
164: * character minus the characters from the character set specified.
165: *
166: * @param that the set containing the character to be removed.
167: * @return <code>this - that</code>
168: */
169: public CharSet minus(CharSet that) {
170: CharSet result = this .copy();
171: for (int i = MathLib.min(this ._mapping.length,
172: that._mapping.length); --i >= 0;) {
173: result._mapping[i] &= ~that._mapping[i];
174: }
175: return result;
176: }
177:
178: /**
179: * Returns the textual representation of this character set.
180: *
181: * @return the textual representation.
182: */
183: public String toString() {
184: TextBuilder tb = TextBuilder.newInstance();
185: tb.append('{');
186: int length = _mapping.length << 6;
187: for (int i = 0; i < length; i++) {
188: if (this .contains((char) i)) {
189: if (tb.length() > 1) {
190: tb.append(',');
191: tb.append(' ');
192: }
193: tb.append('\'');
194: tb.append((char) i);
195: tb.append('\'');
196: }
197: }
198: tb.append('}');
199: return tb.toString();
200: }
201:
202: /**
203: * Returns a copy of this character set.
204: *
205: * @return an independant copy.
206: */
207: private CharSet copy() {
208: CharSet charSet = new CharSet(new long[this ._mapping.length]);
209: for (int i = _mapping.length; --i >= 0;) {
210: charSet._mapping[i] = _mapping[i];
211: }
212: return charSet;
213: }
214: }
|