001: package gnu.text;
002:
003: import java.io.*;
004: import java.util.Hashtable;
005: import gnu.mapping.*;
006:
007: /**
008: * A wrapper for characters.
* @author Per Bothner
010: */
011:
012: /*
013: * This is similar to java.lang.Character, so why don't we just use that?
014: * Good question, since this new class makes us a little less compatible
015: * with "standard" Java. However, that should be fairly minor, since
016: * few methods will require Character parameters or arrays (better to
017: * just use chars then).
018: * The Char class uses hashing to ensure that characters are unique.
019: * Thus equal? Char are eq?, which is convenient.
020: * Also, using our own class lets us make sure it implements Printable.
021: * Finally, we can use 32-bit character values to allow for non-Unicode chars.
022: */
023:
024: public class Char implements Printable, Externalizable {
025: // Leave open the possibility for characters beyond Unicode.
026: int value;
027:
028: /** Should only be used for serialization. */
029: public Char() {
030: }
031:
032: private Char(char ch) {
033: value = (int) ch;
034: }
035:
036: private Char(int ch) {
037: value = ch;
038: }
039:
040: public final char charValue() {
041: return (char) value;
042: }
043:
044: public final int intValue() {
045: return value;
046: }
047:
048: public int hashCode() {
049: return value;
050: }
051:
052: static Char[] ascii;
053:
054: static Char temp = new Char(0);
055: static Hashtable hashTable;
056:
057: static {
058: ascii = new Char[128];
059: for (int i = 128; --i >= 0;)
060: ascii[i] = new Char(i);
061: }
062:
063: public static Char make(int ch) {
064: if (ch < 128)
065: return ascii[ch];
066: else {
067: // Re-writing this will allow equals to just use ==. FIXME.
068: temp.value = ch;
069: if (hashTable == null)
070: hashTable = new Hashtable();
071: Object entry = hashTable.get(temp);
072: if (entry != null)
073: return (Char) entry;
074: Char newChar = new Char(ch);
075: hashTable.put(newChar, newChar);
076: return newChar;
077: }
078: }
079:
080: public boolean equals(Object obj) {
081: // This does not work for hashing in make! Redo make! FIXME
082: // return this == obj;
083: return obj != null && (obj instanceof Char)
084: && ((Char) obj).intValue() == value;
085: }
086:
087: static char[] charNameValues = { ' ', '\t', '\n', '\n', '\r', '\f',
088: '\b', '\033', '\177', '\177', '\007', '\0' };
089: static String[] charNames = { "space", "tab", "newline",
090: "linefeed", "return", "page", "backspace", "esc", "del",
091: "rubout", "bel", "nul" };
092:
093: public static int nameToChar(String name) {
094: for (int i = charNames.length; --i >= 0;) {
095: if (charNames[i].equals(name))
096: return charNameValues[i];
097: }
098: for (int i = charNames.length; --i >= 0;) {
099: if (charNames[i].equalsIgnoreCase(name))
100: return charNameValues[i];
101: }
102: int len = name.length();
103: if (len > 1 && name.charAt(0) == 'u') {
104: int value = 0;
105: for (int pos = 1;; pos++) {
106: if (pos == len)
107: return value;
108: int dig = Character.digit(name.charAt(pos), 16);
109: if (dig < 0)
110: break;
111: value = (value << 4) + dig;
112: }
113: }
114:
115: // Check for Emacs control character syntax.
116: if (len == 3 && name.charAt(1) == '-') {
117: char ch = name.charAt(0);
118: if (ch == 'c' || ch == 'C') {
119: ch = name.charAt(2);
120: return ch & 31;
121: }
122: }
123:
124: return -1;
125: }
126:
127: public String toString() {
128: StringBuffer buf = new StringBuffer();
129: buf.append("[Char '");
130: if (value >= (int) ' ' && value < 127)
131: buf.append((char) value);
132: else {
133: buf.append('\\');
134: buf.append(Integer.toOctalString(value));
135: }
136: buf.append("']");
137: return buf.toString();
138: }
139:
140: public static String toScmReadableString(int ch) {
141: StringBuffer sbuf = new StringBuffer(20);
142: sbuf.append("#\\");
143: for (int i = 0; i < charNameValues.length; i++) {
144: if ((char) ch == charNameValues[i]) {
145: sbuf.append(charNames[i]);
146: return sbuf.toString();
147: }
148: }
149: if (ch < 8) {
150: sbuf.append('0'); // make sure there at least two octal digits
151: sbuf.append(ch);
152: } else if (ch < ' ' || ch > 0x7F) {
153: sbuf.append(Integer.toString(ch, 8));
154: } else
155: sbuf.append((char) ch);
156: return sbuf.toString();
157: }
158:
159: public void print(PrintWriter ps) {
160: boolean readable = (ps instanceof OutPort)
161: && ((OutPort) ps).printReadable;
162: char ch = charValue();
163: if (readable)
164: ps.print(toScmReadableString(ch));
165: else
166: ps.print(ch);
167: }
168:
169: /**
170: * @serialData Writes the char value as a char.
171: * If the value is > 0xFFFF, write a pair of surrogate values.
172: * If the value is is a high surrogate only, write it followed by '\0'.
173: */
174: public void writeExternal(ObjectOutput out) throws IOException {
175: if (value > 0xD800) {
176: if (value > 0xFFFF) {
177: out.writeChar(((value - 0x10000) >> 10) + 0xD800);
178: value = (value & 0x3FF) + 0xDC00;
179: } else if (value <= 0xDBFF) {
180: out.writeChar(value);
181: value = '\0';
182: }
183: }
184: out.writeChar(value);
185: }
186:
187: public void readExternal(ObjectInput in) throws IOException,
188: ClassNotFoundException {
189: value = in.readChar();
190: if (value >= 0xD800 && value < 0xDBFF) {
191: char next = in.readChar();
192: if (next >= 0xDC00 && next <= 0xDFFF)
193: value = ((value - 0xD800) << 10) + (next - 0xDC00)
194: + 0x10000;
195: }
196: }
197:
198: public Object readResolve() throws ObjectStreamException {
199: return make(value);
200: }
201:
202: /**
203: * Temporary methods, because Kawa cannot inline primitive comparisons yet.*/
204: public static boolean $Eq(char c1, char c2) {
205: return c1 == c2;
206: }
207:
208: public static boolean $Ls(char c1, char c2) {
209: return c1 < c2;
210: }
211:
212: public static boolean $Gr(char c1, char c2) {
213: return c1 > c2;
214: }
215:
216: public static boolean $Ls$Eq(char c1, char c2) {
217: return c1 <= c2;
218: }
219:
220: public static boolean $Gr$Eq(char c1, char c2) {
221: return c1 >= c2;
222: }
223: }
|