001: /*
002: *
003: *
004: * Copyright 1990-2007 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: */
026:
027: package com.sun.perseus.parser;
028:
029: /**
030: * The <code>UnicodeParser</code> class converts attributes conforming to the
031: * <code><hkern></code>'s <code>u1/u2</code> attributes syntax.
032: *
033: * @version $Id: UnicodeParser.java,v 1.2 2006/04/21 06:40:32 st125089 Exp $
034: */
035: public class UnicodeParser extends AbstractParser {
036: /**
037: * Parses the input unicode range value and turns it into a
038: * set of two (possibly identical) unicode range values.
039: *
040: * @param unicode unicode range string to parse
041: * @return an array of unicode ranges of size 2.
042: * @throws IllegalArgumentException if unicode is null.
043: */
044: public int[][] parseUnicode(final String unicode)
045: throws IllegalArgumentException {
046:
047: setString(unicode);
048:
049: if (unicode.length() == 0) {
050: throw new IllegalArgumentException();
051: }
052:
053: // Slow motion parser to simplify memory allocation
054:
055: // First count the number of ranges (',' seperated)
056: int ranges = 1;
057: while ((current = read()) != -1) {
058: if (current == ',') {
059: ranges++;
060: }
061: }
062:
063: setString(unicode);
064: current = read();
065:
066: int[][] result = new int[ranges][];
067: int cur = 0;
068: while (current != -1) {
069: if (current == 'U') {
070: result[cur] = parseUnicodeRange(',');
071: } else {
072: if (current == ',') {
073: throw new IllegalArgumentException();
074: }
075: result[cur] = new int[2];
076: result[cur][0] = current;
077: result[cur][1] = current;
078: current = read();
079: if (current != ',') {
080: if (current != -1) {
081: throw new IllegalArgumentException();
082: }
083: } else {
084: current = read();
085: }
086: }
087:
088: cur++;
089: }
090:
091: return result;
092: }
093:
094: /**
095: * @param endOn specifies the character value that defines the end of the
096: * unicode value.
097: * @return an array of two integers defining the lower and upper values in
098: * the unicode range.
099: */
100: protected int[] parseUnicodeRange(final char endOn) {
101: current = read();
102: if (current != '+') {
103: throw new IllegalArgumentException();
104: }
105:
106: // Now, read the first unicode value. The acceptable
107: // values are: [0-9A-Fa-f?]
108: StringBuffer sb = new StringBuffer();
109: current = read();
110: loop: while (current != -1 && current != ',') {
111: switch (current) {
112: case '0':
113: case '1':
114: case '2':
115: case '3':
116: case '4':
117: case '5':
118: case '6':
119: case '7':
120: case '8':
121: case '9':
122: case 'a':
123: case 'b':
124: case 'c':
125: case 'd':
126: case 'e':
127: case 'f':
128: case 'A':
129: case 'B':
130: case 'C':
131: case 'D':
132: case 'E':
133: case 'F':
134: case '?':
135: break;
136: case '-':
137: break loop;
138: default:
139: throw new IllegalArgumentException();
140: }
141: sb.append((char) current);
142: current = read();
143: }
144:
145: // If we hit a '-', it means that the
146: // first value is a plain hex value
147: int[] result = new int[2];
148: try {
149: if (current == '-') {
150: result[0] = Integer.parseInt(sb.toString(), 16);
151: current = read();
152: int v = 0;
153: int c = 0;
154: int cur = 0;
155: while (current != -1 && current != ',') {
156: switch (current) {
157: case '0':
158: case '1':
159: case '2':
160: case '3':
161: case '4':
162: case '5':
163: case '6':
164: case '7':
165: case '8':
166: case '9':
167: v = current - 0x30;
168: break;
169: case 'a':
170: case 'b':
171: case 'c':
172: case 'd':
173: case 'e':
174: case 'f':
175: v = current - 0x57;
176: break;
177: case 'A':
178: case 'B':
179: case 'C':
180: case 'D':
181: case 'E':
182: case 'F':
183: v = current - 0x37;
184: break;
185: default:
186: throw new IllegalArgumentException();
187: }
188: if (c > 0) {
189: cur <<= 4;
190: }
191: cur |= (0xff & v);
192: c++;
193: current = read();
194: }
195: if (c == 0) {
196: throw new IllegalArgumentException();
197: }
198: result[1] = cur;
199: } else {
200: String low = sb.toString();
201: String high = low;
202: low = low.replace('?', '0');
203: high = high.replace('?', 'F');
204: result[0] = Integer.parseInt(low, 16);
205: result[1] = Integer.parseInt(high, 16);
206: }
207: } catch (NumberFormatException nfe) {
208: throw new IllegalArgumentException();
209: }
210:
211: return result;
212: }
213:
214: }
|