001: /*
002: *
003: * @(#)PatternEntry.java 1.29 06/10/03
004: *
005: * Portions Copyright 2000-2006 Sun Microsystems, Inc. All Rights
006: * Reserved. Use is subject to license terms.
007: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU General Public License version
011: * 2 only, as published by the Free Software Foundation.
012: *
013: * This program is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * General Public License version 2 for more details (a copy is
017: * included at /legal/license.txt).
018: *
019: * You should have received a copy of the GNU General Public License
020: * version 2 along with this work; if not, write to the Free Software
021: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022: * 02110-1301 USA
023: *
024: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
025: * Clara, CA 95054 or visit www.sun.com if you need additional
026: * information or have any questions.
027: */
028:
029: /*
030: * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
031: * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
032: *
033: * The original version of this source code and documentation is copyrighted
034: * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
035: * materials are provided under terms of a License Agreement between Taligent
036: * and Sun. This technology is protected by multiple US and International
037: * patents. This notice and attribution to Taligent may not be removed.
038: * Taligent is a registered trademark of Taligent, Inc.
039: *
040: */
041:
042: package java.text;
043:
044: import java.lang.Character;
045:
046: /**
047: * Utility class for normalizing and merging patterns for collation.
048: * This is to be used with MergeCollation for adding patterns to an
049: * existing rule table.
050: * @see MergeCollation
051: * @version 1.21 01/19/00
052: * @author Mark Davis, Helena Shih
053: */
054:
055: class PatternEntry {
056: /**
057: * Gets the current extension, quoted
058: */
059: public void appendQuotedExtension(StringBuffer toAddTo) {
060: appendQuoted(extension, toAddTo);
061: }
062:
063: /**
064: * Gets the current chars, quoted
065: */
066: public void appendQuotedChars(StringBuffer toAddTo) {
067: appendQuoted(chars, toAddTo);
068: }
069:
070: /**
071: * WARNING this is used for searching in a Vector.
072: * Because Vector.indexOf doesn't take a comparator,
073: * this method is ill-defined and ignores strength.
074: */
075: public boolean equals(Object obj) {
076: if (obj == null)
077: return false;
078: PatternEntry other = (PatternEntry) obj;
079: boolean result = chars.equals(other.chars);
080: return result;
081: }
082:
083: public int hashCode() {
084: return chars.hashCode();
085: }
086:
087: /**
088: * For debugging.
089: */
090: public String toString() {
091: StringBuffer result = new StringBuffer();
092: addToBuffer(result, true, false, null);
093: return result.toString();
094: }
095:
096: /**
097: * Gets the strength of the entry.
098: */
099: final int getStrength() {
100: return strength;
101: }
102:
103: /**
104: * Gets the expanding characters of the entry.
105: */
106: final String getExtension() {
107: return extension;
108: }
109:
110: /**
111: * Gets the core characters of the entry.
112: */
113: final String getChars() {
114: return chars;
115: }
116:
117: // ===== privates =====
118:
119: void addToBuffer(StringBuffer toAddTo, boolean showExtension,
120: boolean showWhiteSpace, PatternEntry lastEntry) {
121: if (showWhiteSpace && toAddTo.length() > 0)
122: if (strength == Collator.PRIMARY || lastEntry != null)
123: toAddTo.append('\n');
124: else
125: toAddTo.append(' ');
126: if (lastEntry != null) {
127: toAddTo.append('&');
128: if (showWhiteSpace)
129: toAddTo.append(' ');
130: lastEntry.appendQuotedChars(toAddTo);
131: appendQuotedExtension(toAddTo);
132: if (showWhiteSpace)
133: toAddTo.append(' ');
134: }
135: switch (strength) {
136: case Collator.IDENTICAL:
137: toAddTo.append('=');
138: break;
139: case Collator.TERTIARY:
140: toAddTo.append(',');
141: break;
142: case Collator.SECONDARY:
143: toAddTo.append(';');
144: break;
145: case Collator.PRIMARY:
146: toAddTo.append('<');
147: break;
148: case RESET:
149: toAddTo.append('&');
150: break;
151: case UNSET:
152: toAddTo.append('?');
153: break;
154: }
155: if (showWhiteSpace)
156: toAddTo.append(' ');
157: appendQuoted(chars, toAddTo);
158: if (showExtension && extension.length() != 0) {
159: toAddTo.append('/');
160: appendQuoted(extension, toAddTo);
161: }
162: }
163:
164: static void appendQuoted(String chars, StringBuffer toAddTo) {
165: boolean inQuote = false;
166: char ch = chars.charAt(0);
167: if (Character.isSpaceChar(ch)) {
168: inQuote = true;
169: toAddTo.append('\'');
170: } else {
171: if (PatternEntry.isSpecialChar(ch)) {
172: inQuote = true;
173: toAddTo.append('\'');
174: } else {
175: switch (ch) {
176: case 0x0010:
177: case '\f':
178: case '\r':
179: case '\t':
180: case '\n':
181: case '@':
182: inQuote = true;
183: toAddTo.append('\'');
184: break;
185: case '\'':
186: inQuote = true;
187: toAddTo.append('\'');
188: break;
189: default:
190: if (inQuote) {
191: inQuote = false;
192: toAddTo.append('\'');
193: }
194: break;
195: }
196: }
197: }
198: toAddTo.append(chars);
199: if (inQuote)
200: toAddTo.append('\'');
201: }
202:
203: //========================================================================
204: // Parsing a pattern into a list of PatternEntries....
205: //========================================================================
206:
207: PatternEntry(int strength, StringBuffer chars,
208: StringBuffer extension) {
209: this .strength = strength;
210: this .chars = chars.toString();
211: this .extension = (extension.length() > 0) ? extension
212: .toString() : "";
213: }
214:
215: static class Parser {
216: private String pattern;
217: private int i;
218:
219: public Parser(String pattern) {
220: this .pattern = pattern;
221: this .i = 0;
222: }
223:
224: public PatternEntry next() throws ParseException {
225: int newStrength = UNSET;
226:
227: newChars.setLength(0);
228: newExtension.setLength(0);
229:
230: boolean inChars = true;
231: boolean inQuote = false;
232: mainLoop: while (i < pattern.length()) {
233: char ch = pattern.charAt(i);
234: if (inQuote) {
235: if (ch == '\'') {
236: inQuote = false;
237: } else {
238: if (newChars.length() == 0)
239: newChars.append(ch);
240: else if (inChars)
241: newChars.append(ch);
242: else
243: newExtension.append(ch);
244: }
245: } else
246: switch (ch) {
247: case '=':
248: if (newStrength != UNSET)
249: break mainLoop;
250: newStrength = Collator.IDENTICAL;
251: break;
252: case ',':
253: if (newStrength != UNSET)
254: break mainLoop;
255: newStrength = Collator.TERTIARY;
256: break;
257: case ';':
258: if (newStrength != UNSET)
259: break mainLoop;
260: newStrength = Collator.SECONDARY;
261: break;
262: case '<':
263: if (newStrength != UNSET)
264: break mainLoop;
265: newStrength = Collator.PRIMARY;
266: break;
267: case '&':
268: if (newStrength != UNSET)
269: break mainLoop;
270: newStrength = RESET;
271: break;
272: case '\t':
273: case '\n':
274: case '\f':
275: case '\r':
276: case ' ':
277: break; // skip whitespace
278: case '/':
279: inChars = false;
280: break;
281: case '\'':
282: inQuote = true;
283: ch = pattern.charAt(++i);
284: if (newChars.length() == 0)
285: newChars.append(ch);
286: else if (inChars)
287: newChars.append(ch);
288: else
289: newExtension.append(ch);
290: break;
291: default:
292: if (newStrength == UNSET) {
293: throw new ParseException(
294: "missing char (=,;<&) : "
295: + pattern
296: .substring(
297: i,
298: (i + 10 < pattern
299: .length()) ? i + 10
300: : pattern
301: .length()),
302: i);
303: }
304: if (PatternEntry.isSpecialChar(ch)
305: && (inQuote == false))
306: throw new ParseException(
307: "Unquoted punctuation character : "
308: + Integer.toString(ch, 16),
309: i);
310: if (inChars) {
311: newChars.append(ch);
312: } else {
313: newExtension.append(ch);
314: }
315: break;
316: }
317: i++;
318: }
319: if (newStrength == UNSET)
320: return null;
321: if (newChars.length() == 0) {
322: throw new ParseException(
323: "missing chars (=,;<&): "
324: + pattern
325: .substring(i, (i + 10 < pattern
326: .length()) ? i + 10
327: : pattern.length()), i);
328: }
329:
330: return new PatternEntry(newStrength, newChars, newExtension);
331: }
332:
333: // We re-use these objects in order to improve performance
334: private StringBuffer newChars = new StringBuffer();
335: private StringBuffer newExtension = new StringBuffer();
336:
337: }
338:
339: static boolean isSpecialChar(char ch) {
340: return ((ch == '\u0020')
341: || ((ch <= '\u002F') && (ch >= '\u0022'))
342: || ((ch <= '\u003F') && (ch >= '\u003A'))
343: || ((ch <= '\u0060') && (ch >= '\u005B')) || ((ch <= '\u007E') && (ch >= '\u007B')));
344: }
345:
346: static final int RESET = -2;
347: static final int UNSET = -1;
348:
349: int strength = UNSET;
350: String chars = "";
351: String extension = "";
352: }
|