001: /*
002: **********************************************************************
003: * Copyright (c) 2001-2004, International Business Machines
004: * Corporation and others. All Rights Reserved.
005: **********************************************************************
006: * Date Name Description
007: * 11/19/2001 aliu Creation.
008: **********************************************************************
009: */
010: package com.ibm.icu.text;
011:
012: import com.ibm.icu.impl.Utility;
013:
014: /**
015: * A transliterator that converts Unicode characters to an escape
016: * form. Examples of escape forms are "U+4E01" and "&amp;#x10FFFF;".
017: * Escape forms have a prefix and suffix, either of which may be
018: * empty, a radix, typically 16 or 10, a minimum digit count,
019: * typically 1, 4, or 8, and a boolean that specifies whether
020: * supplemental characters are handled as 32-bit code points or as two
021: * 16-bit code units. Most escape forms handle 32-bit code points,
022: * but some, such as the Java form, intentionally break them into a
023: * surrogate pair (two 16-bit code units), for backward compatibility.
024: *
025: * <p>Some escape forms actually have two different patterns, one for
026: * BMP characters (0..FFFF) and one for supplements (>FFFF). To
027: * handle this, a second EscapeTransliterator may be defined that
028: * specifies the pattern to be produced for supplementals. An example
029: * of a form that requires this is the C form, which uses "\\uFFFF"
030: * for BMP characters and "\\U0010FFFF" for supplementals.
031: *
032: * <p>This class is package private. It registers several standard
033: * variants with the system which are then accessed via their IDs.
034: *
035: * @author Alan Liu
036: */
037: class EscapeTransliterator extends Transliterator {
038:
039: /**
040: * The prefix of the escape form; may be empty, but usually isn't.
041: * May not be null.
042: */
043: private String prefix;
044:
045: /**
046: * The prefix of the escape form; often empty. May not be null.
047: */
048: private String suffix;
049:
050: /**
051: * The radix to display the number in. Typically 16 or 10. Must
052: * be in the range 2 to 36.
053: */
054: private int radix;
055:
056: /**
057: * The minimum number of digits. Typically 1, 4, or 8. Values
058: * less than 1 are equivalent to 1.
059: */
060: private int minDigits;
061:
062: /**
063: * If true, supplementals are handled as 32-bit code points. If
064: * false, they are handled as two 16-bit code units.
065: */
066: private boolean grokSupplementals;
067:
068: /**
069: * The form to be used for supplementals. If this is null then
070: * the same form is used for BMP characters and supplementals. If
071: * this is not null and if grokSupplementals is true then the
072: * prefix, suffix, radix, and minDigits of this object are used
073: * for supplementals.
074: */
075: private EscapeTransliterator supplementalHandler;
076:
077: /**
078: * Registers standard variants with the system. Called by
079: * Transliterator during initialization.
080: */
081: static void register() {
082: // Unicode: "U+10FFFF" hex, min=4, max=6
083: Transliterator.registerFactory("Any-Hex/Unicode",
084: new Transliterator.Factory() {
085: public Transliterator getInstance(String ID) {
086: return new EscapeTransliterator(
087: "Any-Hex/Unicode", "U+", "", 16, 4,
088: true, null);
089: }
090: });
091:
092: // Java: "\\uFFFF" hex, min=4, max=4
093: Transliterator.registerFactory("Any-Hex/Java",
094: new Transliterator.Factory() {
095: public Transliterator getInstance(String ID) {
096: return new EscapeTransliterator("Any-Hex/Java",
097: "\\u", "", 16, 4, false, null);
098: }
099: });
100:
101: // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
102: Transliterator.registerFactory("Any-Hex/C",
103: new Transliterator.Factory() {
104: public Transliterator getInstance(String ID) {
105: return new EscapeTransliterator("Any-Hex/C",
106: "\\u", "", 16, 4, true,
107: new EscapeTransliterator("", "\\U", "",
108: 16, 8, true, null));
109: }
110: });
111:
112: // XML: "" hex, min=1, max=6
113: Transliterator.registerFactory("Any-Hex/XML",
114: new Transliterator.Factory() {
115: public Transliterator getInstance(String ID) {
116: return new EscapeTransliterator("Any-Hex/XML",
117: "&#x", ";", 16, 1, true, null);
118: }
119: });
120:
121: // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
122: Transliterator.registerFactory("Any-Hex/XML10",
123: new Transliterator.Factory() {
124: public Transliterator getInstance(String ID) {
125: return new EscapeTransliterator(
126: "Any-Hex/XML10", "&#", ";", 10, 1,
127: true, null);
128: }
129: });
130:
131: // Perl: "\\x{263A}" hex, min=1, max=6
132: Transliterator.registerFactory("Any-Hex/Perl",
133: new Transliterator.Factory() {
134: public Transliterator getInstance(String ID) {
135: return new EscapeTransliterator("Any-Hex/Perl",
136: "\\x{", "}", 16, 1, true, null);
137: }
138: });
139:
140: // Generic
141: Transliterator.registerFactory("Any-Hex",
142: new Transliterator.Factory() {
143: public Transliterator getInstance(String ID) {
144: return new EscapeTransliterator("Any-Hex",
145: "\\u", "", 16, 4, false, null);
146: }
147: });
148: }
149:
150: /**
151: * Constructs an escape transliterator with the given ID and
152: * parameters. See the class member documentation for details.
153: */
154: EscapeTransliterator(String ID, String prefix, String suffix,
155: int radix, int minDigits, boolean grokSupplementals,
156: EscapeTransliterator supplementalHandler) {
157: super (ID, null);
158: this .prefix = prefix;
159: this .suffix = suffix;
160: this .radix = radix;
161: this .minDigits = minDigits;
162: this .grokSupplementals = grokSupplementals;
163: this .supplementalHandler = supplementalHandler;
164: }
165:
166: /**
167: * Implements {@link Transliterator#handleTransliterate}.
168: */
169: protected void handleTransliterate(Replaceable text, Position pos,
170: boolean incremental) {
171: int start = pos.start;
172: int limit = pos.limit;
173:
174: StringBuffer buf = new StringBuffer(prefix);
175: int prefixLen = prefix.length();
176: boolean redoPrefix = false;
177:
178: while (start < limit) {
179: int c = grokSupplementals ? text.char32At(start) : text
180: .charAt(start);
181: int charLen = grokSupplementals ? UTF16.getCharCount(c) : 1;
182:
183: if ((c & 0xFFFF0000) != 0 && supplementalHandler != null) {
184: buf.setLength(0);
185: buf.append(supplementalHandler.prefix);
186: Utility.appendNumber(buf, c, supplementalHandler.radix,
187: supplementalHandler.minDigits);
188: buf.append(supplementalHandler.suffix);
189: redoPrefix = true;
190: } else {
191: if (redoPrefix) {
192: buf.setLength(0);
193: buf.append(prefix);
194: redoPrefix = false;
195: } else {
196: buf.setLength(prefixLen);
197: }
198: Utility.appendNumber(buf, c, radix, minDigits);
199: buf.append(suffix);
200: }
201:
202: text.replace(start, start + charLen, buf.toString());
203: start += buf.length();
204: limit += buf.length() - charLen;
205: }
206:
207: pos.contextLimit += limit - pos.limit;
208: pos.limit = limit;
209: pos.start = start;
210: }
211: }
|