001 /*
002 * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004 *
005 * This code is free software; you can redistribute it and/or modify it
006 * under the terms of the GNU General Public License version 2 only, as
007 * published by the Free Software Foundation. Sun designates this
008 * particular file as subject to the "Classpath" exception as provided
009 * by Sun in the LICENSE file that accompanied this code.
010 *
011 * This code is distributed in the hope that it will be useful, but WITHOUT
012 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014 * version 2 for more details (a copy is included in the LICENSE file that
015 * accompanied this code).
016 *
017 * You should have received a copy of the GNU General Public License version
018 * 2 along with this work; if not, write to the Free Software Foundation,
019 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020 *
021 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022 * CA 95054 USA or visit www.sun.com if you need additional information or
023 * have any questions.
024 */
025
026 package java.awt.font;
027
028 /**
029 * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
030 * digits to other Unicode decimal digits. Users of this class will
031 * primarily be people who wish to present data using
032 * national digit shapes, but find it more convenient to represent the
033 * data internally using Latin-1 (European) digits. This does not
034 * interpret the deprecated numeric shape selector character (U+206E).
035 * <p>
036 * Instances of <code>NumericShaper</code> are typically applied
037 * as attributes to text with the
038 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
039 * of the <code>TextAttribute</code> class.
040 * For example, this code snippet causes a <code>TextLayout</code> to
041 * shape European digits to Arabic in an Arabic context:<br>
042 * <blockquote><pre>
043 * Map map = new HashMap();
044 * map.put(TextAttribute.NUMERIC_SHAPING,
045 * NumericShaper.getContextualShaper(NumericShaper.ARABIC));
046 * FontRenderContext frc = ...;
047 * TextLayout layout = new TextLayout(text, map, frc);
048 * layout.draw(g2d, x, y);
049 * </pre></blockquote>
050 * <br>
051 * It is also possible to perform numeric shaping explicitly using instances
052 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
053 * <blockquote><pre>
054 * char[] text = ...;
055 * // shape all EUROPEAN digits to ARABIC digits
056 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
057 * shaper.shape(text, start, count);
058 *
059 * // shape European digits to ARABIC digits if preceding text is Arabic, or
060 * // shape European digits to TAMIL digits if preceding text is Tamil, or
061 * // leave European digits alone if there is no preceding text, or
062 * // preceding text is neither Arabic nor Tamil
063 * NumericShaper shaper =
064 * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
065 * NumericShaper.TAMIL,
066 * NumericShaper.EUROPEAN);
067 * shaper.shape(text, start, count);
068 * </pre></blockquote>
069 *
070 * @since 1.4
071 */
072
public final class NumericShaper implements java.io.Serializable {
    /*
     * NOTE: key and mask are intentionally not declared final. This class does
     * not declare a serialVersionUID, and the default value is derived from,
     * among other things, the modifiers of the non-static, non-transient
     * fields, so changing them would make previously serialized instances
     * unreadable. Only private static members and private methods are added or
     * changed below, none of which take part in that computation.
     */

    /** index of context for contextual shaping - values range from 0 to 18 */
    private int key;

    /** flag indicating whether to shape contextually (high bit) and which
     *  digit ranges to shape (bits 0-18)
     */
    private int mask;

    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1 << 0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1 << 1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1 << 2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1 << 3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1 << 4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1 << 5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1 << 6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1 << 7;

    /** Identifies the TAMIL range and decimal base. Tamil does not have a
     *  decimal digit 0 so Latin-1 (European) 0 is used.
     */
    public static final int TAMIL = 1 << 8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1 << 9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1 << 10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1 << 11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1 << 12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1 << 13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1 << 14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1 << 15;

    /** Identifies the ETHIOPIC range and decimal base. Ethiopic does not
     *  have a decimal digit 0 so Latin-1 (European) 0 is used.
     */
    public static final int ETHIOPIC = 1 << 16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1 << 17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1 << 18;

    /** Identifies all ranges, for full contextual shaping. */
    public static final int ALL_RANGES = 0x0007ffff;

    // Each *_KEY is the bit index of the matching public range constant and
    // the index of that range in keyNames, bases and (times two) contexts.
    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    private static final int NUM_KEYS = 19;

    private static final String[] keyNames = {
        "EUROPEAN", "ARABIC", "EASTERN_ARABIC", "DEVANAGARI", "BENGALI",
        "GURMUKHI", "GUJARATI", "ORIYA", "TAMIL", "TELUGU", "KANNADA",
        "MALAYALAM", "THAI", "LAO", "TIBETAN", "MYANMAR", "ETHIOPIC",
        "KHMER", "MONGOLIAN"
    };

    private static final int CONTEXTUAL_MASK = 1 << 31;

    /**
     * Offset added to a European digit to obtain the digit of each range,
     * indexed by key. Every entry is (code point of the range's digit zero)
     * minus '0'. Tamil and Ethiopic have no digit zero in this table's view:
     * their first digit (U+0BE7, U+1369) is digit ONE, so the entry is based
     * on the code point just before it and zero is never shaped for those
     * ranges (see minDigitFor).
     */
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC
        '\u06f0' - '\u0030', // EASTERN_ARABIC
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        '\u0be6' - '\u0030', // TAMIL - digit one is U+0BE7, zero is not shaped
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1368' - '\u0030', // ETHIOPIC - digit one is U+1369, zero is not shaped
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };

    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    /**
     * Start (inclusive, even index) and limit (exclusive, odd index) of the
     * character range of each key, followed by a trailing sentinel.
     */
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0700', // ARABIC
        '\u0600', '\u0700', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };

    /**
     * Guards ctCache and stCache. Both caches are static, so they have to be
     * protected by a class-wide lock; a per-instance monitor would let two
     * shaper instances update them concurrently.
     */
    private static final Object CACHE_LOCK = new Object();

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.

    private static int ctCache = 0;
    private static final int ctCacheLimit = contexts.length - 2;

    /**
     * Returns the key of the range containing c, or EUROPEAN_KEY if c lies in
     * no known range. Because the ARABIC and EASTERN_ARABIC ranges are
     * identical, either key may be returned for a character in that range,
     * depending on the direction the cached index is approached from.
     * Callers must hold CACHE_LOCK since this updates ctCache.
     */
    private static int getContextKey(char c) {
        if (c < contexts[ctCache]) {
            while (ctCache > 0 && c < contexts[ctCache]) {
                --ctCache;
            }
        } else if (c >= contexts[ctCache + 1]) {
            while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) {
                ++ctCache;
            }
        }

        // if we're not in a known range, then return EUROPEAN as the range key
        return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
    }

    /*
     * A range table of strong directional characters (types L, R, AL).
     * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
     * characters, odd (right) indexes are starts of ranges of strong directional
     * characters.
     */
    private static final char[] strongTable = {
        '\u0000', '\u0041', '\u005b',
        '\u0061', '\u007b', '\u00aa', '\u00ab', '\u00b5', '\u00b6',
        '\u00ba', '\u00bb', '\u00c0', '\u00d7', '\u00d8', '\u00f7',
        '\u00f8', '\u0220', '\u0222', '\u0234', '\u0250', '\u02ae',
        '\u02b0', '\u02b9', '\u02bb', '\u02c2', '\u02d0', '\u02d2',
        '\u02e0', '\u02e5', '\u02ee', '\u02ef', '\u037a', '\u037b',
        '\u0386', '\u0387', '\u0388', '\u038b', '\u038c', '\u038d',
        '\u038e', '\u03a2', '\u03a3', '\u03cf', '\u03d0', '\u03d8',
        '\u03da', '\u03f4', '\u0400', '\u0483', '\u048c', '\u04c5',
        '\u04c7', '\u04c9', '\u04cb', '\u04cd', '\u04d0', '\u04f6',
        '\u04f8', '\u04fa', '\u0531', '\u0557', '\u0559', '\u0560',
        '\u0561', '\u0588', '\u0589', '\u058a', '\u05be', '\u05bf',
        '\u05c0', '\u05c1', '\u05c3', '\u05c4', '\u05d0', '\u05eb',
        '\u05f0', '\u05f5', '\u061b', '\u061c', '\u061f', '\u0620',
        '\u0621', '\u063b', '\u0640', '\u064b', '\u066d', '\u066e',
        '\u0671', '\u06d6', '\u06e5', '\u06e7', '\u06fa', '\u06ff',
        '\u0700', '\u070e', '\u0710', '\u0711', '\u0712', '\u072d',
        '\u0780', '\u07a6', '\u0903', '\u0904', '\u0905', '\u093a',
        '\u093d', '\u0941', '\u0949', '\u094d', '\u0950', '\u0951',
        '\u0958', '\u0962', '\u0964', '\u0971', '\u0982', '\u0984',
        '\u0985', '\u098d', '\u098f', '\u0991', '\u0993', '\u09a9',
        '\u09aa', '\u09b1', '\u09b2', '\u09b3', '\u09b6', '\u09ba',
        '\u09be', '\u09c1', '\u09c7', '\u09c9', '\u09cb', '\u09cd',
        '\u09d7', '\u09d8', '\u09dc', '\u09de', '\u09df', '\u09e2',
        '\u09e6', '\u09f2', '\u09f4', '\u09fb', '\u0a05', '\u0a0b',
        '\u0a0f', '\u0a11', '\u0a13', '\u0a29', '\u0a2a', '\u0a31',
        '\u0a32', '\u0a34', '\u0a35', '\u0a37', '\u0a38', '\u0a3a',
        '\u0a3e', '\u0a41', '\u0a59', '\u0a5d', '\u0a5e', '\u0a5f',
        '\u0a66', '\u0a70', '\u0a72', '\u0a75', '\u0a83', '\u0a84',
        '\u0a85', '\u0a8c', '\u0a8d', '\u0a8e', '\u0a8f', '\u0a92',
        '\u0a93', '\u0aa9', '\u0aaa', '\u0ab1', '\u0ab2', '\u0ab4',
        '\u0ab5', '\u0aba', '\u0abd', '\u0ac1', '\u0ac9', '\u0aca',
        '\u0acb', '\u0acd', '\u0ad0', '\u0ad1', '\u0ae0', '\u0ae1',
        '\u0ae6', '\u0af0', '\u0b02', '\u0b04', '\u0b05', '\u0b0d',
        '\u0b0f', '\u0b11', '\u0b13', '\u0b29', '\u0b2a', '\u0b31',
        '\u0b32', '\u0b34', '\u0b36', '\u0b3a', '\u0b3d', '\u0b3f',
        '\u0b40', '\u0b41', '\u0b47', '\u0b49', '\u0b4b', '\u0b4d',
        '\u0b57', '\u0b58', '\u0b5c', '\u0b5e', '\u0b5f', '\u0b62',
        '\u0b66', '\u0b71', '\u0b83', '\u0b84', '\u0b85', '\u0b8b',
        '\u0b8e', '\u0b91', '\u0b92', '\u0b96', '\u0b99', '\u0b9b',
        '\u0b9c', '\u0b9d', '\u0b9e', '\u0ba0', '\u0ba3', '\u0ba5',
        '\u0ba8', '\u0bab', '\u0bae', '\u0bb6', '\u0bb7', '\u0bba',
        '\u0bbe', '\u0bc0', '\u0bc1', '\u0bc3', '\u0bc6', '\u0bc9',
        '\u0bca', '\u0bcd', '\u0bd7', '\u0bd8', '\u0be7', '\u0bf3',
        '\u0c01', '\u0c04', '\u0c05', '\u0c0d', '\u0c0e', '\u0c11',
        '\u0c12', '\u0c29', '\u0c2a', '\u0c34', '\u0c35', '\u0c3a',
        '\u0c41', '\u0c45', '\u0c60', '\u0c62', '\u0c66', '\u0c70',
        '\u0c82', '\u0c84', '\u0c85', '\u0c8d', '\u0c8e', '\u0c91',
        '\u0c92', '\u0ca9', '\u0caa', '\u0cb4', '\u0cb5', '\u0cba',
        '\u0cbe', '\u0cbf', '\u0cc0', '\u0cc5', '\u0cc7', '\u0cc9',
        '\u0cca', '\u0ccc', '\u0cd5', '\u0cd7', '\u0cde', '\u0cdf',
        '\u0ce0', '\u0ce2', '\u0ce6', '\u0cf0', '\u0d02', '\u0d04',
        '\u0d05', '\u0d0d', '\u0d0e', '\u0d11', '\u0d12', '\u0d29',
        '\u0d2a', '\u0d3a', '\u0d3e', '\u0d41', '\u0d46', '\u0d49',
        '\u0d4a', '\u0d4d', '\u0d57', '\u0d58', '\u0d60', '\u0d62',
        '\u0d66', '\u0d70', '\u0d82', '\u0d84', '\u0d85', '\u0d97',
        '\u0d9a', '\u0db2', '\u0db3', '\u0dbc', '\u0dbd', '\u0dbe',
        '\u0dc0', '\u0dc7', '\u0dcf', '\u0dd2', '\u0dd8', '\u0de0',
        '\u0df2', '\u0df5', '\u0e01', '\u0e31', '\u0e32', '\u0e34',
        '\u0e40', '\u0e47', '\u0e4f', '\u0e5c', '\u0e81', '\u0e83',
        '\u0e84', '\u0e85', '\u0e87', '\u0e89', '\u0e8a', '\u0e8b',
        '\u0e8d', '\u0e8e', '\u0e94', '\u0e98', '\u0e99', '\u0ea0',
        '\u0ea1', '\u0ea4', '\u0ea5', '\u0ea6', '\u0ea7', '\u0ea8',
        '\u0eaa', '\u0eac', '\u0ead', '\u0eb1', '\u0eb2', '\u0eb4',
        '\u0ebd', '\u0ebe', '\u0ec0', '\u0ec5', '\u0ec6', '\u0ec7',
        '\u0ed0', '\u0eda', '\u0edc', '\u0ede', '\u0f00', '\u0f18',
        '\u0f1a', '\u0f35', '\u0f36', '\u0f37', '\u0f38', '\u0f39',
        '\u0f3e', '\u0f48', '\u0f49', '\u0f6b', '\u0f7f', '\u0f80',
        '\u0f85', '\u0f86', '\u0f88', '\u0f8c', '\u0fbe', '\u0fc6',
        '\u0fc7', '\u0fcd', '\u0fcf', '\u0fd0', '\u1000', '\u1022',
        '\u1023', '\u1028', '\u1029', '\u102b', '\u102c', '\u102d',
        '\u1031', '\u1032', '\u1038', '\u1039', '\u1040', '\u1058',
        '\u10a0', '\u10c6', '\u10d0', '\u10f7', '\u10fb', '\u10fc',
        '\u1100', '\u115a', '\u115f', '\u11a3', '\u11a8', '\u11fa',
        '\u1200', '\u1207', '\u1208', '\u1247', '\u1248', '\u1249',
        '\u124a', '\u124e', '\u1250', '\u1257', '\u1258', '\u1259',
        '\u125a', '\u125e', '\u1260', '\u1287', '\u1288', '\u1289',
        '\u128a', '\u128e', '\u1290', '\u12af', '\u12b0', '\u12b1',
        '\u12b2', '\u12b6', '\u12b8', '\u12bf', '\u12c0', '\u12c1',
        '\u12c2', '\u12c6', '\u12c8', '\u12cf', '\u12d0', '\u12d7',
        '\u12d8', '\u12ef', '\u12f0', '\u130f', '\u1310', '\u1311',
        '\u1312', '\u1316', '\u1318', '\u131f', '\u1320', '\u1347',
        '\u1348', '\u135b', '\u1361', '\u137d', '\u13a0', '\u13f5',
        '\u1401', '\u1677', '\u1681', '\u169b', '\u16a0', '\u16f1',
        '\u1780', '\u17b7', '\u17be', '\u17c6', '\u17c7', '\u17c9',
        '\u17d4', '\u17db', '\u17dc', '\u17dd', '\u17e0', '\u17ea',
        '\u1810', '\u181a', '\u1820', '\u1878', '\u1880', '\u18a9',
        '\u1e00', '\u1e9c', '\u1ea0', '\u1efa', '\u1f00', '\u1f16',
        '\u1f18', '\u1f1e', '\u1f20', '\u1f46', '\u1f48', '\u1f4e',
        '\u1f50', '\u1f58', '\u1f59', '\u1f5a', '\u1f5b', '\u1f5c',
        '\u1f5d', '\u1f5e', '\u1f5f', '\u1f7e', '\u1f80', '\u1fb5',
        '\u1fb6', '\u1fbd', '\u1fbe', '\u1fbf', '\u1fc2', '\u1fc5',
        '\u1fc6', '\u1fcd', '\u1fd0', '\u1fd4', '\u1fd6', '\u1fdc',
        '\u1fe0', '\u1fed', '\u1ff2', '\u1ff5', '\u1ff6', '\u1ffd',
        '\u200e', '\u2010', '\u207f', '\u2080', '\u2102', '\u2103',
        '\u2107', '\u2108', '\u210a', '\u2114', '\u2115', '\u2116',
        '\u2119', '\u211e', '\u2124', '\u2125', '\u2126', '\u2127',
        '\u2128', '\u2129', '\u212a', '\u212e', '\u212f', '\u2132',
        '\u2133', '\u213a', '\u2160', '\u2184', '\u2336', '\u237b',
        '\u2395', '\u2396', '\u249c', '\u24ea', '\u3005', '\u3008',
        '\u3021', '\u302a', '\u3031', '\u3036', '\u3038', '\u303b',
        '\u3041', '\u3095', '\u309d', '\u309f', '\u30a1', '\u30fb',
        '\u30fc', '\u30ff', '\u3105', '\u312d', '\u3131', '\u318f',
        '\u3190', '\u31b8', '\u3200', '\u321d', '\u3220', '\u3244',
        '\u3260', '\u327c', '\u327f', '\u32b1', '\u32c0', '\u32cc',
        '\u32d0', '\u32ff', '\u3300', '\u3377', '\u337b', '\u33de',
        '\u33e0', '\u33ff', '\u3400', '\u4db6', '\u4e00', '\u9fa6',
        '\ua000', '\ua48d', '\uac00', '\ud7a4', '\uf900', '\ufa2e',
        '\ufb00', '\ufb07', '\ufb13', '\ufb18', '\ufb1d', '\ufb1e',
        '\ufb1f', '\ufb29', '\ufb2a', '\ufb37', '\ufb38', '\ufb3d',
        '\ufb3e', '\ufb3f', '\ufb40', '\ufb42', '\ufb43', '\ufb45',
        '\ufb46', '\ufbb2', '\ufbd3', '\ufd3e', '\ufd50', '\ufd90',
        '\ufd92', '\ufdc8', '\ufdf0', '\ufdfc', '\ufe70', '\ufe73',
        '\ufe74', '\ufe75', '\ufe76', '\ufefd', '\uff21', '\uff3b',
        '\uff41', '\uff5b', '\uff66', '\uffbf', '\uffc2', '\uffc8',
        '\uffca', '\uffd0', '\uffd2', '\uffd8', '\uffda', '\uffdd',
        '\uffff' // last entry is a sentinel; the search lands on it for U+FFFF
    };

    // use a binary search with a cache

    private static int stCache = 0;

    /**
     * Returns true if c falls in a strong-directional range of strongTable.
     * Callers must hold CACHE_LOCK since this updates stCache.
     */
    private static boolean isStrongDirectional(char c) {
        if (c < strongTable[stCache]) {
            stCache = search(c, strongTable, 0, stCache);
        } else if (stCache + 1 < strongTable.length
                   && c >= strongTable[stCache + 1]) {
            // The bound check matters when the cache rests on the final
            // sentinel entry: there is no following entry to compare with.
            stCache = search(c, strongTable, stCache + 1,
                             strongTable.length - stCache - 1);
        }
        return (stCache & 0x1) == 1;
    }

    /**
     * Converts a single-range mask into its key (bit index).
     * @throws IllegalArgumentException if mask does not consist of exactly
     * one of the range bits 0 to NUM_KEYS - 1
     */
    private static int getKeyFromMask(int mask) {
        int key = 0;
        while (key < NUM_KEYS && ((mask & (1 << key)) == 0)) {
            ++key;
        }
        if (key == NUM_KEYS || ((mask & ~(1 << key)) != 0)) {
            throw new IllegalArgumentException("invalid shaper: "
                                               + Integer.toHexString(mask));
        }
        return key;
    }

    /**
     * Returns the lowest European digit that is shaped for the given key:
     * '1' for the ranges that have no digit zero (Tamil, Ethiopic), '0'
     * for every other range.
     */
    private static char minDigitFor(int key) {
        return (key == TAMIL_KEY || key == ETHIOPIC_KEY) ? '\u0031' : '\u0030';
    }

    /**
     * Returns a shaper for the provided unicode range.  All
     * Latin-1 (EUROPEAN) digits are converted
     * to the corresponding decimal unicode digits.
     * @param singleRange the specified Unicode range
     * @return a non-contextual numeric shaper
     * @throws IllegalArgumentException if the range is not a single range
     */
    public static NumericShaper getShaper(int singleRange) {
        int key = getKeyFromMask(singleRange);
        return new NumericShaper(key, singleRange);
    }

    /**
     * Returns a contextual shaper for the provided unicode range(s).
     * Latin-1 (EUROPEAN) digits are converted to the decimal digits
     * corresponding to the range of the preceding text, if the
     * range is one of the provided ranges.  Multiple ranges are
     * represented by or-ing the values together, such as,
     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
     * shaper assumes EUROPEAN as the starting context, that is, if
     * EUROPEAN digits are encountered before any strong directional
     * text in the string, the context is presumed to be EUROPEAN, and
     * so the digits will not shape.
     * @param ranges the specified Unicode ranges
     * @return a shaper for the specified ranges
     */
    public static NumericShaper getContextualShaper(int ranges) {
        ranges |= CONTEXTUAL_MASK;
        return new NumericShaper(EUROPEAN_KEY, ranges);
    }

    /**
     * Returns a contextual shaper for the provided unicode range(s).
     * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
     * corresponding to the range of the preceding text, if the
     * range is one of the provided ranges.  Multiple ranges are
     * represented by or-ing the values together, for example,
     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
     * shaper uses defaultContext as the starting context.
     * @param ranges the specified Unicode ranges
     * @param defaultContext the starting context, such as
     * <code>NumericShaper.EUROPEAN</code>
     * @return a shaper for the specified Unicode ranges.
     * @throws IllegalArgumentException if the specified
     * <code>defaultContext</code> is not a single valid range.
     */
    public static NumericShaper getContextualShaper(int ranges,
                                                    int defaultContext) {
        int key = getKeyFromMask(defaultContext);
        ranges |= CONTEXTUAL_MASK;
        return new NumericShaper(key, ranges);
    }

    /**
     * Private constructor.
     */
    private NumericShaper(int key, int mask) {
        this.key = key;
        this.mask = mask;
    }

    /**
     * Validates the arguments shared by both <code>shape</code> methods.
     * The <code>(start + count) &lt; 0</code> test also rejects an
     * <code>int</code> overflow of the sum.
     */
    private static void checkParams(char[] text, int start, int count) {
        if (text == null) {
            throw new NullPointerException("text is null");
        }
        if ((start < 0) || (start > text.length)
                || ((start + count) < 0)
                || ((start + count) > text.length)) {
            throw new IndexOutOfBoundsException(
                "bad start or count for text of length " + text.length);
        }
    }

    /**
     * Converts the digits in the text that occur between start and
     * start + count.
     * @param text an array of characters to convert
     * @param start the index into <code>text</code> to start
     *        converting
     * @param count the number of characters in <code>text</code>
     *        to convert
     * @throws IndexOutOfBoundsException if start or start + count is
     *        out of bounds
     * @throws NullPointerException if text is null
     */
    public void shape(char[] text, int start, int count) {
        checkParams(text, start, count);

        if (isContextual()) {
            shapeContextually(text, start, count, key);
        } else {
            shapeNonContextually(text, start, count);
        }
    }

    /**
     * Converts the digits in the text that occur between start and
     * start + count, using the provided context.
     * Context is ignored if the shaper is not a contextual shaper.
     * @param text an array of characters
     * @param start the index into <code>text</code> to start
     *        converting
     * @param count the number of characters in <code>text</code>
     *        to convert
     * @param context the context to which to convert the
     *        characters, such as <code>NumericShaper.EUROPEAN</code>
     * @throws IndexOutOfBoundsException if start or start + count is
     *        out of bounds
     * @throws NullPointerException if text is null
     * @throws IllegalArgumentException if this is a contextual shaper
     * and the specified <code>context</code> is not a single valid
     * range.
     */
    public void shape(char[] text, int start, int count, int context) {
        checkParams(text, start, count);

        if (isContextual()) {
            int ctxKey = getKeyFromMask(context);
            shapeContextually(text, start, count, ctxKey);
        } else {
            shapeNonContextually(text, start, count);
        }
    }

    /**
     * Returns a <code>boolean</code> indicating whether or not
     * this shaper shapes contextually.
     * @return <code>true</code> if this shaper is contextual;
     *         <code>false</code> otherwise.
     */
    public boolean isContextual() {
        return (mask & CONTEXTUAL_MASK) != 0;
    }

    /**
     * Returns an <code>int</code> that ORs together the values for
     * all the ranges that will be shaped.
     * <p>
     * For example, to check if a shaper shapes to Arabic, you would use the
     * following:
     * <blockquote>
     *   <code>if ((shaper.getRanges() &amp; NumericShaper.ARABIC) != 0) { ... </code>
     * </blockquote>
     * @return the values for all the ranges to be shaped.
     */
    public int getRanges() {
        return mask & ~CONTEXTUAL_MASK;
    }

    /**
     * Perform non-contextual shaping: every European digit from the range's
     * minimum digit through '9' is moved by the range's base offset.
     */
    private void shapeNonContextually(char[] text, int start, int count) {
        int base = bases[key];
        char minDigit = minDigitFor(key);
        for (int i = start, e = start + count; i < e; ++i) {
            char c = text[i];
            if (c >= minDigit && c <= '\u0039') {
                text[i] = (char) (c + base);
            }
        }
    }

    /**
     * Perform contextual shaping. Digits are shaped according to the range
     * of the most recent strong directional character, starting from ctxKey.
     * The scan runs under CACHE_LOCK because getContextKey and
     * isStrongDirectional update static caches.
     */
    private void shapeContextually(char[] text, int start, int count,
                                   int ctxKey) {
        // if we don't support this context, then don't shape
        if ((mask & (1 << ctxKey)) == 0) {
            ctxKey = EUROPEAN_KEY;
        }
        int lastkey = ctxKey;

        int base = bases[ctxKey];
        char minDigit = minDigitFor(ctxKey);

        synchronized (CACHE_LOCK) {
            for (int i = start, e = start + count; i < e; ++i) {
                char c = text[i];
                if (c >= minDigit && c <= '\u0039') {
                    text[i] = (char) (c + base);
                }

                if (isStrongDirectional(c)) {
                    int newkey = getContextKey(c);
                    if (newkey != lastkey) {
                        lastkey = newkey;

                        // ARABIC and EASTERN_ARABIC cover the same characters
                        // and getContextKey may report either one, so resolve
                        // the pair against the enabled ranges instead of
                        // testing the reported key's own bit.
                        boolean arabicRange = newkey == ARABIC_KEY
                                || newkey == EASTERN_ARABIC_KEY;
                        if (arabicRange && (mask & EASTERN_ARABIC) != 0) {
                            ctxKey = EASTERN_ARABIC_KEY;
                        } else if (arabicRange && (mask & ARABIC) != 0) {
                            ctxKey = ARABIC_KEY;
                        } else if ((mask & (1 << newkey)) == 0) {
                            ctxKey = EUROPEAN_KEY;
                        } else {
                            ctxKey = newkey;
                        }

                        base = bases[ctxKey];
                        minDigit = minDigitFor(ctxKey);
                    }
                }
            }
        }
    }

    /**
     * Returns a hash code for this shaper.
     * @return this shaper's hash code.
     * @see java.lang.Object#hashCode
     */
    @Override
    public int hashCode() {
        return mask;
    }

    /**
     * Returns true if the specified object is an instance of
     * <code>NumericShaper</code> and shapes identically to this one.
     * @param o the specified object to compare to this
     *          <code>NumericShaper</code>
     * @return <code>true</code> if <code>o</code> is an instance
     *         of <code>NumericShaper</code> and shapes in the same way;
     *         <code>false</code> otherwise.
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof NumericShaper)) {
            return false;
        }
        NumericShaper rhs = (NumericShaper) o;
        return rhs.mask == mask && rhs.key == key;
    }

    /**
     * Returns a <code>String</code> that describes this shaper. This method
     * is used for debugging purposes only.
     * @return a <code>String</code> describing this shaper.
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder(super.toString());

        buf.append("[contextual:").append(isContextual());

        if (isContextual()) {
            buf.append(", context:").append(keyNames[key]);
        }

        buf.append(", range(s): ");
        boolean first = true;
        for (int i = 0; i < NUM_KEYS; ++i) {
            if ((mask & (1 << i)) != 0) {
                if (first) {
                    first = false;
                } else {
                    buf.append(", ");
                }
                buf.append(keyNames[i]);
            }
        }
        buf.append(']');

        return buf.toString();
    }

    /**
     * Returns the index of the highest set bit in value, or -32 if value is
     * not positive.
     */
    private static int getHighBit(int value) {
        if (value <= 0) {
            return -32;
        }
        return 31 - Integer.numberOfLeadingZeros(value);
    }

    /**
     * fast binary search over subrange of array. Returns the index of the
     * last element in [start, start + length) that is not greater than
     * value, or start if even array[start] is greater. length must be
     * positive and the subrange must be sorted in ascending order.
     */
    private static int search(char value, char[] array, int start,
                              int length) {
        int power = 1 << getHighBit(length);
        int extra = length - power;
        int probe = power;
        int index = start;

        // First step over the part of the range that exceeds a power of two,
        // then halve the probe distance until it reaches one.
        if (value >= array[index + extra]) {
            index += extra;
        }

        while (probe > 1) {
            probe >>= 1;

            if (value >= array[index + probe]) {
                index += probe;
            }
        }

        return index;
    }
}
|