Source Code Cross Referenced for ConditionalSpecialCasing.java in » 6.0-JDK-Core » lang » java » lang » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Home
Java Source Code / Java Documentation
1.6.0 JDK Core
2.6.0 JDK Modules
3.6.0 JDK Modules com.sun
4.6.0 JDK Modules com.sun.java
5.6.0 JDK Modules sun
6.6.0 JDK Platform
7.Ajax
8.Apache Harmony Java SE
9.Aspect oriented
10.Authentication Authorization
11.Blogger System
12.Build
13.Byte Code
14.Cache
15.Chart
16.Chat
17.Code Analyzer
18.Collaboration
19.Content Management System
20.Database Client
21.Database DBMS
22.Database JDBC Connection Pool
23.Database ORM
24.Development
25.EJB Server
26.ERP CRM Financial
27.ESB
28.Forum
29.Game
30.GIS
31.Graphic 3D
32.Graphic Library
33.Groupware
34.HTML Parser
35.IDE
36.IDE Eclipse
37.IDE Netbeans
38.Installer
39.Internationalization Localization
40.Inversion of Control
41.Issue Tracking
42.J2EE
43.J2ME
44.JBoss
45.JMS
46.JMX
47.Library
48.Mail Clients
49.Music
50.Net
51.Parser
52.PDF
53.Portal
54.Profiler
55.Project Management
56.Report
57.RSS RDF
58.Rule Engine
59.Science
60.Scripting
61.Search Engine
62.Security
63.Servlet Container
64.Source Control
65.Swing Library
66.Template Engine
67.Test Coverage
68.Testing
69.UML
70.Web Crawler
71.Web Framework
72.Web Mail
73.Web Server
74.Web Services
75.Web Services apache cxf 2.2.6
76.Web Services AXIS2
77.Wiki Engine
78.Workflow Engines
79.XML
80.XML UI
Java Source Code / Java Documentation » 6.0 JDK Core » lang » java.lang 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001        /*
002         * Copyright 2003-2005 Sun Microsystems, Inc.  All Rights Reserved.
003         * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004         *
005         * This code is free software; you can redistribute it and/or modify it
006         * under the terms of the GNU General Public License version 2 only, as
007         * published by the Free Software Foundation.  Sun designates this
008         * particular file as subject to the "Classpath" exception as provided
009         * by Sun in the LICENSE file that accompanied this code.
010         *
011         * This code is distributed in the hope that it will be useful, but WITHOUT
012         * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013         * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
014         * version 2 for more details (a copy is included in the LICENSE file that
015         * accompanied this code).
016         *
017         * You should have received a copy of the GNU General Public License version
018         * 2 along with this work; if not, write to the Free Software Foundation,
019         * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020         *
021         * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022         * CA 95054 USA or visit www.sun.com if you need additional information or
023         * have any questions.
024         */
025
026        package java.lang;
027
028        import java.text.BreakIterator;
029        import java.util.HashSet;
030        import java.util.Hashtable;
031        import java.util.Iterator;
032        import java.util.Locale;
033        import sun.text.Normalizer;
034
035        /**
036         * This is a utility class for <code>String.toLowerCase()</code> and
037         * <code>String.toUpperCase()</code>, that handles special casing with
038         * conditions.  In other words, it handles the mappings with conditions
039         * that are defined in 
040         * <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">Special
041         * Casing Properties</a> file.  
042         * <p>
043         * Note that the unconditional case mappings (including 1:M mappings)
044         * are handled in <code>Character.toLower/UpperCase()</code>.
045         */
046        final class ConditionalSpecialCasing {
047
048            // context conditions.
049            final static int FINAL_CASED = 1;
050            final static int AFTER_SOFT_DOTTED = 2;
051            final static int MORE_ABOVE = 3;
052            final static int AFTER_I = 4;
053            final static int NOT_BEFORE_DOT = 5;
054
055            // combining class definitions
056            final static int COMBINING_CLASS_ABOVE = 230;
057
058            // Special case mapping entries
059            static Entry[] entry = {
060                    //# ================================================================================
061                    //# Conditional mappings
062                    //# ================================================================================
063                    new Entry(0x03A3, new char[] { 0x03C2 },
064                            new char[] { 0x03A3 }, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
065
066                    //# ================================================================================
067                    //# Locale-sensitive mappings
068                    //# ================================================================================
069                    //# Lithuanian
070                    new Entry(0x0307, new char[] { 0x0307 }, new char[] {},
071                            "lt", AFTER_SOFT_DOTTED), // # COMBINING DOT ABOVE
072                    new Entry(0x0049, new char[] { 0x0069, 0x0307 },
073                            new char[] { 0x0049 }, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I
074                    new Entry(0x004A, new char[] { 0x006A, 0x0307 },
075                            new char[] { 0x004A }, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER J
076                    new Entry(0x012E, new char[] { 0x012F, 0x0307 },
077                            new char[] { 0x012E }, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I WITH OGONEK
078                    new Entry(0x00CC, new char[] { 0x0069, 0x0307, 0x0300 },
079                            new char[] { 0x00CC }, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
080                    new Entry(0x00CD, new char[] { 0x0069, 0x0307, 0x0301 },
081                            new char[] { 0x00CD }, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
082                    new Entry(0x0128, new char[] { 0x0069, 0x0307, 0x0303 },
083                            new char[] { 0x0128 }, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
084
085                    //# ================================================================================
086                    //# Turkish and Azeri
087                    //	new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
088                    //	new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
089                    new Entry(0x0307, new char[] {}, new char[] { 0x0307 },
090                            "tr", AFTER_I), // # COMBINING DOT ABOVE
091                    new Entry(0x0307, new char[] {}, new char[] { 0x0307 },
092                            "az", AFTER_I), // # COMBINING DOT ABOVE
093                    new Entry(0x0049, new char[] { 0x0131 },
094                            new char[] { 0x0049 }, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
095                    new Entry(0x0049, new char[] { 0x0131 },
096                            new char[] { 0x0049 }, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
097                    new Entry(0x0069, new char[] { 0x0069 },
098                            new char[] { 0x0130 }, "tr", 0), // # LATIN SMALL LETTER I
099                    new Entry(0x0069, new char[] { 0x0069 },
100                            new char[] { 0x0130 }, "az", 0) // # LATIN SMALL LETTER I
101            };
102
103            // A hash table that contains the above entries
104            static Hashtable entryTable = new Hashtable();
105            static {
106                // create hashtable from the entry
107                for (int i = 0; i < entry.length; i++) {
108                    Entry cur = entry[i];
109                    Integer cp = new Integer(cur.getCodePoint());
110                    HashSet set = (HashSet) entryTable.get(cp);
111                    if (set == null) {
112                        set = new HashSet();
113                    }
114                    set.add(cur);
115                    entryTable.put(cp, set);
116                }
117            }
118
119            static int toLowerCaseEx(String src, int index, Locale locale) {
120                char[] result = lookUpTable(src, index, locale, true);
121
122                if (result != null) {
123                    if (result.length == 1) {
124                        return result[0];
125                    } else {
126                        return Character.ERROR;
127                    }
128                } else {
129                    // default to Character class' one
130                    return Character.toLowerCase(src.codePointAt(index));
131                }
132            }
133
134            static int toUpperCaseEx(String src, int index, Locale locale) {
135                char[] result = lookUpTable(src, index, locale, false);
136
137                if (result != null) {
138                    if (result.length == 1) {
139                        return result[0];
140                    } else {
141                        return Character.ERROR;
142                    }
143                } else {
144                    // default to Character class' one
145                    return Character.toUpperCaseEx(src.codePointAt(index));
146                }
147            }
148
149            static char[] toLowerCaseCharArray(String src, int index,
150                    Locale locale) {
151                return lookUpTable(src, index, locale, true);
152            }
153
154            static char[] toUpperCaseCharArray(String src, int index,
155                    Locale locale) {
156                char[] result = lookUpTable(src, index, locale, false);
157                if (result != null) {
158                    return result;
159                } else {
160                    return Character.toUpperCaseCharArray(src
161                            .codePointAt(index));
162                }
163            }
164
165            private static char[] lookUpTable(String src, int index,
166                    Locale locale, boolean bLowerCasing) {
167                HashSet set = (HashSet) entryTable.get(new Integer(src
168                        .codePointAt(index)));
169
170                if (set != null) {
171                    Iterator iter = set.iterator();
172                    String currentLang = locale.getLanguage();
173                    while (iter.hasNext()) {
174                        Entry entry = (Entry) iter.next();
175                        String conditionLang = entry.getLanguage();
176                        if (((conditionLang == null) || (conditionLang
177                                .equals(currentLang)))
178                                && isConditionMet(src, index, locale, entry
179                                        .getCondition())) {
180                            return (bLowerCasing ? entry.getLowerCase() : entry
181                                    .getUpperCase());
182                        }
183                    }
184                }
185
186                return null;
187            }
188
189            private static boolean isConditionMet(String src, int index,
190                    Locale locale, int condition) {
191                switch (condition) {
192                case FINAL_CASED:
193                    return isFinalCased(src, index, locale);
194
195                case AFTER_SOFT_DOTTED:
196                    return isAfterSoftDotted(src, index);
197
198                case MORE_ABOVE:
199                    return isMoreAbove(src, index);
200
201                case AFTER_I:
202                    return isAfterI(src, index);
203
204                case NOT_BEFORE_DOT:
205                    return !isBeforeDot(src, index);
206
207                default:
208                    return true;
209                }
210            }
211
212            /**
213             * Implements the "Final_Cased" condition
214             *
215             * Specification: Within the closest word boundaries containing C, there is a cased
216             * letter before C, and there is no cased letter after C.
217             *
218             * Regular Expression: 
219             *   Before C: [{cased==true}][{wordBoundary!=true}]*
220             *   After C: !([{wordBoundary!=true}]*[{cased}])
221             */
222            private static boolean isFinalCased(String src, int index,
223                    Locale locale) {
224                BreakIterator wordBoundary = BreakIterator
225                        .getWordInstance(locale);
226                wordBoundary.setText(src);
227                int ch;
228
229                // Look for a preceding 'cased' letter
230                for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i); i -= Character
231                        .charCount(ch)) {
232
233                    ch = src.codePointBefore(i);
234                    if (isCased(ch)) {
235
236                        int len = src.length();
237                        // Check that there is no 'cased' letter after the index
238                        for (i = index
239                                + Character.charCount(src.codePointAt(index)); (i < len)
240                                && !wordBoundary.isBoundary(i); i += Character
241                                .charCount(ch)) {
242
243                            ch = src.codePointAt(i);
244                            if (isCased(ch)) {
245                                return false;
246                            }
247                        }
248
249                        return true;
250                    }
251                }
252
253                return false;
254            }
255
256            /**
257             * Implements the "After_I" condition
258             *
259             * Specification: The last preceding base character was an uppercase I,
260             * and there is no intervening combining character class 230 (ABOVE).
261             *
262             * Regular Expression: 
263             *   Before C: [I]([{cc!=230}&{cc!=0}])*
264             */
265            private static boolean isAfterI(String src, int index) {
266                int ch;
267                int cc;
268
269                // Look for the last preceding base character
270                for (int i = index; i > 0; i -= Character.charCount(ch)) {
271
272                    ch = src.codePointBefore(i);
273
274                    if (ch == 'I') {
275                        return true;
276                    } else {
277                        cc = Normalizer.getCombiningClass(ch);
278                        if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
279                            return false;
280                        }
281                    }
282                }
283
284                return false;
285            }
286
287            /**
288             * Implements the "After_Soft_Dotted" condition
289             *
290             * Specification: The last preceding character with combining class
291             * of zero before C was Soft_Dotted, and there is no intervening
292             * combining character class 230 (ABOVE).
293             *
294             * Regular Expression: 
295             *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
296             */
297            private static boolean isAfterSoftDotted(String src, int index) {
298                int ch;
299                int cc;
300
301                // Look for the last preceding character
302                for (int i = index; i > 0; i -= Character.charCount(ch)) {
303
304                    ch = src.codePointBefore(i);
305
306                    if (isSoftDotted(ch)) {
307                        return true;
308                    } else {
309                        cc = Normalizer.getCombiningClass(ch);
310                        if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
311                            return false;
312                        }
313                    }
314                }
315
316                return false;
317            }
318
319            /**
320             * Implements the "More_Above" condition
321             *
322             * Specification: C is followed by one or more characters of combining
323             * class 230 (ABOVE) in the combining character sequence.
324             *
325             * Regular Expression: 
326             *   After C: [{cc!=0}]*[{cc==230}]
327             */
328            private static boolean isMoreAbove(String src, int index) {
329                int ch;
330                int cc;
331                int len = src.length();
332
333                // Look for a following ABOVE combining class character
334                for (int i = index
335                        + Character.charCount(src.codePointAt(index)); i < len; i += Character
336                        .charCount(ch)) {
337
338                    ch = src.codePointAt(i);
339                    cc = Normalizer.getCombiningClass(ch);
340
341                    if (cc == COMBINING_CLASS_ABOVE) {
342                        return true;
343                    } else if (cc == 0) {
344                        return false;
345                    }
346                }
347
348                return false;
349            }
350
351            /**
352             * Implements the "Before_Dot" condition
353             *
354             * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>.
355             * Any sequence of characters with a combining class that is
356             * neither 0 nor 230 may intervene between the current character
357             * and the combining dot above.
358             *
359             * Regular Expression: 
360             *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
361             */
362            private static boolean isBeforeDot(String src, int index) {
363                int ch;
364                int cc;
365                int len = src.length();
366
367                // Look for a following COMBINING DOT ABOVE
368                for (int i = index
369                        + Character.charCount(src.codePointAt(index)); i < len; i += Character
370                        .charCount(ch)) {
371
372                    ch = src.codePointAt(i);
373
374                    if (ch == '\u0307') {
375                        return true;
376                    } else {
377                        cc = Normalizer.getCombiningClass(ch);
378                        if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
379                            return false;
380                        }
381                    }
382                }
383
384                return false;
385            }
386
387            /**
388             * Examines whether a character is 'cased'.
389             *
390             * A character C is defined to be 'cased' if and only if at least one of
391             * following are true for C: uppercase==true, or lowercase==true, or 
392             * general_category==titlecase_letter.
393             * 
394             * The uppercase and lowercase property values are specified in the data
395             * file DerivedCoreProperties.txt in the Unicode Character Database.
396             */
397            private static boolean isCased(int ch) {
398                int type = Character.getType(ch);
399                if (type == Character.LOWERCASE_LETTER
400                        || type == Character.UPPERCASE_LETTER
401                        || type == Character.TITLECASE_LETTER) {
402                    return true;
403                } else {
404                    // Check for Other_Lowercase and Other_Uppercase
405                    //
406                    if ((ch >= 0x02B0) && (ch <= 0x02B8)) {
407                        // MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
408                        return true;
409                    } else if ((ch >= 0x02C0) && (ch <= 0x02C1)) {
410                        // MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
411                        return true;
412                    } else if ((ch >= 0x02E0) && (ch <= 0x02E4)) {
413                        // MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
414                        return true;
415                    } else if (ch == 0x0345) {
416                        // COMBINING GREEK YPOGEGRAMMENI
417                        return true;
418                    } else if (ch == 0x037A) {
419                        // GREEK YPOGEGRAMMENI
420                        return true;
421                    } else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) {
422                        // MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
423                        return true;
424                    } else if ((ch >= 0x2160) && (ch <= 0x217F)) {
425                        // ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
426                        // SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
427                        return true;
428                    } else if ((ch >= 0x24B6) && (ch <= 0x24E9)) {
429                        // CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
430                        // CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
431                        return true;
432                    } else {
433                        return false;
434                    }
435                }
436            }
437
438            private static boolean isSoftDotted(int ch) {
439                switch (ch) {
440                case 0x0069: // Soft_Dotted # L&       LATIN SMALL LETTER I
441                case 0x006A: // Soft_Dotted # L&       LATIN SMALL LETTER J
442                case 0x012F: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH OGONEK
443                case 0x0268: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH STROKE
444                case 0x0456: // Soft_Dotted # L&       CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
445                case 0x0458: // Soft_Dotted # L&       CYRILLIC SMALL LETTER JE
446                case 0x1D62: // Soft_Dotted # L&       LATIN SUBSCRIPT SMALL LETTER I
447                case 0x1E2D: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH TILDE BELOW
448                case 0x1ECB: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH DOT BELOW
449                case 0x2071: // Soft_Dotted # L&       SUPERSCRIPT LATIN SMALL LETTER I
450                    return true;
451                default:
452                    return false;
453                }
454            }
455
456            /**
457             * An internal class that represents an entry in the Special Casing Properties. 
458             */
459            static class Entry {
460                int ch;
461                char[] lower;
462                char[] upper;
463                String lang;
464                int condition;
465
466                Entry(int ch, char[] lower, char[] upper, String lang,
467                        int condition) {
468                    this .ch = ch;
469                    this .lower = lower;
470                    this .upper = upper;
471                    this .lang = lang;
472                    this .condition = condition;
473                }
474
475                int getCodePoint() {
476                    return ch;
477                }
478
479                char[] getLowerCase() {
480                    return lower;
481                }
482
483                char[] getUpperCase() {
484                    return upper;
485                }
486
487                String getLanguage() {
488                    return lang;
489                }
490
491                int getCondition() {
492                    return condition;
493                }
494            }
495        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.