Source Code Cross Referenced for UCharacterName.java in  » Internationalization-Localization » icu4j » com » ibm » icu » impl » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.impl 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /**
0002:         *******************************************************************************
0003:         * Copyright (C) 1996-2006, International Business Machines Corporation and    *
0004:         * others. All Rights Reserved.                                                *
0005:         *******************************************************************************
0006:         */package com.ibm.icu.impl;
0007:
0008:        import java.io.InputStream;
0009:        import java.io.BufferedInputStream;
0010:        import java.io.IOException;
0011:        import java.util.MissingResourceException;
0012:
0013:        import com.ibm.icu.text.UTF16;
0014:        import com.ibm.icu.text.UnicodeSet;
0015:        import com.ibm.icu.lang.UCharacter;
0016:        import com.ibm.icu.lang.UCharacterCategory;
0017:
0018:        /**
0019:         * Internal class to manage character names.
0020:         * Since data for names are stored
0021:         * in an array of char, by default indexes used in this class are referring to
0022:         * a 2 byte count, unless otherwise stated. Cases where the index is referring
0023:         * to a byte count, the index is halved and depending on whether the index is
0024:         * even or odd, the MSB or LSB of the result char at the halved index is
0025:         * returned. For indexes to an array of int, the index is multiplied by 2,
0026:         * result char at the multiplied index and its following char is returned as an
0027:         * int.
0028:         * <a href=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class
0029:         * Note : 0 - 0x1F are control characters without names in Unicode 3.0
0030:         * @author Syn Wee Quek
0031:         * @since nov0700
0032:         */
0033:
0034:        public final class UCharacterName {
0035:            // public data members ----------------------------------------------
0036:
0037:            /**
0038:             * Number of lines per group
0039:             * 1 << GROUP_SHIFT_
0040:             */
0041:            public static final int LINES_PER_GROUP_ = 1 << 5;
0042:            /**
0043:             * Maximum number of groups
0044:             */
0045:            public int m_groupcount_ = 0;
0046:
0047:            // public methods ---------------------------------------------------
0048:
0049:            /**
0050:             * Gets the only instance of UCharacterName
0051:             * @return only instance of UCharacterName
0052:             * @exception MissingResourceException thrown when reading of name data fails
0053:             */
0054:            public static UCharacterName getInstance() {
0055:                if (INSTANCE_ == null) {
0056:                    try {
0057:                        INSTANCE_ = new UCharacterName();
0058:                    } catch (IOException e) {
0059:                        throw new MissingResourceException(
0060:                                "Could not construct UCharacterName. Missing unames.icu",
0061:                                "", "");
0062:                    } catch (Exception e) {
0063:                        throw new MissingResourceException(e.getMessage(), "",
0064:                                "");
0065:                    }
0066:                }
0067:                return INSTANCE_;
0068:            }
0069:
0070:            /**
0071:             * Retrieve the name of a Unicode code point.
0072:             * Depending on <code>choice</code>, the character name written into the
0073:             * buffer is the "modern" name or the name that was defined in Unicode
0074:             * version 1.0.
0075:             * The name contains only "invariant" characters
0076:             * like A-Z, 0-9, space, and '-'.
0077:             *
0078:             * @param ch the code point for which to get the name.
0079:             * @param choice Selector for which name to get.
0080:             * @return if code point is above 0x1fff, null is returned
0081:             */
0082:            public String getName(int ch, int choice) {
0083:                if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE
0084:                        || choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
0085:                    return null;
0086:                }
0087:
0088:                String result = null;
0089:
0090:                result = getAlgName(ch, choice);
0091:
0092:                // getting normal character name
0093:                if (result == null || result.length() == 0) {
0094:                    if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
0095:                        result = getExtendedName(ch);
0096:                    } else {
0097:                        result = getGroupName(ch, choice);
0098:                    }
0099:                }
0100:
0101:                return result;
0102:            }
0103:
0104:            /**
0105:             * Find a character by its name and return its code point value
0106:             * @param choice selector to indicate if argument name is a Unicode 1.0
0107:             *        or the most current version
0108:             * @param name the name to search for
0109:             * @return code point
0110:             */
0111:            public int getCharFromName(int choice, String name) {
0112:                // checks for illegal arguments
0113:                if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT
0114:                        || name == null || name.length() == 0) {
0115:                    return -1;
0116:                }
0117:
0118:                // try extended names first
0119:                int result = getExtendedChar(name.toLowerCase(), choice);
0120:                if (result >= -1) {
0121:                    return result;
0122:                }
0123:
0124:                String upperCaseName = name.toUpperCase();
0125:                // try algorithmic names first, if fails then try group names
0126:                // int result = getAlgorithmChar(choice, uppercasename);
0127:
0128:                if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
0129:                    int count = 0;
0130:                    if (m_algorithm_ != null) {
0131:                        count = m_algorithm_.length;
0132:                    }
0133:                    for (count--; count >= 0; count--) {
0134:                        result = m_algorithm_[count].getChar(upperCaseName);
0135:                        if (result >= 0) {
0136:                            return result;
0137:                        }
0138:                    }
0139:                }
0140:
0141:                if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
0142:                    result = getGroupChar(upperCaseName,
0143:                            UCharacterNameChoice.UNICODE_CHAR_NAME);
0144:                    if (result == -1) {
0145:                        result = getGroupChar(upperCaseName,
0146:                                UCharacterNameChoice.UNICODE_10_CHAR_NAME);
0147:                    }
0148:                } else {
0149:                    result = getGroupChar(upperCaseName, choice);
0150:                }
0151:                return result;
0152:            }
0153:
0154:            // these are all UCharacterNameIterator use methods -------------------
0155:
0156:            /**
0157:             * Reads a block of compressed lengths of 32 strings and expands them into
0158:             * offsets and lengths for each string. Lengths are stored with a
0159:             * variable-width encoding in consecutive nibbles:
0160:             * If a nibble<0xc, then it is the length itself (0 = empty string).
0161:             * If a nibble>=0xc, then it forms a length value with the following
0162:             * nibble.
0163:             * The offsets and lengths arrays must be at least 33 (one more) long
0164:             * because there is no check here at the end if the last nibble is still
0165:             * used.
0166:             * @param index of group string object in array
0167:             * @param offsets array to store the value of the string offsets
0168:             * @param lengths array to store the value of the string length
0169:             * @return next index of the data string immediately after the lengths
0170:             *         in terms of byte address
0171:             */
0172:            public int getGroupLengths(int index, char offsets[],
0173:                    char lengths[]) {
0174:                char length = 0xffff;
0175:                byte b = 0, n = 0;
0176:                int shift;
0177:                index = index * m_groupsize_; // byte count offsets of group strings
0178:                int stringoffset = UCharacterUtility.toInt(m_groupinfo_[index
0179:                        + OFFSET_HIGH_OFFSET_], m_groupinfo_[index
0180:                        + OFFSET_LOW_OFFSET_]);
0181:
0182:                offsets[0] = 0;
0183:
0184:                // all 32 lengths must be read to get the offset of the first group
0185:                // string
0186:                for (int i = 0; i < LINES_PER_GROUP_; stringoffset++) {
0187:                    b = m_groupstring_[stringoffset];
0188:                    shift = 4;
0189:
0190:                    while (shift >= 0) {
0191:                        // getting nibble
0192:                        n = (byte) ((b >> shift) & 0x0F);
0193:                        if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
0194:                            length = (char) ((n - 12) << 4);
0195:                        } else {
0196:                            if (length != 0xffff) {
0197:                                lengths[i] = (char) ((length | n) + 12);
0198:                            } else {
0199:                                lengths[i] = (char) n;
0200:                            }
0201:
0202:                            if (i < LINES_PER_GROUP_) {
0203:                                offsets[i + 1] = (char) (offsets[i] + lengths[i]);
0204:                            }
0205:
0206:                            length = 0xffff;
0207:                            i++;
0208:                        }
0209:
0210:                        shift -= 4;
0211:                    }
0212:                }
0213:                return stringoffset;
0214:            }
0215:
0216:            /**
0217:             * Gets the name of the argument group index.
0218:             * UnicodeData.txt uses ';' as a field separator, so no field can contain
0219:             * ';' as part of its contents. In unames.icu, it is marked as
0220:             * token[';'] == -1 only if the semicolon is used in the data file - which
0221:             * is iff we have Unicode 1.0 names or ISO comments.
0222:             * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments
0223:             * although we know that it will never be part of a name.
0224:             * Equivalent to ICU4C's expandName.
0225:             * @param index of the group name string in byte count
0226:             * @param length of the group name string
0227:             * @param choice of Unicode 1.0 name or the most current name
0228:             * @return name of the group
0229:             */
0230:            public String getGroupName(int index, int length, int choice) {
0231:                if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME
0232:                        || choice == UCharacterNameChoice.ISO_COMMENT_) {
0233:                    if (';' >= m_tokentable_.length
0234:                            || m_tokentable_[';'] == 0xFFFF) {
0235:                        // skip the modern name
0236:                        int oldindex = index;
0237:                        index += UCharacterUtility.skipByteSubString(
0238:                                m_groupstring_, index, length, (byte) ';');
0239:                        length -= (index - oldindex);
0240:                        if (choice == UCharacterNameChoice.ISO_COMMENT_) {
0241:                            // skips the 1.0 Name to the iso comment part
0242:                            oldindex = index;
0243:                            index += UCharacterUtility.skipByteSubString(
0244:                                    m_groupstring_, index, length, (byte) ';');
0245:                            length -= (index - oldindex);
0246:                        }
0247:                    } else {
0248:                        // the semicolon byte is a token number, therefore only modern
0249:                        // names are stored in unames.dat and there is no such
0250:                        // requested Unicode 1.0 name here
0251:                        length = 0;
0252:                    }
0253:                }
0254:
0255:                synchronized (m_utilStringBuffer_) {
0256:                    m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
0257:                    byte b;
0258:                    char token;
0259:                    for (int i = 0; i < length;) {
0260:                        b = m_groupstring_[index + i];
0261:                        i++;
0262:
0263:                        if (b >= m_tokentable_.length) {
0264:                            if (b == ';') {
0265:                                break;
0266:                            }
0267:                            m_utilStringBuffer_.append(b); // implicit letter
0268:                        } else {
0269:                            token = m_tokentable_[b & 0x00ff];
0270:                            if (token == 0xFFFE) {
0271:                                // this is a lead byte for a double-byte token
0272:                                token = m_tokentable_[b << 8
0273:                                        | (m_groupstring_[index + i] & 0x00ff)];
0274:                                i++;
0275:                            }
0276:                            if (token == 0xFFFF) {
0277:                                if (b == ';') {
0278:                                    // skip the semicolon if we are seeking extended
0279:                                    // names and there was no 2.0 name but there
0280:                                    // is a 1.0 name.
0281:                                    if (m_utilStringBuffer_.length() == 0
0282:                                            && choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
0283:                                        continue;
0284:                                    }
0285:                                    break;
0286:                                }
0287:                                // explicit letter
0288:                                m_utilStringBuffer_.append((char) (b & 0x00ff));
0289:                            } else { // write token word
0290:                                UCharacterUtility.getNullTermByteSubString(
0291:                                        m_utilStringBuffer_, m_tokenstring_,
0292:                                        token);
0293:                            }
0294:                        }
0295:                    }
0296:
0297:                    if (m_utilStringBuffer_.length() > 0) {
0298:                        return m_utilStringBuffer_.toString();
0299:                    }
0300:                }
0301:                return null;
0302:            }
0303:
0304:            /**
0305:             * Retrieves the extended name
0306:             */
0307:            public String getExtendedName(int ch) {
0308:                String result = getName(ch,
0309:                        UCharacterNameChoice.UNICODE_CHAR_NAME);
0310:                if (result == null) {
0311:                    if (getType(ch) == UCharacterCategory.CONTROL) {
0312:                        result = getName(ch,
0313:                                UCharacterNameChoice.UNICODE_10_CHAR_NAME);
0314:                    }
0315:                    if (result == null) {
0316:                        result = getExtendedOr10Name(ch);
0317:                    }
0318:                }
0319:                return result;
0320:            }
0321:
0322:            /**
0323:             * Gets the group index for the codepoint, or the group before it.
0324:             * @param codepoint
0325:             * @return group index containing codepoint or the group before it.
0326:             */
0327:            public int getGroup(int codepoint) {
0328:                int endGroup = m_groupcount_;
0329:                int msb = getCodepointMSB(codepoint);
0330:                int result = 0;
0331:                // binary search for the group of names that contains the one for
0332:                // code
0333:                // find the group that contains codepoint, or the highest before it
0334:                while (result < endGroup - 1) {
0335:                    int gindex = (result + endGroup) >> 1;
0336:                    if (msb < getGroupMSB(gindex)) {
0337:                        endGroup = gindex;
0338:                    } else {
0339:                        result = gindex;
0340:                    }
0341:                }
0342:                return result;
0343:            }
0344:
0345:            /**
0346:             * Gets the extended and 1.0 name when the most current unicode names
0347:             * fail
0348:             * @param ch codepoint
0349:             * @return name of codepoint extended or 1.0
0350:             */
0351:            public String getExtendedOr10Name(int ch) {
0352:                String result = null;
0353:                if (getType(ch) == UCharacterCategory.CONTROL) {
0354:                    result = getName(ch,
0355:                            UCharacterNameChoice.UNICODE_10_CHAR_NAME);
0356:                }
0357:                if (result == null) {
0358:                    int type = getType(ch);
0359:                    // Return unknown if the table of names above is not up to
0360:                    // date.
0361:                    if (type >= TYPE_NAMES_.length) {
0362:                        result = UNKNOWN_TYPE_NAME_;
0363:                    } else {
0364:                        result = TYPE_NAMES_[type];
0365:                    }
0366:                    synchronized (m_utilStringBuffer_) {
0367:                        m_utilStringBuffer_.delete(0, m_utilStringBuffer_
0368:                                .length());
0369:                        m_utilStringBuffer_.append('<');
0370:                        m_utilStringBuffer_.append(result);
0371:                        m_utilStringBuffer_.append('-');
0372:                        String chStr = Integer.toHexString(ch).toUpperCase();
0373:                        int zeros = 4 - chStr.length();
0374:                        while (zeros > 0) {
0375:                            m_utilStringBuffer_.append('0');
0376:                            zeros--;
0377:                        }
0378:                        m_utilStringBuffer_.append(chStr);
0379:                        m_utilStringBuffer_.append('>');
0380:                        result = m_utilStringBuffer_.toString();
0381:                    }
0382:                }
0383:                return result;
0384:            }
0385:
0386:            /**
0387:             * Gets the MSB from the group index
0388:             * @param gindex group index
0389:             * @return the MSB of the group if gindex is valid, -1 otherwise
0390:             */
0391:            public int getGroupMSB(int gindex) {
0392:                if (gindex >= m_groupcount_) {
0393:                    return -1;
0394:                }
0395:                return m_groupinfo_[gindex * m_groupsize_];
0396:            }
0397:
0398:            /**
0399:             * Gets the MSB of the codepoint
0400:             * @param codepoint
0401:             * @return the MSB of the codepoint
0402:             */
0403:            public static int getCodepointMSB(int codepoint) {
0404:                return codepoint >> GROUP_SHIFT_;
0405:            }
0406:
0407:            /**
0408:             * Gets the maximum codepoint + 1 of the group
0409:             * @param msb most significant byte of the group
0410:             * @return limit codepoint of the group
0411:             */
0412:            public static int getGroupLimit(int msb) {
0413:                return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
0414:            }
0415:
0416:            /**
0417:             * Gets the minimum codepoint of the group
0418:             * @param msb most significant byte of the group
0419:             * @return minimum codepoint of the group
0420:             */
0421:            public static int getGroupMin(int msb) {
0422:                return msb << GROUP_SHIFT_;
0423:            }
0424:
0425:            /**
0426:             * Gets the offset to a group
0427:             * @param codepoint
0428:             * @return offset to a group
0429:             */
0430:            public static int getGroupOffset(int codepoint) {
0431:                return codepoint & GROUP_MASK_;
0432:            }
0433:
0434:            /**
0435:             * Gets the minimum codepoint of a group
0436:             * @param codepoint
0437:             * @return minimum codepoint in the group which codepoint belongs to
0438:             */
0439:            ///CLOVER:OFF
0440:            public static int getGroupMinFromCodepoint(int codepoint) {
0441:                return codepoint & ~GROUP_MASK_;
0442:            }
0443:
0444:            ///CLOVER:ON
0445:
0446:            /**
0447:             * Get the Algorithm range length
0448:             * @return Algorithm range length
0449:             */
0450:            public int getAlgorithmLength() {
0451:                return m_algorithm_.length;
0452:            }
0453:
0454:            /**
0455:             * Gets the start of the range
0456:             * @param index algorithm index
0457:             * @return algorithm range start
0458:             */
0459:            public int getAlgorithmStart(int index) {
0460:                return m_algorithm_[index].m_rangestart_;
0461:            }
0462:
0463:            /**
0464:             * Gets the end of the range
0465:             * @param index algorithm index
0466:             * @return algorithm range end
0467:             */
0468:            public int getAlgorithmEnd(int index) {
0469:                return m_algorithm_[index].m_rangeend_;
0470:            }
0471:
0472:            /**
0473:             * Gets the Algorithmic name of the codepoint
0474:             * @param index algorithmic range index
0475:             * @param codepoint
0476:             * @return algorithmic name of codepoint
0477:             */
0478:            public String getAlgorithmName(int index, int codepoint) {
0479:                String result = null;
0480:                synchronized (m_utilStringBuffer_) {
0481:                    m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
0482:                    m_algorithm_[index].appendName(codepoint,
0483:                            m_utilStringBuffer_);
0484:                    result = m_utilStringBuffer_.toString();
0485:                }
0486:                return result;
0487:            }
0488:
0489:            /**
0490:             * Gets the group name of the character
0491:             * @param ch character to get the group name
0492:             * @param choice name choice selector to choose a unicode 1.0 or newer name
0493:             */
0494:            public String getGroupName(int ch, int choice) {
0495:                // gets the msb
0496:                int msb = getCodepointMSB(ch);
0497:                int group = getGroup(ch);
0498:
0499:                // return this if it is an exact match
0500:                if (msb == m_groupinfo_[group * m_groupsize_]) {
0501:                    int index = getGroupLengths(group, m_groupoffsets_,
0502:                            m_grouplengths_);
0503:                    int offset = ch & GROUP_MASK_;
0504:                    return getGroupName(index + m_groupoffsets_[offset],
0505:                            m_grouplengths_[offset], choice);
0506:                }
0507:
0508:                return null;
0509:            }
0510:
0511:            // these are transliterator use methods ---------------------------------
0512:
0513:            /**
0514:             * Gets the maximum length of any codepoint name.
0515:             * Equivalent to uprv_getMaxCharNameLength.
0516:             * @return the maximum length of any codepoint name
0517:             */
0518:            public int getMaxCharNameLength() {
0519:                if (initNameSetsLengths()) {
0520:                    return m_maxNameLength_;
0521:                } else {
0522:                    return 0;
0523:                }
0524:            }
0525:
0526:            /**
0527:             * Gets the maximum length of any iso comments.
0528:             * Equivalent to uprv_getMaxISOCommentLength.
0529:             * @return the maximum length of any codepoint name
0530:             */
0531:            ///CLOVER:OFF
0532:            public int getMaxISOCommentLength() {
0533:                if (initNameSetsLengths()) {
0534:                    return m_maxISOCommentLength_;
0535:                } else {
0536:                    return 0;
0537:                }
0538:            }
0539:
0540:            ///CLOVER:ON
0541:
0542:            /**
0543:             * Fills set with characters that are used in Unicode character names.
0544:             * Equivalent to uprv_getCharNameCharacters.
0545:             * @param set USet to receive characters. Existing contents are deleted.
0546:             */
0547:            public void getCharNameCharacters(UnicodeSet set) {
0548:                convert(m_nameSet_, set);
0549:            }
0550:
0551:            /**
0552:             * Fills set with characters that are used in Unicode character names.
0553:             * Equivalent to uprv_getISOCommentCharacters.
0554:             * @param set USet to receive characters. Existing contents are deleted.
0555:             */
0556:            ///CLOVER:OFF
0557:            public void getISOCommentCharacters(UnicodeSet set) {
0558:                convert(m_ISOCommentSet_, set);
0559:            }
0560:
0561:            ///CLOVER:ON
0562:
0563:            // package private inner class --------------------------------------
0564:
0565:            /**
0566:             * Algorithmic name class
0567:             */
0568:            static final class AlgorithmName {
0569:                // package private data members ----------------------------------
0570:
0571:                /**
0572:                 * Constant type value of the different AlgorithmName
0573:                 */
0574:                static final int TYPE_0_ = 0;
0575:                static final int TYPE_1_ = 1;
0576:
0577:                // package private constructors ----------------------------------
0578:
/**
 * Creates an AlgorithmName without any data. Range, type, variant,
 * prefix and factor information are supplied afterwards through the
 * set methods of this class.
 */
AlgorithmName() {
}
0584:
0585:                // package private methods ---------------------------------------
0586:
0587:                /**
0588:                 * Sets the information for accessing the algorithmic names
0589:                 * @param rangestart starting code point that lies within this name group
0590:                 * @param rangeend end code point that lies within this name group
0591:                 * @param type algorithm type. There's 2 kinds of algorithmic type. First
0592:                 *        which uses code point as part of its name and the other uses
0593:                 *        variant postfix strings
0594:                 * @param variant algorithmic variant
0595:                 * @return true if values are valid
0596:                 */
0597:                boolean setInfo(int rangestart, int rangeend, byte type,
0598:                        byte variant) {
0599:                    if (rangestart >= UCharacter.MIN_VALUE
0600:                            && rangestart <= rangeend
0601:                            && rangeend <= UCharacter.MAX_VALUE
0602:                            && (type == TYPE_0_ || type == TYPE_1_)) {
0603:                        m_rangestart_ = rangestart;
0604:                        m_rangeend_ = rangeend;
0605:                        m_type_ = type;
0606:                        m_variant_ = variant;
0607:                        return true;
0608:                    }
0609:                    return false;
0610:                }
0611:
0612:                /**
0613:                 * Sets the factor data
0614:                 * @param factor Array of factor
0615:                 * @return true if factors are valid
0616:                 */
0617:                boolean setFactor(char factor[]) {
0618:                    if (factor.length == m_variant_) {
0619:                        m_factor_ = factor;
0620:                        return true;
0621:                    }
0622:                    return false;
0623:                }
0624:
0625:                /**
0626:                 * Sets the name prefix
0627:                 * @param prefix
0628:                 * @return true if prefix is set
0629:                 */
0630:                boolean setPrefix(String prefix) {
0631:                    if (prefix != null && prefix.length() > 0) {
0632:                        m_prefix_ = prefix;
0633:                        return true;
0634:                    }
0635:                    return false;
0636:                }
0637:
0638:                /**
0639:                 * Sets the variant factorized name data
0640:                 * @param string variant factorized name data
0641:                 * @return true if values are set
0642:                 */
0643:                boolean setFactorString(byte string[]) {
0644:                    // factor and variant string can be empty for things like
0645:                    // hanggul code points
0646:                    m_factorstring_ = string;
0647:                    return true;
0648:                }
0649:
0650:                /**
0651:                 * Checks if code point lies in Algorithm object at index
0652:                 * @param ch code point
0653:                 */
0654:                boolean contains(int ch) {
0655:                    return m_rangestart_ <= ch && ch <= m_rangeend_;
0656:                }
0657:
0658:                /**
0659:                 * Appends algorithm name of code point into StringBuffer.
0660:                 * Note this method does not check for validity of code point in Algorithm,
0661:                 * result is undefined if code point does not belong in Algorithm.
0662:                 * @param ch code point
0663:                 * @param str StringBuffer to append to
0664:                 */
0665:                void appendName(int ch, StringBuffer str) {
0666:                    str.append(m_prefix_);
0667:                    switch (m_type_) {
0668:                    case TYPE_0_:
0669:                        // prefix followed by hex digits indicating variants
0670:                        Utility.hex(ch, m_variant_, str);
0671:                        break;
0672:                    case TYPE_1_:
0673:                        // prefix followed by factorized-elements
0674:                        int offset = ch - m_rangestart_;
0675:                        int indexes[] = m_utilIntBuffer_;
0676:                        int factor;
0677:
0678:                        // write elements according to the factors
0679:                        // the factorized elements are determined by modulo
0680:                        // arithmetic
0681:                        synchronized (m_utilIntBuffer_) {
0682:                            for (int i = m_variant_ - 1; i > 0; i--) {
0683:                                factor = m_factor_[i] & 0x00FF;
0684:                                indexes[i] = offset % factor;
0685:                                offset /= factor;
0686:                            }
0687:
0688:                            // we don't need to calculate the last modulus because
0689:                            // start <= code <= end guarantees here that
0690:                            // code <= factors[0]
0691:                            indexes[0] = offset;
0692:
0693:                            // joining up the factorized strings
0694:                            str.append(getFactorString(indexes, m_variant_));
0695:                        }
0696:                        break;
0697:                    }
0698:                }
0699:
0700:                /**
0701:                 * Gets the character for the argument algorithmic name
0702:                 * @return the algorithmic char or -1 otherwise.
0703:                 */
0704:                int getChar(String name) {
0705:                    int prefixlen = m_prefix_.length();
0706:                    if (name.length() < prefixlen
0707:                            || !m_prefix_.equals(name.substring(0, prefixlen))) {
0708:                        return -1;
0709:                    }
0710:
0711:                    switch (m_type_) {
0712:                    case TYPE_0_:
0713:                        try {
0714:                            int result = Integer.parseInt(name
0715:                                    .substring(prefixlen), 16);
0716:                            // does it fit into the range?
0717:                            if (m_rangestart_ <= result
0718:                                    && result <= m_rangeend_) {
0719:                                return result;
0720:                            }
0721:                        } catch (NumberFormatException e) {
0722:                            return -1;
0723:                        }
0724:                        break;
0725:                    case TYPE_1_:
0726:                        // repetitative suffix name comparison done here
0727:                        // offset is the character code - start
0728:                        for (int ch = m_rangestart_; ch <= m_rangeend_; ch++) {
0729:                            int offset = ch - m_rangestart_;
0730:                            int indexes[] = m_utilIntBuffer_;
0731:                            int factor;
0732:
0733:                            // write elements according to the factors
0734:                            // the factorized elements are determined by modulo
0735:                            // arithmetic
0736:                            synchronized (m_utilIntBuffer_) {
0737:                                for (int i = m_variant_ - 1; i > 0; i--) {
0738:                                    factor = m_factor_[i] & 0x00FF;
0739:                                    indexes[i] = offset % factor;
0740:                                    offset /= factor;
0741:                                }
0742:
0743:                                // we don't need to calculate the last modulus
0744:                                // because start <= code <= end guarantees here that
0745:                                // code <= factors[0]
0746:                                indexes[0] = offset;
0747:
0748:                                // joining up the factorized strings
0749:                                if (compareFactorString(indexes, m_variant_,
0750:                                        name, prefixlen)) {
0751:                                    return ch;
0752:                                }
0753:                            }
0754:                        }
0755:                    }
0756:
0757:                    return -1;
0758:                }
0759:
0760:                /**
0761:                 * Adds all chars in the set of algorithmic names into the set.
0762:                 * Equivalent to part of calcAlgNameSetsLengths.
0763:                 * @param set int set to add the chars of the algorithm names into
0764:                 * @param maxlength maximum length to compare to
0765:                 * @return the length that is either maxlength of the length of this
0766:                 *         algorithm name if it is longer than maxlength
0767:                 */
0768:                int add(int set[], int maxlength) {
0769:                    // prefix length
0770:                    int length = UCharacterName.add(set, m_prefix_);
0771:                    switch (m_type_) {
0772:                    case TYPE_0_: {
0773:                        // name = prefix + (range->variant times) hex-digits
0774:                        // prefix
0775:                        length += m_variant_;
0776:                        /* synwee to check
0777:                         * addString(set, (const char *)(range + 1))
0778:                                           + range->variant;*/
0779:                        break;
0780:                    }
0781:                    case TYPE_1_: {
0782:                        // name = prefix factorized-elements
0783:                        // get the set and maximum factor suffix length for each
0784:                        // factor
0785:                        for (int i = m_variant_ - 1; i > 0; i--) {
0786:                            int maxfactorlength = 0;
0787:                            int count = 0;
0788:                            for (int factor = m_factor_[i]; factor > 0; --factor) {
0789:                                synchronized (m_utilStringBuffer_) {
0790:                                    m_utilStringBuffer_.delete(0,
0791:                                            m_utilStringBuffer_.length());
0792:                                    count = UCharacterUtility
0793:                                            .getNullTermByteSubString(
0794:                                                    m_utilStringBuffer_,
0795:                                                    m_factorstring_, count);
0796:                                    UCharacterName
0797:                                            .add(set, m_utilStringBuffer_);
0798:                                    if (m_utilStringBuffer_.length() > maxfactorlength) {
0799:                                        maxfactorlength = m_utilStringBuffer_
0800:                                                .length();
0801:                                    }
0802:                                }
0803:                            }
0804:                            length += maxfactorlength;
0805:                        }
0806:                    }
0807:                    }
0808:                    if (length > maxlength) {
0809:                        return length;
0810:                    }
0811:                    return maxlength;
0812:                }
0813:
0814:                // private data members ------------------------------------------
0815:
0816:                /**
0817:                 * Algorithmic data information
0818:                 */
0819:                private int m_rangestart_;
0820:                private int m_rangeend_;
0821:                private byte m_type_;
0822:                private byte m_variant_;
0823:                private char m_factor_[];
0824:                private String m_prefix_;
0825:                private byte m_factorstring_[];
0826:                /**
0827:                 * Utility StringBuffer
0828:                 */
0829:                private StringBuffer m_utilStringBuffer_ = new StringBuffer();
0830:                /**
0831:                 * Utility int buffer
0832:                 */
0833:                private int m_utilIntBuffer_[] = new int[256];
0834:
0835:                // private methods -----------------------------------------------
0836:
0837:                /**
0838:                 * Gets the indexth string in each of the argument factor block
0839:                 * @param index array with each index corresponding to each factor block
0840:                 * @param length length of the array index
0841:                 * @return the combined string of the array of indexth factor string in
0842:                 *         factor block
0843:                 */
0844:                private String getFactorString(int index[], int length) {
0845:                    int size = m_factor_.length;
0846:                    if (index == null || length != size) {
0847:                        return null;
0848:                    }
0849:
0850:                    synchronized (m_utilStringBuffer_) {
0851:                        m_utilStringBuffer_.delete(0, m_utilStringBuffer_
0852:                                .length());
0853:                        int count = 0;
0854:                        int factor;
0855:                        size--;
0856:                        for (int i = 0; i <= size; i++) {
0857:                            factor = m_factor_[i];
0858:                            count = UCharacterUtility
0859:                                    .skipNullTermByteSubString(m_factorstring_,
0860:                                            count, index[i]);
0861:                            count = UCharacterUtility
0862:                                    .getNullTermByteSubString(
0863:                                            m_utilStringBuffer_,
0864:                                            m_factorstring_, count);
0865:                            if (i != size) {
0866:                                count = UCharacterUtility
0867:                                        .skipNullTermByteSubString(
0868:                                                m_factorstring_, count, factor
0869:                                                        - index[i] - 1);
0870:                            }
0871:                        }
0872:                        return m_utilStringBuffer_.toString();
0873:                    }
0874:                }
0875:
0876:                /**
0877:                 * Compares the indexth string in each of the argument factor block with
0878:                 * the argument string
0879:                 * @param index array with each index corresponding to each factor block
0880:                 * @param length index array length
0881:                 * @param str string to compare with
0882:                 * @param offset of str to start comparison
0883:                 * @return true if string matches
0884:                 */
0885:                private boolean compareFactorString(int index[], int length,
0886:                        String str, int offset) {
0887:                    int size = m_factor_.length;
0888:                    if (index == null || length != size)
0889:                        return false;
0890:
0891:                    int count = 0;
0892:                    int strcount = offset;
0893:                    int factor;
0894:                    size--;
0895:                    for (int i = 0; i <= size; i++) {
0896:                        factor = m_factor_[i];
0897:                        count = UCharacterUtility.skipNullTermByteSubString(
0898:                                m_factorstring_, count, index[i]);
0899:                        strcount = UCharacterUtility
0900:                                .compareNullTermByteSubString(str,
0901:                                        m_factorstring_, strcount, count);
0902:                        if (strcount < 0) {
0903:                            return false;
0904:                        }
0905:
0906:                        if (i != size) {
0907:                            count = UCharacterUtility
0908:                                    .skipNullTermByteSubString(m_factorstring_,
0909:                                            count, factor - index[i]);
0910:                        }
0911:                    }
0912:                    if (strcount != str.length()) {
0913:                        return false;
0914:                    }
0915:                    return true;
0916:                }
0917:            }
0918:
0919:            // package private data members --------------------------------------
0920:
0921:            /**
0922:             * Size of each groups
0923:             */
0924:            int m_groupsize_ = 0;
0925:
0926:            // package private methods --------------------------------------------
0927:
0928:            /**
0929:             * Sets the token data
0930:             * @param token array of tokens
0931:             * @param tokenstring array of string values of the tokens
0932:             * @return false if there is a data error
0933:             */
0934:            boolean setToken(char token[], byte tokenstring[]) {
0935:                if (token != null && tokenstring != null && token.length > 0
0936:                        && tokenstring.length > 0) {
0937:                    m_tokentable_ = token;
0938:                    m_tokenstring_ = tokenstring;
0939:                    return true;
0940:                }
0941:                return false;
0942:            }
0943:
0944:            /**
0945:             * Set the algorithm name information array
0946:             * @param alg Algorithm information array
0947:             * @return true if the group string offset has been set correctly
0948:             */
0949:            boolean setAlgorithm(AlgorithmName alg[]) {
0950:                if (alg != null && alg.length != 0) {
0951:                    m_algorithm_ = alg;
0952:                    return true;
0953:                }
0954:                return false;
0955:            }
0956:
0957:            /**
0958:             * Sets the number of group and size of each group in number of char
0959:             * @param count number of groups
0960:             * @param size size of group in char
0961:             * @return true if group size is set correctly
0962:             */
0963:            boolean setGroupCountSize(int count, int size) {
0964:                if (count <= 0 || size <= 0) {
0965:                    return false;
0966:                }
0967:                m_groupcount_ = count;
0968:                m_groupsize_ = size;
0969:                return true;
0970:            }
0971:
0972:            /**
0973:             * Sets the group name data
0974:             * @param group index information array
0975:             * @param groupstring name information array
0976:             * @return false if there is a data error
0977:             */
0978:            boolean setGroup(char group[], byte groupstring[]) {
0979:                if (group != null && groupstring != null && group.length > 0
0980:                        && groupstring.length > 0) {
0981:                    m_groupinfo_ = group;
0982:                    m_groupstring_ = groupstring;
0983:                    return true;
0984:                }
0985:                return false;
0986:            }
0987:
0988:            // private data members ----------------------------------------------
0989:
0990:            /**
0991:             * Data used in unames.icu
0992:             */
0993:            private char m_tokentable_[];
0994:            private byte m_tokenstring_[];
0995:            private char m_groupinfo_[];
0996:            private byte m_groupstring_[];
0997:            private AlgorithmName m_algorithm_[];
0998:
0999:            /**
1000:             * Group use
1001:             */
1002:            private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
1003:            private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
1004:
1005:            /**
1006:             * Default name of the name datafile
1007:             */
1008:            private static final String NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE
1009:                    + "/unames.icu";
1010:            /**
1011:             * Shift count to retrieve group information
1012:             */
1013:            private static final int GROUP_SHIFT_ = 5;
1014:            /**
1015:             * Mask to retrieve the offset for a particular character within a group
1016:             */
1017:            private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
1018:            /**
1019:             * Default buffer size of datafile
1020:             */
1021:            private static final int NAME_BUFFER_SIZE_ = 100000;
1022:
1023:            /**
1024:             * Position of offsethigh in group information array
1025:             */
1026:            private static final int OFFSET_HIGH_OFFSET_ = 1;
1027:
1028:            /**
1029:             * Position of offsetlow in group information array
1030:             */
1031:            private static final int OFFSET_LOW_OFFSET_ = 2;
1032:            /**
1033:             * Double nibble indicator, any nibble > this number has to be combined
1034:             * with its following nibble
1035:             */
1036:            private static final int SINGLE_NIBBLE_MAX_ = 11;
1037:
1038:            /*
1039:             * Maximum length of character names (regular & 1.0).
1040:             */
1041:            //private static int MAX_NAME_LENGTH_ = 0;
1042:            /*
1043:             * Maximum length of ISO comments.
1044:             */
1045:            //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
1046:            /**
1047:             * Set of chars used in character names (regular & 1.0).
1048:             * Chars are platform-dependent (can be EBCDIC).
1049:             */
1050:            private int m_nameSet_[] = new int[8];
1051:            /**
1052:             * Set of chars used in ISO comments. (regular & 1.0).
1053:             * Chars are platform-dependent (can be EBCDIC).
1054:             */
1055:            private int m_ISOCommentSet_[] = new int[8];
1056:            /**
1057:             * Utility StringBuffer
1058:             */
1059:            private StringBuffer m_utilStringBuffer_ = new StringBuffer();
1060:            /**
1061:             * Utility int buffer
1062:             */
1063:            private int m_utilIntBuffer_[] = new int[2];
1064:            /**
1065:             * Maximum ISO comment length
1066:             */
1067:            private int m_maxISOCommentLength_;
1068:            /**
1069:             * Maximum name length
1070:             */
1071:            private int m_maxNameLength_;
1072:            /**
1073:             * Singleton instance
1074:             */
1075:            private static UCharacterName INSTANCE_ = null;
1076:            /**
1077:             * Type names used for extended names
1078:             */
1079:            private static final String TYPE_NAMES_[] = { "unassigned",
1080:                    "uppercase letter", "lowercase letter", "titlecase letter",
1081:                    "modifier letter", "other letter", "non spacing mark",
1082:                    "enclosing mark", "combining spacing mark",
1083:                    "decimal digit number", "letter number", "other number",
1084:                    "space separator", "line separator", "paragraph separator",
1085:                    "control", "format", "private use area", "surrogate",
1086:                    "dash punctuation", "start punctuation", "end punctuation",
1087:                    "connector punctuation", "other punctuation",
1088:                    "math symbol", "currency symbol", "modifier symbol",
1089:                    "other symbol", "initial punctuation", "final punctuation",
1090:                    "noncharacter", "lead surrogate", "trail surrogate" };
1091:            /**
1092:             * Unknown type name
1093:             */
1094:            private static final String UNKNOWN_TYPE_NAME_ = "unknown";
1095:            /**
1096:             * Not a character type
1097:             */
1098:            private static final int NON_CHARACTER_ = UCharacterCategory.CHAR_CATEGORY_COUNT;
1099:            /**
1100:             * Lead surrogate type
1101:             */
1102:            private static final int LEAD_SURROGATE_ = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
1103:            /**
1104:             * Trail surrogate type
1105:             */
1106:            private static final int TRAIL_SURROGATE_ = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
1107:            /**
1108:             * Extended category count
1109:             */
1110:            static final int EXTENDED_CATEGORY_ = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
1111:
1112:            // private constructor ------------------------------------------------
1113:
1114:            /**
1115:             * <p>Protected constructor for use in UCharacter.</p>
1116:             * @exception IOException thrown when data reading fails
1117:             */
1118:            private UCharacterName() throws IOException {
1119:                InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_);
1120:                BufferedInputStream b = new BufferedInputStream(is,
1121:                        NAME_BUFFER_SIZE_);
1122:                UCharacterNameReader reader = new UCharacterNameReader(b);
1123:                reader.read(this );
1124:                b.close();
1125:            }
1126:
1127:            // private methods ---------------------------------------------------
1128:
1129:            /**
1130:             * Gets the algorithmic name for the argument character
1131:             * @param ch character to determine name for
1132:             * @param choice name choice
1133:             * @return the algorithmic name or null if not found
1134:             */
1135:            private String getAlgName(int ch, int choice) {
1136:                // Do not write algorithmic Unicode 1.0 names because Unihan names are
1137:                // the same as the modern ones, extension A was only introduced with
1138:                // Unicode 3.0, and the Hangul syllable block was moved and changed
1139:                // around Unicode 1.1.5.
1140:                if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
1141:                    // index in terms integer index
1142:                    synchronized (m_utilStringBuffer_) {
1143:                        m_utilStringBuffer_.delete(0, m_utilStringBuffer_
1144:                                .length());
1145:
1146:                        for (int index = m_algorithm_.length - 1; index >= 0; index--) {
1147:                            if (m_algorithm_[index].contains(ch)) {
1148:                                m_algorithm_[index].appendName(ch,
1149:                                        m_utilStringBuffer_);
1150:                                return m_utilStringBuffer_.toString();
1151:                            }
1152:                        }
1153:                    }
1154:                }
1155:                return null;
1156:            }
1157:
1158:            /**
1159:             * Getting the character with the tokenized argument name
1160:             * @param name of the character
1161:             * @return character with the tokenized argument name or -1 if character
1162:             *         is not found
1163:             */
1164:            private synchronized int getGroupChar(String name, int choice) {
1165:                for (int i = 0; i < m_groupcount_; i++) {
1166:                    // populating the data set of grouptable
1167:
1168:                    int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
1169:                            m_grouplengths_);
1170:
1171:                    // shift out to function
1172:                    int result = getGroupChar(startgpstrindex, m_grouplengths_,
1173:                            name, choice);
1174:                    if (result != -1) {
1175:                        return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
1176:                                | result;
1177:                    }
1178:                }
1179:                return -1;
1180:            }
1181:
1182:            /**
1183:             * Compares and retrieve character if name is found within the argument
1184:             * group
1185:             * @param index index where the set of names reside in the group block
1186:             * @param length list of lengths of the strings
1187:             * @param name character name to search for
1188:             * @param choice of either 1.0 or the most current unicode name
1189:             * @return relative character in the group which matches name, otherwise if
1190:             *         not found, -1 will be returned
1191:             */
1192:            private int getGroupChar(int index, char length[], String name,
1193:                    int choice) {
1194:                byte b = 0;
1195:                char token;
1196:                int len;
1197:                int namelen = name.length();
1198:                int nindex;
1199:                int count;
1200:
1201:                for (int result = 0; result <= LINES_PER_GROUP_; result++) {
1202:                    nindex = 0;
1203:                    len = length[result];
1204:
1205:                    if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
1206:                        int oldindex = index;
1207:                        index += UCharacterUtility.skipByteSubString(
1208:                                m_groupstring_, index, len, (byte) ';');
1209:                        len -= (index - oldindex);
1210:                    }
1211:
1212:                    // number of tokens is > the length of the name
1213:                    // write each letter directly, and write a token word per token
1214:                    for (count = 0; count < len && nindex != -1
1215:                            && nindex < namelen;) {
1216:                        b = m_groupstring_[index + count];
1217:                        count++;
1218:
1219:                        if (b >= m_tokentable_.length) {
1220:                            if (name.charAt(nindex++) != (b & 0xFF)) {
1221:                                nindex = -1;
1222:                            }
1223:                        } else {
1224:                            token = m_tokentable_[b & 0xFF];
1225:                            if (token == 0xFFFE) {
1226:                                // this is a lead byte for a double-byte token
1227:                                token = m_tokentable_[b << 8
1228:                                        | (m_groupstring_[index + count] & 0x00ff)];
1229:                                count++;
1230:                            }
1231:                            if (token == 0xFFFF) {
1232:                                if (name.charAt(nindex++) != (b & 0xFF)) {
1233:                                    nindex = -1;
1234:                                }
1235:                            } else {
1236:                                // compare token with name
1237:                                nindex = UCharacterUtility
1238:                                        .compareNullTermByteSubString(name,
1239:                                                m_tokenstring_, nindex, token);
1240:                            }
1241:                        }
1242:                    }
1243:
1244:                    if (namelen == nindex
1245:                            && (count == len || m_groupstring_[index + count] == ';')) {
1246:                        return result;
1247:                    }
1248:
1249:                    index += len;
1250:                }
1251:                return -1;
1252:            }
1253:
1254:            /**
1255:             * Gets the character extended type
1256:             * @param ch character to be tested
1257:             * @return extended type it is associated with
1258:             */
1259:            private static int getType(int ch) {
1260:                if (UCharacterUtility.isNonCharacter(ch)) {
1261:                    // not a character we return a invalid category count
1262:                    return NON_CHARACTER_;
1263:                }
1264:                int result = UCharacter.getType(ch);
1265:                if (result == UCharacterCategory.SURROGATE) {
1266:                    if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
1267:                        result = LEAD_SURROGATE_;
1268:                    } else {
1269:                        result = TRAIL_SURROGATE_;
1270:                    }
1271:                }
1272:                return result;
1273:            }
1274:
1275:            /**
1276:             * Getting the character with extended name of the form <....>.
1277:             * @param name of the character to be found
1278:             * @param choice name choice
1279:             * @return character associated with the name, -1 if such character is not
1280:             *                   found and -2 if we should continue with the search.
1281:             */
1282:            private static int getExtendedChar(String name, int choice) {
1283:                if (name.charAt(0) == '<') {
1284:                    if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
1285:                        int endIndex = name.length() - 1;
1286:                        if (name.charAt(endIndex) == '>') {
1287:                            int startIndex = name.lastIndexOf('-');
1288:                            if (startIndex >= 0) { // We've got a category.
1289:                                startIndex++;
1290:                                int result = -1;
1291:                                try {
1292:                                    result = Integer.parseInt(name.substring(
1293:                                            startIndex, endIndex), 16);
1294:                                } catch (NumberFormatException e) {
1295:                                    return -1;
1296:                                }
1297:                                // Now validate the category name. We could use a
1298:                                // binary search, or a trie, if we really wanted to.
1299:                                String type = name.substring(1, startIndex - 1);
1300:                                int length = TYPE_NAMES_.length;
1301:                                for (int i = 0; i < length; ++i) {
1302:                                    if (type.compareTo(TYPE_NAMES_[i]) == 0) {
1303:                                        if (getType(result) == i) {
1304:                                            return result;
1305:                                        }
1306:                                        break;
1307:                                    }
1308:                                }
1309:                            }
1310:                        }
1311:                    }
1312:                    return -1;
1313:                }
1314:                return -2;
1315:            }
1316:
1317:            // sets of name characters, maximum name lengths -----------------------
1318:
1319:            /**
1320:             * Adds a codepoint into a set of ints.
1321:             * Equivalent to SET_ADD.
1322:             * @param set set to add to
1323:             * @param ch 16 bit char to add
1324:             */
1325:            private static void add(int set[], char ch) {
1326:                set[ch >>> 5] |= 1 << (ch & 0x1f);
1327:            }
1328:
1329:            /**
1330:             * Checks if a codepoint is a part of a set of ints.
1331:             * Equivalent to SET_CONTAINS.
1332:             * @param set set to check in
1333:             * @param ch 16 bit char to check
1334:             * @return true if codepoint is part of the set, false otherwise
1335:             */
1336:            private static boolean contains(int set[], char ch) {
1337:                return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
1338:            }
1339:
1340:            /**
1341:             * Adds all characters of the argument str and gets the length
1342:             * Equivalent to calcStringSetLength.
1343:             * @param set set to add all chars of str to
1344:             * @param str string to add
1345:             */
1346:            private static int add(int set[], String str) {
1347:                int result = str.length();
1348:
1349:                for (int i = result - 1; i >= 0; i--) {
1350:                    add(set, str.charAt(i));
1351:                }
1352:                return result;
1353:            }
1354:
1355:            /**
1356:             * Adds all characters of the argument str and gets the length
1357:             * Equivalent to calcStringSetLength.
1358:             * @param set set to add all chars of str to
1359:             * @param str string to add
1360:             */
1361:            private static int add(int set[], StringBuffer str) {
1362:                int result = str.length();
1363:
1364:                for (int i = result - 1; i >= 0; i--) {
1365:                    add(set, str.charAt(i));
1366:                }
1367:                return result;
1368:            }
1369:
1370:            /**
1371:             * Adds all algorithmic names into the name set.
1372:             * Equivalent to part of calcAlgNameSetsLengths.
1373:             * @param maxlength length to compare to
1374:             * @return the maximum length of any possible algorithmic name if it is >
1375:             *         maxlength, otherwise maxlength is returned.
1376:             */
1377:            private int addAlgorithmName(int maxlength) {
1378:                int result = 0;
1379:                for (int i = m_algorithm_.length - 1; i >= 0; i--) {
1380:                    result = m_algorithm_[i].add(m_nameSet_, maxlength);
1381:                    if (result > maxlength) {
1382:                        maxlength = result;
1383:                    }
1384:                }
1385:                return maxlength;
1386:            }
1387:
1388:            /**
1389:             * Adds all extended names into the name set.
1390:             * Equivalent to part of calcExtNameSetsLengths.
1391:             * @param maxlength length to compare to
1392:             * @return the maxlength of any possible extended name.
1393:             */
1394:            private int addExtendedName(int maxlength) {
1395:                for (int i = TYPE_NAMES_.length - 1; i >= 0; i--) {
1396:                    // for each category, count the length of the category name
1397:                    // plus 9 =
1398:                    // 2 for <>
1399:                    // 1 for -
1400:                    // 6 for most hex digits per code point
1401:                    int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
1402:                    if (length > maxlength) {
1403:                        maxlength = length;
1404:                    }
1405:                }
1406:                return maxlength;
1407:            }
1408:
1409:            /**
1410:             * Adds names of a group to the argument set.
1411:             * Equivalent to calcNameSetLength.
1412:             * @param offset of the group name string in byte count
1413:             * @param length of the group name string
1414:             * @param tokenlength array to store the length of each token
1415:             * @param set to add to
1416:             * @return the length of the name string and the length of the group
1417:             *         string parsed
1418:             */
1419:            private int[] addGroupName(int offset, int length,
1420:                    byte tokenlength[], int set[]) {
1421:                int resultnlength = 0;
1422:                int resultplength = 0;
1423:                while (resultplength < length) {
1424:                    char b = (char) (m_groupstring_[offset + resultplength] & 0xff);
1425:                    resultplength++;
1426:                    if (b == ';') {
1427:                        break;
1428:                    }
1429:
1430:                    if (b >= m_tokentable_.length) {
1431:                        add(set, b); // implicit letter
1432:                        resultnlength++;
1433:                    } else {
1434:                        char token = m_tokentable_[b & 0x00ff];
1435:                        if (token == 0xFFFE) {
1436:                            // this is a lead byte for a double-byte token
1437:                            b = (char) (b << 8 | (m_groupstring_[offset
1438:                                    + resultplength] & 0x00ff));
1439:                            token = m_tokentable_[b];
1440:                            resultplength++;
1441:                        }
1442:                        if (token == 0xFFFF) {
1443:                            add(set, b);
1444:                            resultnlength++;
1445:                        } else {
1446:                            // count token word
1447:                            // use cached token length
1448:                            byte tlength = tokenlength[b];
1449:                            if (tlength == 0) {
1450:                                synchronized (m_utilStringBuffer_) {
1451:                                    m_utilStringBuffer_.delete(0,
1452:                                            m_utilStringBuffer_.length());
1453:                                    UCharacterUtility.getNullTermByteSubString(
1454:                                            m_utilStringBuffer_,
1455:                                            m_tokenstring_, token);
1456:                                    tlength = (byte) add(set,
1457:                                            m_utilStringBuffer_);
1458:                                }
1459:                                tokenlength[b] = tlength;
1460:                            }
1461:                            resultnlength += tlength;
1462:                        }
1463:                    }
1464:                }
1465:                m_utilIntBuffer_[0] = resultnlength;
1466:                m_utilIntBuffer_[1] = resultplength;
1467:                return m_utilIntBuffer_;
1468:            }
1469:
1470:            /**
1471:             * Adds names of all group to the argument set.
1472:             * Sets the data member m_max*Length_.
1473:             * Method called only once.
1474:             * Equivalent to calcGroupNameSetsLength.
1475:             * @param maxlength length to compare to
1476:             */
1477:            private void addGroupName(int maxlength) {
1478:                int maxisolength = 0;
1479:                char offsets[] = new char[LINES_PER_GROUP_ + 2];
1480:                char lengths[] = new char[LINES_PER_GROUP_ + 2];
1481:                byte tokenlengths[] = new byte[m_tokentable_.length];
1482:
1483:                // enumerate all groups
1484:                // for (int i = m_groupcount_ - 1; i >= 0; i --) {
1485:                for (int i = 0; i < m_groupcount_; i++) {
1486:                    int offset = getGroupLengths(i, offsets, lengths);
1487:                    // enumerate all lines in each group
1488:                    // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
1489:                    //    linenumber --) {
1490:                    for (int linenumber = 0; linenumber < LINES_PER_GROUP_; linenumber++) {
1491:                        int lineoffset = offset + offsets[linenumber];
1492:                        int length = lengths[linenumber];
1493:                        if (length == 0) {
1494:                            continue;
1495:                        }
1496:
1497:                        // read regular name
1498:                        int parsed[] = addGroupName(lineoffset, length,
1499:                                tokenlengths, m_nameSet_);
1500:                        if (parsed[0] > maxlength) {
1501:                            // 0 for name length
1502:                            maxlength = parsed[0];
1503:                        }
1504:                        lineoffset += parsed[1];
1505:                        if (parsed[1] >= length) {
1506:                            // 1 for parsed group string length
1507:                            continue;
1508:                        }
1509:                        length -= parsed[1];
1510:                        // read Unicode 1.0 name
1511:                        parsed = addGroupName(lineoffset, length, tokenlengths,
1512:                                m_nameSet_);
1513:                        if (parsed[0] > maxlength) {
1514:                            // 0 for name length
1515:                            maxlength = parsed[0];
1516:                        }
1517:                        lineoffset += parsed[1];
1518:                        if (parsed[1] >= length) {
1519:                            // 1 for parsed group string length
1520:                            continue;
1521:                        }
1522:                        length -= parsed[1];
1523:                        // read ISO comment
1524:                        parsed = addGroupName(lineoffset, length, tokenlengths,
1525:                                m_ISOCommentSet_);
1526:                        if (parsed[1] > maxisolength) {
1527:                            maxisolength = length;
1528:                        }
1529:                    }
1530:                }
1531:
1532:                // set gMax... - name length last for threading
1533:                m_maxISOCommentLength_ = maxisolength;
1534:                m_maxNameLength_ = maxlength;
1535:            }
1536:
1537:            /**
1538:             * Sets up the name sets and the calculation of the maximum lengths.
1539:             * Equivalent to calcNameSetsLengths.
1540:             */
1541:            private boolean initNameSetsLengths() {
1542:                if (m_maxNameLength_ > 0) {
1543:                    return true;
1544:                }
1545:
1546:                String extra = "0123456789ABCDEF<>-";
1547:                // set hex digits, used in various names, and <>-, used in extended
1548:                // names
1549:                for (int i = extra.length() - 1; i >= 0; i--) {
1550:                    add(m_nameSet_, extra.charAt(i));
1551:                }
1552:
1553:                // set sets and lengths from algorithmic names
1554:                m_maxNameLength_ = addAlgorithmName(0);
1555:                // set sets and lengths from extended names
1556:                m_maxNameLength_ = addExtendedName(m_maxNameLength_);
1557:                // set sets and lengths from group names, set global maximum values
1558:                addGroupName(m_maxNameLength_);
1559:                return true;
1560:            }
1561:
1562:            /**
1563:             * Converts the char set cset into a Unicode set uset.
1564:             * Equivalent to charSetToUSet.
1565:             * @param set Set of 256 bit flags corresponding to a set of chars.
1566:             * @param uset USet to receive characters. Existing contents are deleted.
1567:             */
1568:            private void convert(int set[], UnicodeSet uset) {
1569:                uset.clear();
1570:                if (!initNameSetsLengths()) {
1571:                    return;
1572:                }
1573:
1574:                // build a char string with all chars that are used in character names
1575:                for (char c = 255; c > 0; c--) {
1576:                    if (contains(set, c)) {
1577:                        uset.add(c);
1578:                    }
1579:                }
1580:            }
1581:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.