001: /*
002: * Copyright (C) 1996-2004, International Business Machines Corporation and
003: * others. All Rights Reserved.
004: */
005: package com.ibm.icu.text;
006:
007: import com.ibm.icu.lang.*;
008: import com.ibm.icu.impl.Utility;
009: import com.ibm.icu.impl.UCharacterProperty;
010: import com.ibm.icu.impl.UCharacterName;
011:
012: /**
013: * A transliterator that performs name to character mapping.
014: * @author Alan Liu
015: */
016: class NameUnicodeTransliterator extends Transliterator {
017:
018: char openDelimiter;
019: char closeDelimiter;
020:
021: static final String _ID = "Name-Any";
022:
023: static final String OPEN_PAT = "\\N~{~";
024: static final char OPEN_DELIM = '\\'; // first char of OPEN_PAT
025: static final char CLOSE_DELIM = '}';
026: static final char SPACE = ' ';
027:
028: /**
029: * System registration hook.
030: */
031: static void register() {
032: Transliterator.registerFactory(_ID,
033: new Transliterator.Factory() {
034: public Transliterator getInstance(String ID) {
035: return new NameUnicodeTransliterator(null);
036: }
037: });
038: }
039:
040: /**
041: * Constructs a transliterator.
042: */
043: public NameUnicodeTransliterator(UnicodeFilter filter) {
044: super (_ID, filter);
045: }
046:
047: /**
048: * Implements {@link Transliterator#handleTransliterate}.
049: */
050: protected void handleTransliterate(Replaceable text,
051: Position offsets, boolean isIncremental) {
052:
053: int maxLen = UCharacterName.getInstance()
054: .getMaxCharNameLength() + 1; // allow for temporary trailing space
055:
056: StringBuffer name = new StringBuffer(maxLen);
057:
058: // Get the legal character set
059: UnicodeSet legal = new UnicodeSet();
060: UCharacterName.getInstance().getCharNameCharacters(legal);
061:
062: int cursor = offsets.start;
063: int limit = offsets.limit;
064:
065: // Modes:
066: // 0 - looking for open delimiter
067: // 1 - after open delimiter
068: int mode = 0;
069: int openPos = -1; // open delim candidate pos
070:
071: int c;
072: while (cursor < limit) {
073: c = text.char32At(cursor);
074:
075: switch (mode) {
076: case 0: // looking for open delimiter
077: if (c == OPEN_DELIM) { // quick check first
078: openPos = cursor;
079: int i = Utility.parsePattern(OPEN_PAT, text,
080: cursor, limit);
081: if (i >= 0 && i < limit) {
082: mode = 1;
083: name.setLength(0);
084: cursor = i;
085: continue; // *** reprocess char32At(cursor)
086: }
087: }
088: break;
089:
090: case 1: // after open delimiter
091: // Look for legal chars. If \s+ is found, convert it
092: // to a single space. If closeDelimiter is found, exit
093: // the loop. If any other character is found, exit the
094: // loop. If the limit is reached, exit the loop.
095:
096: // Convert \s+ => SPACE. This assumes there are no
097: // runs of >1 space characters in names.
098: if (UCharacterProperty.isRuleWhiteSpace(c)) {
099: // Ignore leading whitespace
100: if (name.length() > 0
101: && name.charAt(name.length() - 1) != SPACE) {
102: name.append(SPACE);
103: // If we are too long then abort. maxLen includes
104: // temporary trailing space, so use '>'.
105: if (name.length() > maxLen) {
106: mode = 0;
107: }
108: }
109: break;
110: }
111:
112: if (c == CLOSE_DELIM) {
113:
114: int len = name.length();
115:
116: // Delete trailing space, if any
117: if (len > 0 && name.charAt(len - 1) == SPACE) {
118: name.setLength(--len);
119: }
120:
121: c = UCharacter.getCharFromExtendedName(name
122: .toString());
123: if (c != -1) {
124: // Lookup succeeded
125:
126: // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
127: cursor++; // advance over CLOSE_DELIM
128:
129: String str = UTF16.valueOf(c);
130: text.replace(openPos, cursor, str);
131:
132: // Adjust indices for the change in the length of
133: // the string. Do not assume that str.length() ==
134: // 1, in case of surrogates.
135: int delta = cursor - openPos - str.length();
136: cursor -= delta;
137: limit -= delta;
138: // assert(cursor == openPos + str.length());
139: }
140: // If the lookup failed, we leave things as-is and
141: // still switch to mode 0 and continue.
142: mode = 0;
143: openPos = -1; // close off candidate
144: continue; // *** reprocess char32At(cursor)
145: }
146:
147: if (legal.contains(c)) {
148: UTF16.append(name, c);
149: // If we go past the longest possible name then abort.
150: // maxLen includes temporary trailing space, so use '>='.
151: if (name.length() >= maxLen) {
152: mode = 0;
153: }
154: }
155:
156: // Invalid character
157: else {
158: --cursor; // Backup and reprocess this character
159: mode = 0;
160: }
161:
162: break;
163: }
164:
165: cursor += UTF16.getCharCount(c);
166: }
167:
168: offsets.contextLimit += limit - offsets.limit;
169: offsets.limit = limit;
170: // In incremental mode, only advance the cursor up to the last
171: // open delimiter candidate.
172: offsets.start = (isIncremental && openPos >= 0) ? openPos
173: : cursor;
174: }
175: }
|