001: /*
002: * Copyright (C) 1996-2005, International Business Machines Corporation and
003: * others. All Rights Reserved.
004: *
005: */
006: package com.ibm.icu.text;
007:
008: import java.io.IOException;
009:
010: import com.ibm.icu.impl.UCaseProps;
011:
012: import com.ibm.icu.util.ULocale;
013:
014: import com.ibm.icu.text.ReplaceableContextIterator;
015:
016: /**
017: * A transliterator that converts all letters (as defined by
018: * <code>UCharacter.isLetter()</code>) to lower case, except for those
019: * letters preceded by non-letters. The latter are converted to title
020: * case using <code>UCharacter.toTitleCase()</code>.
021: * @author Alan Liu
022: */
023: class TitlecaseTransliterator extends Transliterator {
024:
025: static final String _ID = "Any-Title";
026:
027: /**
028: * System registration hook.
029: */
030: static void register() {
031: Transliterator.registerFactory(_ID,
032: new Transliterator.Factory() {
033: public Transliterator getInstance(String ID) {
034: return new TitlecaseTransliterator(ULocale.US);
035: }
036: });
037:
038: registerSpecialInverse("Title", "Lower", false);
039: }
040:
041: private ULocale locale;
042:
043: private UCaseProps csp;
044: private ReplaceableContextIterator iter;
045: private StringBuffer result;
046: private int[] locCache;
047:
048: /**
049: * Constructs a transliterator.
050: */
051: public TitlecaseTransliterator(ULocale loc) {
052: super (_ID, null);
053: locale = loc;
054: // Need to look back 2 characters in the case of "can't"
055: setMaximumContextLength(2);
056: try {
057: csp = UCaseProps.getSingleton();
058: } catch (IOException e) {
059: csp = null;
060: }
061: iter = new ReplaceableContextIterator();
062: result = new StringBuffer();
063: int[] locCache = new int[1];
064: locCache[0] = 0;
065: }
066:
067: /**
068: * Implements {@link Transliterator#handleTransliterate}.
069: */
070: protected void handleTransliterate(Replaceable text,
071: Position offsets, boolean isIncremental) {
072: // TODO reimplement, see ustrcase.c
073: // using a real word break iterator
074: // instead of just looking for a transition between cased and uncased characters
075: // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
076: // needs to take isIncremental into account because case mappings are context-sensitive
077: // also detect when lowercasing function did not finish because of context
078:
079: if (offsets.start >= offsets.limit) {
080: return;
081: }
082:
083: // case type: >0 cased (UCaseProps.LOWER etc.) ==0 uncased <0 case-ignorable
084: int type;
085:
086: // Our mode; we are either converting letter toTitle or
087: // toLower.
088: boolean doTitle = true;
089:
090: // Determine if there is a preceding context of cased case-ignorable*,
091: // in which case we want to start in toLower mode. If the
092: // prior context is anything else (including empty) then start
093: // in toTitle mode.
094: int c, start;
095: for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF16
096: .getCharCount(c)) {
097: c = text.char32At(start);
098: type = csp.getTypeOrIgnorable(c);
099: if (type > 0) { // cased
100: doTitle = false;
101: break;
102: } else if (type == 0) { // uncased but not ignorable
103: break;
104: }
105: // else (type<0) case-ignorable: continue
106: }
107:
108: // Convert things after a cased character toLower; things
109: // after a uncased, non-case-ignorable character toTitle. Case-ignorable
110: // characters are copied directly and do not change the mode.
111:
112: iter.setText(text);
113: iter.setIndex(offsets.start);
114: iter.setLimit(offsets.limit);
115: iter.setContextLimits(offsets.contextStart,
116: offsets.contextLimit);
117:
118: result.setLength(0);
119:
120: // Walk through original string
121: // If there is a case change, modify corresponding position in replaceable
122: int delta;
123:
124: while ((c = iter.nextCaseMapCP()) >= 0) {
125: type = csp.getTypeOrIgnorable(c);
126: if (type >= 0) { // not case-ignorable
127: if (doTitle) {
128: c = csp.toFullTitle(c, iter, result, locale,
129: locCache);
130: } else {
131: c = csp.toFullLower(c, iter, result, locale,
132: locCache);
133: }
134: doTitle = type == 0; // doTitle=isUncased
135:
136: if (iter.didReachLimit() && isIncremental) {
137: // the case mapping function tried to look beyond the context limit
138: // wait for more input
139: offsets.start = iter.getCaseMapCPStart();
140: return;
141: }
142:
143: /* decode the result */
144: if (c < 0) {
145: /* c mapped to itself, no change */
146: continue;
147: } else if (c <= UCaseProps.MAX_STRING_LENGTH) {
148: /* replace by the mapping string */
149: delta = iter.replace(result.toString());
150: result.setLength(0);
151: } else {
152: /* replace by single-code point mapping */
153: delta = iter.replace(UTF16.valueOf(c));
154: }
155:
156: if (delta != 0) {
157: offsets.limit += delta;
158: offsets.contextLimit += delta;
159: }
160: }
161: }
162: offsets.start = offsets.limit;
163: }
164: }
|