001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.text;
008:
009: import com.ibm.icu.lang.UCharacter;
010: import com.ibm.icu.util.ULocale;
011:
012: import java.text.CharacterIterator;
013:
014: /**
015: * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.
016: * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.
017: */
018: final class BreakTransliterator extends Transliterator {
019: private BreakIterator bi;
020: private String insertion;
021: private int[] boundaries = new int[50];
022: private int boundaryCount = 0;
023:
024: public BreakTransliterator(String ID, UnicodeFilter filter,
025: BreakIterator bi, String insertion) {
026: super (ID, filter);
027: this .bi = bi;
028: this .insertion = insertion;
029: }
030:
031: public BreakTransliterator(String ID, UnicodeFilter filter) {
032: this (ID, filter, null, " ");
033: }
034:
035: public String getInsertion() {
036: return insertion;
037: }
038:
039: public void setInsertion(String insertion) {
040: this .insertion = insertion;
041: }
042:
043: public BreakIterator getBreakIterator() {
044: // Defer initialization of BreakIterator because it is slow,
045: // typically over 2000 ms.
046: if (bi == null)
047: bi = BreakIterator.getWordInstance(new ULocale("th_TH"));
048: return bi;
049: }
050:
051: public void setBreakIterator(BreakIterator bi) {
052: this .bi = bi;
053: }
054:
055: static final int LETTER_OR_MARK_MASK = (1 << Character.UPPERCASE_LETTER)
056: | (1 << Character.LOWERCASE_LETTER)
057: | (1 << Character.TITLECASE_LETTER)
058: | (1 << Character.MODIFIER_LETTER)
059: | (1 << Character.OTHER_LETTER)
060: | (1 << Character.COMBINING_SPACING_MARK)
061: | (1 << Character.NON_SPACING_MARK)
062: | (1 << Character.ENCLOSING_MARK);
063:
064: protected void handleTransliterate(Replaceable text, Position pos,
065: boolean incremental) {
066: boundaryCount = 0;
067: int boundary = 0;
068: getBreakIterator(); // Lazy-create it if necessary
069: bi.setText(new ReplaceableCharacterIterator(text, pos.start,
070: pos.limit, pos.start));
071: // TODO: fix clumsy workaround used below.
072: /*
073: char[] tempBuffer = new char[text.length()];
074: text.getChars(0, text.length(), tempBuffer, 0);
075: bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
076: */
077: // end debugging
078: // To make things much easier, we will stack the boundaries, and then insert at the end.
079: // generally, we won't need too many, since we will be filtered.
080: for (boundary = bi.first(); boundary != BreakIterator.DONE
081: && boundary < pos.limit; boundary = bi.next()) {
082: if (boundary == 0)
083: continue;
084: // HACK: Check to see that preceeding item was a letter
085:
086: int cp = UTF16.charAt(text, boundary - 1);
087: int type = UCharacter.getType(cp);
088: //System.out.println(Integer.toString(cp,16) + " (before): " + type);
089: if (((1 << type) & LETTER_OR_MARK_MASK) == 0)
090: continue;
091:
092: cp = UTF16.charAt(text, boundary);
093: type = UCharacter.getType(cp);
094: //System.out.println(Integer.toString(cp,16) + " (after): " + type);
095: if (((1 << type) & LETTER_OR_MARK_MASK) == 0)
096: continue;
097:
098: if (boundaryCount >= boundaries.length) { // realloc if necessary
099: int[] temp = new int[boundaries.length * 2];
100: System.arraycopy(boundaries, 0, temp, 0,
101: boundaries.length);
102: boundaries = temp;
103: }
104:
105: boundaries[boundaryCount++] = boundary;
106: //System.out.println(boundary);
107: }
108:
109: int delta = 0;
110: int lastBoundary = 0;
111:
112: if (boundaryCount != 0) { // if we found something, adjust
113: delta = boundaryCount * insertion.length();
114: lastBoundary = boundaries[boundaryCount - 1];
115:
116: // we do this from the end backwards, so that we don't have to keep updating.
117:
118: while (boundaryCount > 0) {
119: boundary = boundaries[--boundaryCount];
120: text.replace(boundary, boundary, insertion);
121: }
122: }
123:
124: // Now fix up the return values
125: pos.contextLimit += delta;
126: pos.limit += delta;
127: pos.start = incremental ? lastBoundary + delta : pos.limit;
128: }
129:
130: /**
131: * Registers standard variants with the system. Called by
132: * Transliterator during initialization.
133: */
134: static void register() {
135: // false means that it is invisible
136: Transliterator trans = new BreakTransliterator(
137: "Any-BreakInternal", null);
138: Transliterator.registerInstance(trans, false);
139: /*
140: Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {
141: public Transliterator getInstance(String ID) {
142: return new BreakTransliterator("Any-Break", null);
143: }
144: });
145: */
146: }
147:
148: // Hack, just to get a real character iterator.
149:
150: static final class ReplaceableCharacterIterator implements
151: CharacterIterator {
152: private Replaceable text;
153: private int begin;
154: private int end;
155: // invariant: begin <= pos <= end
156: private int pos;
157:
158: /**
159: * Constructs an iterator with an initial index of 0.
160: */
161: public ReplaceableCharacterIterator(Replaceable text) {
162: this (text, 0);
163: }
164:
165: /**
166: * Constructs an iterator with the specified initial index.
167: *
168: * @param text The String to be iterated over
169: * @param pos Initial iterator position
170: */
171: public ReplaceableCharacterIterator(Replaceable text, int pos) {
172: this (text, 0, text.length(), pos);
173: }
174:
175: /**
176: * Constructs an iterator over the given range of the given string, with the
177: * index set at the specified position.
178: *
179: * @param text The String to be iterated over
180: * @param begin Index of the first character
181: * @param end Index of the character following the last character
182: * @param pos Initial iterator position
183: */
184: public ReplaceableCharacterIterator(Replaceable text,
185: int begin, int end, int pos) {
186: if (text == null) {
187: throw new NullPointerException();
188: }
189: this .text = text;
190:
191: if (begin < 0 || begin > end || end > text.length()) {
192: throw new IllegalArgumentException(
193: "Invalid substring range");
194: }
195:
196: if (pos < begin || pos > end) {
197: throw new IllegalArgumentException("Invalid position");
198: }
199:
200: this .begin = begin;
201: this .end = end;
202: this .pos = pos;
203: }
204:
205: /**
206: * Reset this iterator to point to a new string. This package-visible
207: * method is used by other java.text classes that want to avoid allocating
208: * new ReplaceableCharacterIterator objects every time their setText method
209: * is called.
210: *
211: * @param text The String to be iterated over
212: */
213: public void setText(Replaceable text) {
214: if (text == null) {
215: throw new NullPointerException();
216: }
217: this .text = text;
218: this .begin = 0;
219: this .end = text.length();
220: this .pos = 0;
221: }
222:
223: /**
224: * Implements CharacterIterator.first() for String.
225: * @see CharacterIterator#first
226: */
227: public char first() {
228: pos = begin;
229: return current();
230: }
231:
232: /**
233: * Implements CharacterIterator.last() for String.
234: * @see CharacterIterator#last
235: */
236: public char last() {
237: if (end != begin) {
238: pos = end - 1;
239: } else {
240: pos = end;
241: }
242: return current();
243: }
244:
245: /**
246: * Implements CharacterIterator.setIndex() for String.
247: * @see CharacterIterator#setIndex
248: */
249: public char setIndex(int p) {
250: if (p < begin || p > end) {
251: throw new IllegalArgumentException("Invalid index");
252: }
253: pos = p;
254: return current();
255: }
256:
257: /**
258: * Implements CharacterIterator.current() for String.
259: * @see CharacterIterator#current
260: */
261: public char current() {
262: if (pos >= begin && pos < end) {
263: return text.charAt(pos);
264: } else {
265: return DONE;
266: }
267: }
268:
269: /**
270: * Implements CharacterIterator.next() for String.
271: * @see CharacterIterator#next
272: */
273: public char next() {
274: if (pos < end - 1) {
275: pos++;
276: return text.charAt(pos);
277: } else {
278: pos = end;
279: return DONE;
280: }
281: }
282:
283: /**
284: * Implements CharacterIterator.previous() for String.
285: * @see CharacterIterator#previous
286: */
287: public char previous() {
288: if (pos > begin) {
289: pos--;
290: return text.charAt(pos);
291: } else {
292: return DONE;
293: }
294: }
295:
296: /**
297: * Implements CharacterIterator.getBeginIndex() for String.
298: * @see CharacterIterator#getBeginIndex
299: */
300: public int getBeginIndex() {
301: return begin;
302: }
303:
304: /**
305: * Implements CharacterIterator.getEndIndex() for String.
306: * @see CharacterIterator#getEndIndex
307: */
308: public int getEndIndex() {
309: return end;
310: }
311:
312: /**
313: * Implements CharacterIterator.getIndex() for String.
314: * @see CharacterIterator#getIndex
315: */
316: public int getIndex() {
317: return pos;
318: }
319:
320: /**
321: * Compares the equality of two ReplaceableCharacterIterator objects.
322: * @param obj the ReplaceableCharacterIterator object to be compared with.
323: * @return true if the given obj is the same as this
324: * ReplaceableCharacterIterator object; false otherwise.
325: */
326: public boolean equals(Object obj) {
327: if (this == obj) {
328: return true;
329: }
330: if (!(obj instanceof ReplaceableCharacterIterator)) {
331: return false;
332: }
333:
334: ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;
335:
336: if (hashCode() != that.hashCode()) {
337: return false;
338: }
339: if (!text.equals(that.text)) {
340: return false;
341: }
342: if (pos != that.pos || begin != that.begin
343: || end != that.end) {
344: return false;
345: }
346: return true;
347: }
348:
349: /**
350: * Computes a hashcode for this iterator.
351: * @return A hash code
352: */
353: public int hashCode() {
354: return text.hashCode() ^ pos ^ begin ^ end;
355: }
356:
357: /**
358: * Creates a copy of this iterator.
359: * @return A copy of this
360: */
361: public Object clone() {
362: try {
363: ReplaceableCharacterIterator other = (ReplaceableCharacterIterator) super
364: .clone();
365: return other;
366: } catch (CloneNotSupportedException e) {
367: throw new IllegalStateException();
368: }
369: }
370:
371: }
372:
373: }
|