001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.text;
008:
009: import java.text.CharacterIterator;
010:
011: import com.ibm.icu.impl.CharacterIteratorWrapper;
012: import com.ibm.icu.impl.ReplaceableUCharacterIterator;
013: import com.ibm.icu.impl.UCharArrayIterator;
014: import com.ibm.icu.impl.UCharacterIteratorWrapper;
015: import com.ibm.icu.impl.UCharacterProperty;
016:
017: /**
018: * Abstract class that defines an API for iteration on text objects.This is an
019: * interface for forward and backward iteration and random access into a text
020: * object. Forward iteration is done with post-increment and backward iteration
021: * is done with pre-decrement semantics, while the
022: * <code>java.text.CharacterIterator</code> interface methods provided forward
023: * iteration with "pre-increment" and backward iteration with pre-decrement
024: * semantics. This API is more efficient for forward iteration over code points.
025: * The other major difference is that this API can do both code unit and code point
026: * iteration, <code>java.text.CharacterIterator</code> can only iterate over
027: * code units and is limited to BMP (0 - 0xFFFF)
028: * @author Ram
029: * @stable ICU 2.4
030: */
031: public abstract class UCharacterIterator implements Cloneable,
032: UForwardCharacterIterator {
033:
034: /**
035: * Protected default constructor for the subclasses
036: * @stable ICU 2.4
037: */
038: protected UCharacterIterator() {
039: }
040:
041: // static final methods ----------------------------------------------------
042:
043: /**
044: * Returns a <code>UCharacterIterator</code> object given a
045: * <code>Replaceable</code> object.
046: * @param source a valid source as a <code>Replaceable</code> object
047: * @return UCharacterIterator object
048: * @exception IllegalArgumentException if the argument is null
049: * @stable ICU 2.4
050: */
051: public static final UCharacterIterator getInstance(
052: Replaceable source) {
053: return new ReplaceableUCharacterIterator(source);
054: }
055:
056: /**
057: * Returns a <code>UCharacterIterator</code> object given a
058: * source string.
059: * @param source a string
060: * @return UCharacterIterator object
061: * @exception IllegalArgumentException if the argument is null
062: * @stable ICU 2.4
063: */
064: public static final UCharacterIterator getInstance(String source) {
065: return new ReplaceableUCharacterIterator(source);
066: }
067:
068: /**
069: * Returns a <code>UCharacterIterator</code> object given a
070: * source character array.
071: * @param source an array of UTF-16 code units
072: * @return UCharacterIterator object
073: * @exception IllegalArgumentException if the argument is null
074: * @stable ICU 2.4
075: */
076: public static final UCharacterIterator getInstance(char[] source) {
077: return getInstance(source, 0, source.length);
078: }
079:
080: /**
081: * Returns a <code>UCharacterIterator</code> object given a
082: * source character array.
083: * @param source an array of UTF-16 code units
084: * @return UCharacterIterator object
085: * @exception IllegalArgumentException if the argument is null
086: * @stable ICU 2.4
087: */
088: public static final UCharacterIterator getInstance(char[] source,
089: int start, int limit) {
090: return new UCharArrayIterator(source, start, limit);
091: }
092:
093: /**
094: * Returns a <code>UCharacterIterator</code> object given a
095: * source StringBuffer.
096: * @param source an string buffer of UTF-16 code units
097: * @return UCharacterIterator object
098: * @exception IllegalArgumentException if the argument is null
099: * @stable ICU 2.4
100: */
101: public static final UCharacterIterator getInstance(
102: StringBuffer source) {
103: return new ReplaceableUCharacterIterator(source);
104: }
105:
106: /**
107: * Returns a <code>UCharacterIterator</code> object given a
108: * CharacterIterator.
109: * @param source a valid CharacterIterator object.
110: * @return UCharacterIterator object
111: * @exception IllegalArgumentException if the argument is null
112: * @stable ICU 2.4
113: */
114: public static final UCharacterIterator getInstance(
115: CharacterIterator source) {
116: return new CharacterIteratorWrapper(source);
117: }
118:
119: // public methods ----------------------------------------------------------
120: /**
121: * Returns a <code>java.text.CharacterIterator</code> object for
122: * the underlying text of this iterator. The returned iterator is
123: * independent of this iterator.
124: * @return java.text.CharacterIterator object
125: * @stable ICU 2.4
126: */
127: public CharacterIterator getCharacterIterator() {
128: return new UCharacterIteratorWrapper(this );
129: }
130:
131: /**
132: * Returns the code unit at the current index. If index is out
133: * of range, returns DONE. Index is not changed.
134: * @return current code unit
135: * @stable ICU 2.4
136: */
137: public abstract int current();
138:
139: /**
140: * Returns the codepoint at the current index.
141: * If the current index is invalid, DONE is returned.
142: * If the current index points to a lead surrogate, and there is a following
143: * trail surrogate, then the code point is returned. Otherwise, the code
144: * unit at index is returned. Index is not changed.
145: * @return current codepoint
146: * @stable ICU 2.4
147: */
148: public int currentCodePoint() {
149: int ch = current();
150: if (UTF16.isLeadSurrogate((char) ch)) {
151: // advance the index to get the
152: // next code point
153: next();
154: // due to post increment semantics
155: // current() after next() actually
156: // returns the char we want
157: int ch2 = current();
158: // current should never change
159: // the current index so back off
160: previous();
161:
162: if (UTF16.isTrailSurrogate((char) ch2)) {
163: // we found a surrogate pair
164: // return the codepoint
165: return UCharacterProperty.getRawSupplementary(
166: (char) ch, (char) ch2);
167: }
168: }
169: return ch;
170: }
171:
172: /**
173: * Returns the length of the text
174: * @return length of the text
175: * @stable ICU 2.4
176: */
177: public abstract int getLength();
178:
179: /**
180: * Gets the current index in text.
181: * @return current index in text.
182: * @stable ICU 2.4
183: */
184: public abstract int getIndex();
185:
186: /**
187: * Returns the UTF16 code unit at index, and increments to the next
188: * code unit (post-increment semantics). If index is out of
189: * range, DONE is returned, and the iterator is reset to the limit
190: * of the text.
191: * @return the next UTF16 code unit, or DONE if the index is at the limit
192: * of the text.
193: * @stable ICU 2.4
194: */
195: public abstract int next();
196:
197: /**
198: * Returns the code point at index, and increments to the next code
199: * point (post-increment semantics). If index does not point to a
200: * valid surrogate pair, the behavior is the same as
201: * <code>next()<code>. Otherwise the iterator is incremented past
202: * the surrogate pair, and the code point represented by the pair
203: * is returned.
204: * @return the next codepoint in text, or DONE if the index is at
205: * the limit of the text.
206: * @stable ICU 2.4
207: */
208: public int nextCodePoint() {
209: int ch1 = next();
210: if (UTF16.isLeadSurrogate((char) ch1)) {
211: int ch2 = next();
212: if (UTF16.isTrailSurrogate((char) ch2)) {
213: return UCharacterProperty.getRawSupplementary(
214: (char) ch1, (char) ch2);
215: } else if (ch2 != DONE) {
216: // unmatched surrogate so back out
217: previous();
218: }
219: }
220: return ch1;
221: }
222:
223: /**
224: * Decrement to the position of the previous code unit in the
225: * text, and return it (pre-decrement semantics). If the
226: * resulting index is less than 0, the index is reset to 0 and
227: * DONE is returned.
228: * @return the previous code unit in the text, or DONE if the new
229: * index is before the start of the text.
230: * @stable ICU 2.4
231: */
232: public abstract int previous();
233:
234: /**
235: * Retreat to the start of the previous code point in the text,
236: * and return it (pre-decrement semantics). If the index is not
237: * preceeded by a valid surrogate pair, the behavior is the same
238: * as <code>previous()</code>. Otherwise the iterator is
239: * decremented to the start of the surrogate pair, and the code
240: * point represented by the pair is returned.
241: * @return the previous code point in the text, or DONE if the new
242: * index is before the start of the text.
243: * @stable ICU 2.4
244: */
245: public int previousCodePoint() {
246: int ch1 = previous();
247: if (UTF16.isTrailSurrogate((char) ch1)) {
248: int ch2 = previous();
249: if (UTF16.isLeadSurrogate((char) ch2)) {
250: return UCharacterProperty.getRawSupplementary(
251: (char) ch2, (char) ch1);
252: } else if (ch2 != DONE) {
253: //unmatched trail surrogate so back out
254: next();
255: }
256: }
257: return ch1;
258: }
259:
260: /**
261: * Sets the index to the specified index in the text.
262: * @param index the index within the text.
263: * @exception IndexOutOfBoundsException is thrown if an invalid index is
264: * supplied
265: * @stable ICU 2.4
266: */
267: public abstract void setIndex(int index);
268:
269: /**
270: * Sets the current index to the limit.
271: * @stable ICU 2.4
272: */
273: public void setToLimit() {
274: setIndex(getLength());
275: }
276:
277: /**
278: * Sets the current index to the start.
279: * @stable ICU 2.4
280: */
281: public void setToStart() {
282: setIndex(0);
283: }
284:
285: /**
286: * Fills the buffer with the underlying text storage of the iterator
287: * If the buffer capacity is not enough a exception is thrown. The capacity
288: * of the fill in buffer should at least be equal to length of text in the
289: * iterator obtained by calling <code>getLength()</code).
290: * <b>Usage:</b>
291: *
292: * <code>
293: * <pre>
294: * UChacterIterator iter = new UCharacterIterator.getInstance(text);
295: * char[] buf = new char[iter.getLength()];
296: * iter.getText(buf);
297: *
298: * OR
299: * char[] buf= new char[1];
300: * int len = 0;
301: * for(;;){
302: * try{
303: * len = iter.getText(buf);
304: * break;
305: * }catch(IndexOutOfBoundsException e){
306: * buf = new char[iter.getLength()];
307: * }
308: * }
309: * </pre>
310: * </code>
311: *
312: * @param fillIn an array of chars to fill with the underlying UTF-16 code
313: * units.
314: * @param offset the position within the array to start putting the data.
315: * @return the number of code units added to fillIn, as a convenience
316: * @exception IndexOutOfBounds exception if there is not enough
317: * room after offset in the array, or if offset < 0.
318: * @stable ICU 2.4
319: */
320: public abstract int getText(char[] fillIn, int offset);
321:
322: /**
323: * Convenience override for <code>getText(char[], int)>/code> that provides
324: * an offset of 0.
325: * @param fillIn an array of chars to fill with the underlying UTF-16 code
326: * units.
327: * @return the number of code units added to fillIn, as a convenience
328: * @exception IndexOutOfBounds exception if there is not enough
329: * room in the array.
330: * @stable ICU 2.4
331: */
332: public final int getText(char[] fillIn) {
333: return getText(fillIn, 0);
334: }
335:
336: /**
337: * Convenience method for returning the underlying text storage as as string
338: * @return the underlying text storage in the iterator as a string
339: * @stable ICU 2.4
340: */
341: public String getText() {
342: char[] text = new char[getLength()];
343: getText(text);
344: return new String(text);
345: }
346:
347: /**
348: * Moves the current position by the number of code units
349: * specified, either forward or backward depending on the sign
350: * of delta (positive or negative respectively). If the resulting
351: * index would be less than zero, the index is set to zero, and if
352: * the resulting index would be greater than limit, the index is
353: * set to limit.
354: *
355: * @param delta the number of code units to move the current
356: * index.
357: * @return the new index.
358: * @exception IndexOutOfBoundsException is thrown if an invalid index is
359: * supplied
360: * @stable ICU 2.4
361: *
362: */
363: public int moveIndex(int delta) {
364: int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
365: setIndex(x);
366: return x;
367: }
368:
369: /**
370: * Moves the current position by the number of code points
371: * specified, either forward or backward depending on the sign of
372: * delta (positive or negative respectively). If the current index
373: * is at a trail surrogate then the first adjustment is by code
374: * unit, and the remaining adjustments are by code points. If the
375: * resulting index would be less than zero, the index is set to
376: * zero, and if the resulting index would be greater than limit,
377: * the index is set to limit.
378: * @param delta the number of code units to move the current index.
379: * @return the new index
380: * @exception IndexOutOfBoundsException is thrown if an invalid delta is
381: * supplied
382: * @stable ICU 2.4
383: */
384: public int moveCodePointIndex(int delta) {
385: if (delta > 0) {
386: while (delta > 0 && nextCodePoint() != DONE) {
387: delta--;
388: }
389: } else {
390: while (delta < 0 && previousCodePoint() != DONE) {
391: delta++;
392: }
393: }
394: if (delta != 0) {
395: throw new IndexOutOfBoundsException();
396: }
397:
398: return getIndex();
399: }
400:
401: /**
402: * Creates a copy of this iterator, independent from other iterators.
403: * If it is not possible to clone the iterator, returns null.
404: * @return copy of this iterator
405: * @stable ICU 2.4
406: */
407: public Object clone() throws CloneNotSupportedException {
408: return super.clone();
409: }
410:
411: }
|