001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.text;
008:
009: import java.util.*;
010:
011: /**
012: * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
013: * iterates over either code points or code point ranges. After all
014: * code points or ranges have been returned, it returns the
015: * multicharacter strings of the UnicodSet, if any.
016: *
017: * <p>To iterate over code points, use a loop like this:
018: * <pre>
019: * UnicodeSetIterator it = new UnicodeSetIterator(set);
020: * while (set.next()) {
021: * if (set.codepoint != UnicodeSetIterator.IS_STRING) {
022: * processCodepoint(set.codepoint);
023: * } else {
024: * processString(set.string);
025: * }
026: * }
027: * </pre>
028: *
029: * <p>To iterate over code point ranges, use a loop like this:
030: * <pre>
031: * UnicodeSetIterator it = new UnicodeSetIterator(set);
032: * while (set.nextRange()) {
033: * if (set.codepoint != UnicodeSetIterator.IS_STRING) {
034: * processCodepointRange(set.codepoint, set.codepointEnd);
035: * } else {
036: * processString(set.string);
037: * }
038: * }
039: * </pre>
040: * @author M. Davis
041: * @stable ICU 2.0
042: */
043: public class UnicodeSetIterator {
044:
045: /**
046: * Value of <tt>codepoint</tt> if the iterator points to a string.
047: * If <tt>codepoint == IS_STRING</tt>, then examine
048: * <tt>string</tt> for the current iteration result.
049: * @stable ICU 2.0
050: */
051: public static int IS_STRING = -1;
052:
053: /**
054: * Current code point, or the special value <tt>IS_STRING</tt>, if
055: * the iterator points to a string.
056: * @stable ICU 2.0
057: */
058: public int codepoint;
059:
060: /**
061: * When iterating over ranges using <tt>nextRange()</tt>,
062: * <tt>codepointEnd</tt> contains the inclusive end of the
063: * iteration range, if <tt>codepoint != IS_STRING</tt>. If
064: * iterating over code points using <tt>next()</tt>, or if
065: * <tt>codepoint == IS_STRING</tt>, then the value of
066: * <tt>codepointEnd</tt> is undefined.
067: * @stable ICU 2.0
068: */
069: public int codepointEnd;
070:
071: /**
072: * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
073: * to the current string. If <tt>codepoint != IS_STRING</tt>, the
074: * value of <tt>string</tt> is undefined.
075: * @stable ICU 2.0
076: */
077: public String string;
078:
079: /**
080: * Create an iterator over the given set.
081: * @param set set to iterate over
082: * @stable ICU 2.0
083: */
084: public UnicodeSetIterator(UnicodeSet set) {
085: reset(set);
086: }
087:
088: /**
089: * Create an iterator over nothing. <tt>next()</tt> and
090: * <tt>nextRange()</tt> return false. This is a convenience
091: * constructor allowing the target to be set later.
092: * @stable ICU 2.0
093: */
094: public UnicodeSetIterator() {
095: reset(new UnicodeSet());
096: }
097:
098: /**
099: * Returns the next element in the set, either a single code point
100: * or a string. If there are no more elements in the set, return
101: * false. If <tt>codepoint == IS_STRING</tt>, the value is a
102: * string in the <tt>string</tt> field. Otherwise the value is a
103: * single code point in the <tt>codepoint</tt> field.
104: *
105: * <p>The order of iteration is all code points in sorted order,
106: * followed by all strings sorted order. <tt>codepointEnd</tt> is
107: * undefined after calling this method. <tt>string</tt> is
108: * undefined unless <tt>codepoint == IS_STRING</tt>. Do not mix
109: * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
110: * calling <tt>reset()</tt> between them. The results of doing so
111: * are undefined.
112: *
113: * @return true if there was another element in the set and this
114: * object contains the element.
115: * @stable ICU 2.0
116: */
117: public boolean next() {
118: if (nextElement <= endElement) {
119: codepoint = codepointEnd = nextElement++;
120: return true;
121: }
122: if (range < endRange) {
123: loadRange(++range);
124: codepoint = codepointEnd = nextElement++;
125: return true;
126: }
127:
128: // stringIterator == null iff there are no string elements remaining
129:
130: if (stringIterator == null)
131: return false;
132: codepoint = IS_STRING; // signal that value is actually a string
133: string = (String) stringIterator.next();
134: if (!stringIterator.hasNext())
135: stringIterator = null;
136: return true;
137: }
138:
139: /**
140: * Returns the next element in the set, either a code point range
141: * or a string. If there are no more elements in the set, return
142: * false. If <tt>codepoint == IS_STRING</tt>, the value is a
143: * string in the <tt>string</tt> field. Otherwise the value is a
144: * range of one or more code points from <tt>codepoint</tt> to
145: * <tt>codepointeEnd</tt> inclusive.
146: *
147: * <p>The order of iteration is all code points ranges in sorted
148: * order, followed by all strings sorted order. Ranges are
149: * disjoint and non-contiguous. <tt>string</tt> is undefined
150: * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to
151: * <tt>next()</tt> and <tt>nextRange()</tt> without calling
152: * <tt>reset()</tt> between them. The results of doing so are
153: * undefined.
154: *
155: * @return true if there was another element in the set and this
156: * object contains the element.
157: * @stable ICU 2.0
158: */
159: public boolean nextRange() {
160: if (nextElement <= endElement) {
161: codepointEnd = endElement;
162: codepoint = nextElement;
163: nextElement = endElement + 1;
164: return true;
165: }
166: if (range < endRange) {
167: loadRange(++range);
168: codepointEnd = endElement;
169: codepoint = nextElement;
170: nextElement = endElement + 1;
171: return true;
172: }
173:
174: // stringIterator == null iff there are no string elements remaining
175:
176: if (stringIterator == null)
177: return false;
178: codepoint = IS_STRING; // signal that value is actually a string
179: string = (String) stringIterator.next();
180: if (!stringIterator.hasNext())
181: stringIterator = null;
182: return true;
183: }
184:
185: /**
186: * Sets this iterator to visit the elements of the given set and
187: * resets it to the start of that set. The iterator is valid only
188: * so long as <tt>set</tt> is valid.
189: * @param set the set to iterate over.
190: * @stable ICU 2.0
191: */
192: public void reset(UnicodeSet set) {
193: this .set = set;
194: reset();
195: }
196:
197: /**
198: * Resets this iterator to the start of the set.
199: * @stable ICU 2.0
200: */
201: public void reset() {
202: endRange = set.getRangeCount() - 1;
203: range = 0;
204: endElement = -1;
205: nextElement = 0;
206: if (endRange >= 0) {
207: loadRange(range);
208: }
209: stringIterator = null;
210: if (set.strings != null) {
211: stringIterator = set.strings.iterator();
212: if (!stringIterator.hasNext())
213: stringIterator = null;
214: }
215: }
216:
217: /**
218: * Gets the current string from the iterator. Only use after calling next(), not nextRange().
219: * @internal
220: * @deprecated This API is ICU internal only.
221: */
222: public String getString() {
223: if (codepoint != IS_STRING) {
224: return UTF16.valueOf(codepoint);
225: }
226: return string;
227: }
228:
229: // ======================= PRIVATES ===========================
230:
231: private UnicodeSet set;
232: private int endRange = 0;
233: private int range = 0;
234: /**
235: * @internal
236: * @deprecated This API is ICU internal only.
237: */
238: protected int endElement;
239: /**
240: * @internal
241: * @deprecated This API is ICU internal only.
242: */
243: protected int nextElement;
244: private Iterator stringIterator = null;
245:
246: /**
247: * Invariant: stringIterator is null when there are no (more) strings remaining
248: */
249:
250: /**
251: * @internal
252: * @deprecated This API is ICU internal only.
253: */
254: protected void loadRange(int range) {
255: nextElement = set.getRangeStart(range);
256: endElement = set.getRangeEnd(range);
257: }
258: }
|