001: /*
002: * Portions Copyright 2005 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: /*
027: *******************************************************************************
028: * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved *
029: * *
030: * The original version of this source code and documentation is copyrighted *
031: * and owned by IBM, These materials are provided under terms of a License *
032: * Agreement between IBM and Sun. This technology is protected by multiple *
033: * US and International patents. This notice and attribution to IBM may not *
034: * to removed. *
035: *******************************************************************************
036: */
037:
038: package sun.text.normalizer;
039:
040: import java.util.Iterator;
041:
042: /**
043: * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
044: * iterates over either code points or code point ranges. After all
045: * code points or ranges have been returned, it returns the
046: * multicharacter strings of the UnicodSet, if any.
047: *
048: * <p>To iterate over code points, use a loop like this:
049: * <pre>
050: * UnicodeSetIterator it(set);
051: * while (set.next()) {
052: * if (set.codepoint != UnicodeSetIterator::IS_STRING) {
053: * processCodepoint(set.codepoint);
054: * } else {
055: * processString(set.string);
056: * }
057: * }
058: * </pre>
059: *
060: * <p>To iterate over code point ranges, use a loop like this:
061: * <pre>
062: * UnicodeSetIterator it(set);
063: * while (set.nextRange()) {
064: * if (set.codepoint != UnicodeSetIterator::IS_STRING) {
065: * processCodepointRange(set.codepoint, set.codepointEnd);
066: * } else {
067: * processString(set.string);
068: * }
069: * }
070: * </pre>
071: * @author M. Davis
072: * @stable ICU 2.0
073: */
074: public class UnicodeSetIterator {
075:
076: /**
077: * Value of <tt>codepoint</tt> if the iterator points to a string.
078: * If <tt>codepoint == IS_STRING</tt>, then examine
079: * <tt>string</tt> for the current iteration result.
080: * @stable ICU 2.0
081: */
082: public static int IS_STRING = -1;
083:
084: /**
085: * Current code point, or the special value <tt>IS_STRING</tt>, if
086: * the iterator points to a string.
087: * @stable ICU 2.0
088: */
089: public int codepoint;
090:
091: /**
092: * When iterating over ranges using <tt>nextRange()</tt>,
093: * <tt>codepointEnd</tt> contains the inclusive end of the
094: * iteration range, if <tt>codepoint != IS_STRING</tt>. If
095: * iterating over code points using <tt>next()</tt>, or if
096: * <tt>codepoint == IS_STRING</tt>, then the value of
097: * <tt>codepointEnd</tt> is undefined.
098: * @stable ICU 2.0
099: */
100: public int codepointEnd;
101:
102: /**
103: * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
104: * to the current string. If <tt>codepoint != IS_STRING</tt>, the
105: * value of <tt>string</tt> is undefined.
106: * @stable ICU 2.0
107: */
108: public String string;
109:
110: /**
111: * Create an iterator over the given set.
112: * @param set set to iterate over
113: * @stable ICU 2.0
114: */
115: public UnicodeSetIterator(UnicodeSet set) {
116: reset(set);
117: }
118:
119: /**
120: * Returns the next element in the set, either a code point range
121: * or a string. If there are no more elements in the set, return
122: * false. If <tt>codepoint == IS_STRING</tt>, the value is a
123: * string in the <tt>string</tt> field. Otherwise the value is a
124: * range of one or more code points from <tt>codepoint</tt> to
125: * <tt>codepointeEnd</tt> inclusive.
126: *
127: * <p>The order of iteration is all code points ranges in sorted
128: * order, followed by all strings sorted order. Ranges are
129: * disjoint and non-contiguous. <tt>string</tt> is undefined
130: * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to
131: * <tt>next()</tt> and <tt>nextRange()</tt> without calling
132: * <tt>reset()</tt> between them. The results of doing so are
133: * undefined.
134: *
135: * @return true if there was another element in the set and this
136: * object contains the element.
137: * @stable ICU 2.0
138: */
139: public boolean nextRange() {
140: if (nextElement <= endElement) {
141: codepointEnd = endElement;
142: codepoint = nextElement;
143: nextElement = endElement + 1;
144: return true;
145: }
146: if (range < endRange) {
147: loadRange(++range);
148: codepointEnd = endElement;
149: codepoint = nextElement;
150: nextElement = endElement + 1;
151: return true;
152: }
153:
154: // stringIterator == null iff there are no string elements remaining
155:
156: if (stringIterator == null)
157: return false;
158: codepoint = IS_STRING; // signal that value is actually a string
159: string = (String) stringIterator.next();
160: if (!stringIterator.hasNext())
161: stringIterator = null;
162: return true;
163: }
164:
165: /**
166: * Sets this iterator to visit the elements of the given set and
167: * resets it to the start of that set. The iterator is valid only
168: * so long as <tt>set</tt> is valid.
169: * @param set the set to iterate over.
170: * @stable ICU 2.0
171: */
172: public void reset(UnicodeSet set) {
173: this .set = set;
174: reset();
175: }
176:
177: /**
178: * Resets this iterator to the start of the set.
179: * @stable ICU 2.0
180: */
181: public void reset() {
182: endRange = set.getRangeCount() - 1;
183: range = 0;
184: endElement = -1;
185: nextElement = 0;
186: if (endRange >= 0) {
187: loadRange(range);
188: }
189: stringIterator = null;
190: if (set.strings != null) {
191: stringIterator = set.strings.iterator();
192: if (!stringIterator.hasNext())
193: stringIterator = null;
194: }
195: }
196:
197: // ======================= PRIVATES ===========================
198:
199: private UnicodeSet set;
200: private int endRange = 0;
201: private int range = 0;
202: /**
203: * @internal
204: */
205: protected int endElement;
206: /**
207: * @internal
208: */
209: protected int nextElement;
210: private Iterator stringIterator = null;
211:
212: /**
213: * Invariant: stringIterator is null when there are no (more) strings remaining
214: */
215:
216: /**
217: * @internal
218: */
219: protected void loadRange(int range) {
220: nextElement = set.getRangeStart(range);
221: endElement = set.getRangeEnd(range);
222: }
223: }
|