001: /*
002: ******************************************************************************
003: * Copyright (C) 1996-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: ******************************************************************************
006: */
007:
008: package com.ibm.icu.lang;
009:
010: import com.ibm.icu.util.ValueIterator;
011: import com.ibm.icu.impl.UCharacterName;
012: import com.ibm.icu.impl.UCharacterNameChoice;
013:
014: /**
015: * <p>Class enabling iteration of the codepoints and their names.</p>
016: * <p>Result of each iteration contains a valid codepoint that has valid
017: * name.</p>
018: * <p>See UCharacter.getNameIterator() for an example of use.</p>
019: * @author synwee
020: * @since release 2.1, March 5 2002
021: */
022: class UCharacterNameIterator implements ValueIterator {
023: // public methods ----------------------------------------------------
024:
025: /**
026: * <p>Gets the next result for this iteration and returns
027: * true if we are not at the end of the iteration, false otherwise.</p>
028: * <p>If the return boolean is a false, the contents of elements will not
029: * be updated.</p>
030: * @param element for storing the result codepoint and name
031: * @return true if we are not at the end of the iteration, false otherwise.
032: * @see Element
033: * @draft 2.1
034: */
035: public boolean next(ValueIterator.Element element) {
036: if (m_current_ >= m_limit_) {
037: return false;
038: }
039:
040: if (m_choice_ != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
041: int length = m_name_.getAlgorithmLength();
042: if (m_algorithmIndex_ < length) {
043: while (m_algorithmIndex_ < length) {
044: // find the algorithm range that could contain m_current_
045: if (m_algorithmIndex_ < 0
046: || m_name_
047: .getAlgorithmEnd(m_algorithmIndex_) < m_current_) {
048: m_algorithmIndex_++;
049: } else {
050: break;
051: }
052: }
053:
054: if (m_algorithmIndex_ < length) {
055: // interleave the data-driven ones with the algorithmic ones
056: // iterate over all algorithmic ranges; assume that they are
057: // in ascending order
058: int start = m_name_
059: .getAlgorithmStart(m_algorithmIndex_);
060: if (m_current_ < start) {
061: // this should get rid of those codepoints that are not
062: // in the algorithmic range
063: int end = start;
064: if (m_limit_ <= start) {
065: end = m_limit_;
066: }
067: if (!iterateGroup(element, end)) {
068: m_current_++;
069: return true;
070: }
071: }
072:
073: if (m_current_ >= m_limit_) {
074: // after iterateGroup fails, current codepoint may be
075: // greater than limit
076: return false;
077: }
078:
079: element.integer = m_current_;
080: element.value = m_name_.getAlgorithmName(
081: m_algorithmIndex_, m_current_);
082: // reset the group index if we are in the algorithmic names
083: m_groupIndex_ = -1;
084: m_current_++;
085: return true;
086: }
087: }
088: }
089: // enumerate the character names after the last algorithmic range
090: if (!iterateGroup(element, m_limit_)) {
091: m_current_++;
092: return true;
093: } else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
094: if (!iterateExtended(element, m_limit_)) {
095: m_current_++;
096: return true;
097: }
098: }
099:
100: return false;
101: }
102:
103: /**
104: * <p>Resets the iterator to start iterating from the integer index
105: * UCharacter.MIN_VALUE or X if a setRange(X, Y) has been called previously.
106: * </p>
107: * @draft 2.1
108: */
109: public void reset() {
110: m_current_ = m_start_;
111: m_groupIndex_ = -1;
112: m_algorithmIndex_ = -1;
113: }
114:
115: /**
116: * <p>Restricts the range of integers to iterate and resets the iteration
117: * to begin at the index argument start.</p>
118: * <p>If setRange(start, end) is not performed before next(element) is
119: * called, the iteration will start from the integer index
120: * UCharacter.MIN_VALUE and end at UCharacter.MAX_VALUE.</p>
121: * <p>
122: * If this range is set outside the range of UCharacter.MIN_VALUE and
123: * UCharacter.MAX_VALUE, next(element) will always return false.
124: * </p>
125: * @param start first integer in range to iterate
126: * @param limit 1 integer after the last integer in range
127: * @exception IllegalArgumentException thrown when attempting to set an
128: * illegal range. E.g limit <= start
129: * @draft 2.1
130: */
131: public void setRange(int start, int limit) {
132: if (start >= limit) {
133: throw new IllegalArgumentException(
134: "start or limit has to be valid Unicode codepoints and start < limit");
135: }
136: if (start < UCharacter.MIN_VALUE) {
137: m_start_ = UCharacter.MIN_VALUE;
138: } else {
139: m_start_ = start;
140: }
141:
142: if (limit > UCharacter.MAX_VALUE + 1) {
143: m_limit_ = UCharacter.MAX_VALUE + 1;
144: } else {
145: m_limit_ = limit;
146: }
147: m_current_ = m_start_;
148: }
149:
150: // protected constructor ---------------------------------------------
151:
152: /**
153: * Constructor
154: * @param name name data
155: * @param choice name choice from the class
156: * com.ibm.icu.lang.UCharacterNameChoice
157: * @draft 2.1
158: */
159: protected UCharacterNameIterator(UCharacterName name, int choice) {
160: if (name == null) {
161: throw new IllegalArgumentException(
162: "UCharacterName name argument cannot be null. Missing unames.icu?");
163: }
164: m_name_ = name;
165: // no explicit choice in UCharacter so no checks on choice
166: m_choice_ = choice;
167: m_start_ = UCharacter.MIN_VALUE;
168: m_limit_ = UCharacter.MAX_VALUE + 1;
169: m_current_ = m_start_;
170: }
171:
172: // private data members ---------------------------------------------
173:
174: /**
175: * Name data
176: */
177: private UCharacterName m_name_;
178: /**
179: * Name choice
180: */
181: private int m_choice_;
182: /**
183: * Start iteration range
184: */
185: private int m_start_;
186: /**
187: * End + 1 iteration range
188: */
189: private int m_limit_;
190: /**
191: * Current codepoint
192: */
193: private int m_current_;
194: /**
195: * Group index
196: */
197: private int m_groupIndex_ = -1;
198: /**
199: * Algorithm index
200: */
201: private int m_algorithmIndex_ = -1;
202: /**
203: * Group use
204: */
205: private static char GROUP_OFFSETS_[] = new char[UCharacterName.LINES_PER_GROUP_ + 1];
206: private static char GROUP_LENGTHS_[] = new char[UCharacterName.LINES_PER_GROUP_ + 1];
207:
208: // private methods --------------------------------------------------
209:
210: /**
211: * Group name iteration, iterate all the names in the current 32-group and
212: * returns the first codepoint that has a valid name.
213: * @param result stores the result codepoint and name
214: * @param limit last codepoint + 1 in range to search
215: * @return false if a codepoint with a name is found in group and we can
216: * bail from further iteration, true to continue on with the
217: * iteration
218: */
219: private boolean iterateSingleGroup(ValueIterator.Element result,
220: int limit) {
221: synchronized (GROUP_OFFSETS_) {
222: synchronized (GROUP_LENGTHS_) {
223: int index = m_name_.getGroupLengths(m_groupIndex_,
224: GROUP_OFFSETS_, GROUP_LENGTHS_);
225: while (m_current_ < limit) {
226: int offset = UCharacterName
227: .getGroupOffset(m_current_);
228: String name = m_name_.getGroupName(index
229: + GROUP_OFFSETS_[offset],
230: GROUP_LENGTHS_[offset], m_choice_);
231: if ((name == null || name.length() == 0)
232: && m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
233: name = m_name_.getExtendedName(m_current_);
234: }
235: if (name != null && name.length() > 0) {
236: result.integer = m_current_;
237: result.value = name;
238: return false;
239: }
240: ++m_current_;
241: }
242: }
243: }
244: return true;
245: }
246:
247: /**
248: * Group name iteration, iterate all the names in the current 32-group and
249: * returns the first codepoint that has a valid name.
250: * @param result stores the result codepoint and name
251: * @param limit last codepoint + 1 in range to search
252: * @return false if a codepoint with a name is found in group and we can
253: * bail from further iteration, true to continue on with the
254: * iteration
255: */
256: private boolean iterateGroup(ValueIterator.Element result, int limit) {
257: if (m_groupIndex_ < 0) {
258: m_groupIndex_ = m_name_.getGroup(m_current_);
259: }
260:
261: while (m_groupIndex_ < m_name_.m_groupcount_
262: && m_current_ < limit) {
263: // iterate till the last group or the last codepoint
264: int startMSB = UCharacterName.getCodepointMSB(m_current_);
265: int gMSB = m_name_.getGroupMSB(m_groupIndex_); // can be -1
266: if (startMSB == gMSB) {
267: if (startMSB == UCharacterName
268: .getCodepointMSB(limit - 1)) {
269: // if start and limit - 1 are in the same group, then enumerate
270: // only in that one
271: return iterateSingleGroup(result, limit);
272: }
273: // enumerate characters in the partial start group
274: // if (m_name_.getGroupOffset(m_current_) != 0) {
275: if (!iterateSingleGroup(result, UCharacterName
276: .getGroupLimit(gMSB))) {
277: return false;
278: }
279: ++m_groupIndex_; // continue with the next group
280: } else if (startMSB > gMSB) {
281: // make sure that we start enumerating with the first group
282: // after start
283: m_groupIndex_++;
284: } else {
285: int gMIN = UCharacterName.getGroupMin(gMSB);
286: if (gMIN > limit) {
287: gMIN = limit;
288: }
289: if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
290: if (!iterateExtended(result, gMIN)) {
291: return false;
292: }
293: }
294: m_current_ = gMIN;
295: }
296: }
297:
298: return true;
299: }
300:
301: /**
302: * Iterate extended names.
303: * @param result stores the result codepoint and name
304: * @param limit last codepoint + 1 in range to search
305: * @return false if a codepoint with a name is found and we can
306: * bail from further iteration, true to continue on with the
307: * iteration (this will always be false for valid codepoints)
308: */
309: private boolean iterateExtended(ValueIterator.Element result,
310: int limit) {
311: while (m_current_ < limit) {
312: String name = m_name_.getExtendedOr10Name(m_current_);
313: if (name != null && name.length() > 0) {
314: result.integer = m_current_;
315: result.value = name;
316: return false;
317: }
318: ++m_current_;
319: }
320: return true;
321: }
322: }
|