001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package java.text;
019:
020: /**
021: * <p>
022: * <code>CollationElementIterator</code> is created by a
023: * <code>RuleBasedCollator</code> to iterate through a string. The return
024: * result of each iteration is a 32-bit collation element that defines the
025: * ordering priority of the next character or sequence of characters in the
026: * source string.
027: * </p>
028: * <p>
029: * For illustration, consider the following in Spanish:
030: * </p>
031: *
032: * <p>
033: * <code>
034: * "ca" -> the first collation element is collation_element('c') and second
035: * collation element is collation_element('a').
036: * </code>
037: * </p>
038: *
039: * <p>
040: * <code>
041: * Since "ch" in Spanish sorts as one entity, the below example returns one
042: * collation element for the two characters 'c' and 'h'
043: * </code>
044: * </p>
045: *
046: * <p>
047: * <code>
048: * "cha" -> the first collation element is collation_element('ch') and second
049: * collation element is collation_element('a').
050: * </code>
051: * </p>
052: * <p>
053: * And in German,
054: * </p>
055: *
056: * <p>
057: * <code>
058: * Since the character '\u0086' is a composed character of 'a' and 'e', the iterator
059: * returns two collation elements for the single character '\u0086'
060: * </code>
061: * </p>
062: * <p>
063: * <code>
064: * "\u0086b" -> the first
065: * collation element is collation_element('a'), the second collation element is
066: * collation_element('e'), and the third collation element is
067: * collation_element('b').
068: * </code>
069: * </p>
070: *
071: */
072: public final class CollationElementIterator {
073:
074: /**
075: * This constant is returned by the iterator in the methods
076: * <code>next()</code> and <code>previous()</code> when the end or the
077: * beginning of the source string has been reached, and there are no more
078: * valid collation elements to return.
079: */
080: public static final int NULLORDER = -1;
081:
082: private com.ibm.icu.text.CollationElementIterator icuIterator;
083:
084: CollationElementIterator(
085: com.ibm.icu.text.CollationElementIterator iterator) {
086: this .icuIterator = iterator;
087: }
088:
089: /**
090: * Obtains the maximum length of any expansion sequence that ends with the
091: * specified collation element. If there is no expansion with this collation
092: * element as the last element, returns <code>1</code>.
093: *
094: * @param order
095: * a collation element that has been previously obtained from a
096: * call to either the {@link #next()} or {@link #previous()}
097: * method.
098: * @return the maximum length of any expansion sequence ending with the
099: * specified collation element.
100: */
101: public int getMaxExpansion(int order) {
102: return this .icuIterator.getMaxExpansion(order);
103: }
104:
105: /**
106: * Obtains the character offset in the source string corresponding to the
107: * next collation element. This value could be any of: <ui>
108: * <li>The index of the first character in the source string that matches
109: * the value of the next collation element. (This means that if
110: * setOffset(offset) sets the index in the middle of a contraction,
111: * getOffset() returns the index of the first character in the contraction,
112: * which may not be equal to the original offset that was set. Hence calling
113: * getOffset() immediately after setOffset(offset) does not guarantee that
114: * the original offset set will be returned.)</li>
115: * <li>If normalization is on, the index of the immediate subsequent
116: * character, or composite character with the first character, having a
117: * combining class of 0.</li>
118: * <li>The length of the source string, if iteration has reached the end.
119: * </li>
120: * <ui>
121: *
122: * @return The position of the collation element in the source string that
123: * will be returned in the next invocation of the {@link #next()}
124: * method.
125: */
126: public int getOffset() {
127: return this .icuIterator.getOffset();
128: }
129:
130: /**
131: * Obtains the next collation element in the source string.
132: *
133: * @return the next collation element or <code>NULLORDER</code> if the end
134: * of the iteration has been reached.
135: */
136: public int next() {
137: return this .icuIterator.next();
138: }
139:
140: /**
141: * Obtains the previous collation element in the source string.
142: *
143: * @return the previous collation element, or <code>NULLORDER</code> when
144: * the start of the iteration has been reached.
145: */
146: public int previous() {
147: return this .icuIterator.previous();
148: }
149:
150: /**
151: * Obtains the primary order of the specified collation element, i.e. the
152: * first 16 bits. This value is unsigned.
153: *
154: * @param order
155: * @return the element's 16 bits primary order.
156: */
157: public static final int primaryOrder(int order) {
158: return com.ibm.icu.text.CollationElementIterator
159: .primaryOrder(order);
160: }
161:
162: /**
163: * Repositions the cursor to point at the first element of the current
164: * string. The next call to <code>next()</code> or <code>previous()</code>
165: * will return the first and last collation element in the string,
166: * respectively.
167: * <p>
168: * If the <code>RuleBasedCollator</code> used by this iterator has had its
169: * attributes changed, calling <code>reset()</code> will reinitialize the
170: * iterator to use the new attributes.
171: * </p>
172: */
173: public void reset() {
174: this .icuIterator.reset();
175: }
176:
177: /**
178: * Obtains the secondary order of the specified collation element, i.e. the
179: * 16th to 23th bits, inclusive. This value is unsigned.
180: *
181: * @param order
182: * @return the 8 bit secondary order of the element
183: */
184: public static final short secondaryOrder(int order) {
185: return (short) com.ibm.icu.text.CollationElementIterator
186: .secondaryOrder(order);
187: }
188:
189: /**
190: * Points the iterator at the collation element associated with the
191: * character in the source string which is found at the supplied offset.
192: * After this call completes, an invocation of the {@link #next()} method
193: * will return this collation element.
194: * <p>
195: * If <code>newOffset</code> corresponds to a character which is part of a
196: * sequence that maps to a single collation element the iterator is adjusted
197: * to the start of that sequence. As a result of this, any subsequent call
198: * made to <code>getOffset()</code> may not return the same value set by
199: * this method.
200: * </p>
201: * <p>
202: * If the decomposition mode is on, and offset is in the middle of a
203: * decomposable range of source text, the iterator may not return a correct
204: * result for the next forwards or backwards iteration. The user must ensure
205: * that the offset is not in the middle of a decomposable range.
206: * </p>
207: *
208: * @param newOffset
209: * the character offset into the original source string to set.
210: * Note that this is not an offset into the corresponding
211: * sequence of collation elements.
212: */
213: public void setOffset(int newOffset) {
214: this .icuIterator.setOffset(newOffset);
215: }
216:
217: /**
218: * Sets a new source string iterator for iteration, and reset the offset to
219: * the beginning of the text.
220: *
221: * @param source
222: * the new source string iterator for iteration.
223: */
224: public void setText(CharacterIterator source) {
225: this .icuIterator.setText(source);
226: }
227:
228: /**
229: * Sets a new source string for iteration, and reset the offset to the
230: * beginning of the text.
231: *
232: * @param source
233: * the new source string for iteration
234: */
235: public void setText(String source) {
236: this .icuIterator.setText(source);
237: }
238:
239: /**
240: * Obtains the tertiary order of the specified collation element, i.e. the
241: * last 8 bits. This value is unsigned.
242: *
243: * @param order
244: * @return the 8 bits tertiary order of the element
245: */
246: public static final short tertiaryOrder(int order) {
247: return (short) com.ibm.icu.text.CollationElementIterator
248: .tertiaryOrder(order);
249: }
250: }
|