001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package java.text;
019:
020: import java.util.Locale;
021:
022: /**
023: * This class is used to locate the boundaries of text. Instance of this class
024: * can be got by some factory methods:
025: * <ul>
026: * <li>
027: * <code>getCharacterInstance()<code> returns a BreakIterator that iterate the
028: * logical characters without worrying about how the character is stored. For
029: * example, some character may be stored in more than one Unicode code point
030: * according to Unicode specification, this character can handle the logical
031: * characters with multi code points.</li>
032: * <li>
033: * <code>getWordInstance()<code> returns a <code>BreakIterator</code> that
034: * iterate the word-breaks. The beginning and end of each word(including numbers)
035: * is treated as boundary position. Whitespace and punctuation are kept separate
036: * from real words.</li>
037: * <li>
038: * <code>getSentenceInstance()</code> returns a BreakIterator that iterate the
039: * sentence-breaks.</li>
040: * <li><code>getLineInstance()</code> returns a BreakIterator that iterate the
041: * line-breaks which can be used to wrap lines. This iterator can handle whitespaces,
042: * hyphens and punctuations.
043: * </ul>
044: *
045: * <code>BreakIterator</code> uses <code>CharacterIterator</code> to perform the
046: * analysis, so that any storage which provides <code>CharacterIterator</code>
047: * interface.
048: *
049: * @see CharacterIterator
050: */
051: public abstract class BreakIterator implements Cloneable {
052:
053: /*
054: * -----------------------------------------------------------------------
055: * constants
056: * -----------------------------------------------------------------------
057: */
058: /**
059: * This constant is returned by iterate methods like previous() or next() if
060: * they have returned all valid boundaries.
061: */
062: public static final int DONE = -1;
063:
064: private static final int LONG_LENGTH = 8;
065:
066: private static final int INT_LENGTH = 4;
067:
068: private static final int SHORT_LENGTH = 2;
069:
070: /*
071: * -----------------------------------------------------------------------
072: * variables
073: * -----------------------------------------------------------------------
074: */
075: // the wrapped ICU implementation
076: com.ibm.icu.text.BreakIterator wrapped;
077:
078: /*
079: * -----------------------------------------------------------------------
080: * constructors
081: * -----------------------------------------------------------------------
082: */
083: /**
084: * Default constructor, just for invocation by subclass.
085: */
086: protected BreakIterator() {
087: super ();
088: }
089:
090: /*
091: * wrapping constructor
092: */
093: BreakIterator(com.ibm.icu.text.BreakIterator iterator) {
094: wrapped = iterator;
095: }
096:
097: /*
098: * -----------------------------------------------------------------------
099: * methods
100: * -----------------------------------------------------------------------
101: */
102: /**
103: * Return all supported locales.
104: *
105: * @return all supported locales
106: */
107: public static Locale[] getAvailableLocales() {
108: return com.ibm.icu.text.BreakIterator.getAvailableLocales();
109: }
110:
111: /**
112: * Return a new instance of BreakIterator used to iterate characters using
113: * default locale.
114: *
115: * @return a new instance of BreakIterator used to iterate characters using
116: * default locale.
117: */
118: public static BreakIterator getCharacterInstance() {
119: return new RuleBasedBreakIterator(
120: com.ibm.icu.text.BreakIterator.getCharacterInstance());
121: }
122:
123: /**
124: * Return a new instance of BreakIterator used to iterate characters using
125: * given locale.
126: *
127: * @param where
128: * the given locale
129: * @return a new instance of BreakIterator used to iterate characters using
130: * given locale.
131: */
132: public static BreakIterator getCharacterInstance(Locale where) {
133: if (where == null) {
134: throw new NullPointerException();
135: }
136:
137: return new RuleBasedBreakIterator(
138: com.ibm.icu.text.BreakIterator
139: .getCharacterInstance(where));
140: }
141:
142: /**
143: * Return a new instance of BreakIterator used to iterate line-breaks using
144: * default locale.
145: *
146: * @return a new instance of BreakIterator used to iterate line-breaks using
147: * default locale.
148: */
149: public static BreakIterator getLineInstance() {
150: return new RuleBasedBreakIterator(
151: com.ibm.icu.text.BreakIterator.getLineInstance());
152: }
153:
154: /**
155: * Return a new instance of BreakIterator used to iterate line-breaks using
156: * given locale.
157: *
158: * @param where
159: * the given locale
160: * @return a new instance of BreakIterator used to iterate line-breaks using
161: * given locale.
162: */
163: public static BreakIterator getLineInstance(Locale where) {
164: if (where == null) {
165: throw new NullPointerException();
166: }
167:
168: return new RuleBasedBreakIterator(
169: com.ibm.icu.text.BreakIterator.getLineInstance(where));
170: }
171:
172: /**
173: * Return a new instance of BreakIterator used to iterate sentence-breaks
174: * using default locale.
175: *
176: * @return a new instance of BreakIterator used to iterate sentence-breaks
177: * using default locale.
178: */
179: public static BreakIterator getSentenceInstance() {
180: return new RuleBasedBreakIterator(
181: com.ibm.icu.text.BreakIterator.getSentenceInstance());
182: }
183:
184: /**
185: * Return a new instance of BreakIterator used to iterate sentence-breaks
186: * using given locale.
187: *
188: * @param where
189: * the given locale
190: * @return a new instance of BreakIterator used to iterate sentence-breaks
191: * using given locale.
192: */
193: public static BreakIterator getSentenceInstance(Locale where) {
194: if (where == null) {
195: throw new NullPointerException();
196: }
197:
198: return new RuleBasedBreakIterator(
199: com.ibm.icu.text.BreakIterator
200: .getSentenceInstance(where));
201: }
202:
203: /**
204: * Return a new instance of BreakIterator used to iterate word-breaks using
205: * default locale.
206: *
207: * @return a new instance of BreakIterator used to iterate word-breaks using
208: * default locale.
209: */
210: public static BreakIterator getWordInstance() {
211: return new RuleBasedBreakIterator(
212: com.ibm.icu.text.BreakIterator.getWordInstance());
213: }
214:
215: /**
216: * Return a new instance of BreakIterator used to iterate word-breaks using
217: * given locale.
218: *
219: * @param where
220: * the given locale
221: * @return a new instance of BreakIterator used to iterate word-breaks using
222: * given locale.
223: */
224: public static BreakIterator getWordInstance(Locale where) {
225: if (where == null) {
226: throw new NullPointerException();
227: }
228:
229: return new RuleBasedBreakIterator(
230: com.ibm.icu.text.BreakIterator.getWordInstance(where));
231: }
232:
233: /**
234: * Return true if the given offset is a boundary position. If this method
235: * returns true, the current iteration position is set to the given
236: * position; if the function returns false, the current iteration position
237: * is set as though following() had been called.
238: *
239: * @param offset
240: * the given offset to check
241: * @return true if the given offset is a boundary position
242: */
243: public boolean isBoundary(int offset) {
244: return wrapped.isBoundary(offset);
245: }
246:
247: /**
248: * Return the position of last boundary precede the given offset, and set
249: * current position to returned value, or <code>DONE</code> if the given
250: * offset specifies the starting position.
251: * <p>
252: * <code>IllegalArgumentException</code> will be thrown if given offset is
253: * invalid.
254: * </p>
255: *
256: * @param offset
257: * the given start position to be searched for
258: * @return the position of last boundary precede the given offset
259: */
260: public int preceding(int offset) {
261: return wrapped.preceding(offset);
262: }
263:
264: /**
265: * Set the new text string to be analyzed, the current position will be
266: * reset to beginning of this new string, and the old string will lost.
267: *
268: * @param newText
269: * the new text string to be analyzed
270: */
271: public void setText(String newText) {
272: wrapped.setText(newText);
273: }
274:
275: /*
276: * -----------------------------------------------------------------------
277: * abstract methods
278: * -----------------------------------------------------------------------
279: */
280: /**
281: * Return this iterator's current position.
282: *
283: * @return this iterator's current position
284: */
285: public abstract int current();
286:
287: /**
288: * Set this iterator's current position to the first boundary, and return
289: * this position.
290: *
291: * @return the position of first boundary
292: */
293: public abstract int first();
294:
295: /**
296: * Set the position of the first boundary following the given offset, and
297: * return this position. If there is no boundary after the given offset,
298: * return DONE.
299: * <p>
300: * <code>IllegalArgumentException</code> will be thrown if given offset is
301: * invalid.
302: * </p>
303: *
304: * @param offset
305: * the given position to be searched for
306: * @return the position of the first boundary following the given offset
307: */
308: public abstract int following(int offset);
309:
310: /**
311: * Return a <code>CharacterIterator</code> which represents the text being
312: * analyzed. Please note that the returned value is probably the internal
313: * iterator used by this object, so that if the invoker want to modify the
314: * status of the returned iterator, a clone operation at first is
315: * recommended.
316: *
317: * @return a <code>CharacterIterator</code> which represents the text
318: * being analyzed.
319: */
320: public abstract CharacterIterator getText();
321:
322: /**
323: * Set this iterator's current position to the last boundary, and return
324: * this position.
325: *
326: * @return the position of last boundary
327: */
328: public abstract int last();
329:
330: /**
331: * Set this iterator's current position to the next boundary after current
332: * position, and return this position. Return <code>DONE</code> if no
333: * boundary found after current position.
334: *
335: * @return the position of last boundary
336: */
337: public abstract int next();
338:
339: /**
340: * Set this iterator's current position to the next boundary after the given
341: * position, and return this position. Return <code>DONE</code> if no
342: * boundary found after the given position.
343: *
344: * @param n
345: * the given position.
346: * @return the position of last boundary
347: */
348: public abstract int next(int n);
349:
350: /**
351: * Set this iterator's current position to the previous boundary before
352: * current position, and return this position. Return <code>DONE</code> if
353: * no boundary found before current position.
354: *
355: * @return the position of last boundary
356: */
357: public abstract int previous();
358:
359: /**
360: * Set new text to be analyzed by given <code>CharacterIterator</code>.
361: * The position will be reset to the beginning of the new text, and other
362: * status of this iterator will be kept.
363: *
364: * @param newText
365: * the given <code>CharacterIterator</code> refer to the text
366: * to be analyzed
367: */
368: public abstract void setText(CharacterIterator newText);
369:
370: /*
371: * -----------------------------------------------------------------------
372: * methods override Object
373: * -----------------------------------------------------------------------
374: */
375: /**
376: * Create copy of this iterator, all status including current position is
377: * kept.
378: *
379: * @return copy of this iterator
380: */
381: @Override
382: public Object clone() {
383: try {
384: BreakIterator cloned = (BreakIterator) super .clone();
385: cloned.wrapped = (com.ibm.icu.text.BreakIterator) wrapped
386: .clone();
387: return cloned;
388: } catch (CloneNotSupportedException e) {
389: throw new InternalError(e.getMessage());
390: }
391: }
392:
393: /**
394: * Get a long value from the given byte array, start from given offset.
395: *
396: * @param buf
397: * the bytes to be converted
398: * @param offset
399: * the start position of conversion
400: * @return the converted long value
401: */
402: protected static long getLong(byte[] buf, int offset) {
403: if (null == buf) {
404: throw new NullPointerException();
405: }
406: if (offset < 0 || buf.length - offset < LONG_LENGTH) {
407: throw new ArrayIndexOutOfBoundsException();
408: }
409: long result = 0;
410: for (int i = offset; i < offset + LONG_LENGTH; i++) {
411: result = (result << 8) | (buf[i] & 0xff);
412: }
413: return result;
414: }
415:
416: /**
417: * Get an int value from the given byte array, start from given offset.
418: *
419: * @param buf
420: * the bytes to be converted
421: * @param offset
422: * the start position of conversion
423: * @return the converted int value
424: */
425: protected static int getInt(byte[] buf, int offset) {
426: if (null == buf) {
427: throw new NullPointerException();
428: }
429: if (offset < 0 || buf.length - INT_LENGTH < offset) {
430: throw new ArrayIndexOutOfBoundsException();
431: }
432: int result = 0;
433: for (int i = offset; i < offset + INT_LENGTH; i++) {
434: result = (result << 8) | (buf[i] & 0xff);
435: }
436: return result;
437: }
438:
439: /**
440: * Get a short value from the given byte array, start from given offset.
441: *
442: * @param buf
443: * the bytes to be converted
444: * @param offset
445: * the start position of conversion
446: * @return the converted short value
447: */
448: protected static short getShort(byte[] buf, int offset) {
449: if (null == buf) {
450: throw new NullPointerException();
451: }
452: if (offset < 0 || buf.length - SHORT_LENGTH < offset) {
453: throw new ArrayIndexOutOfBoundsException();
454: }
455: short result = 0;
456: for (int i = offset; i < offset + SHORT_LENGTH; i++) {
457: result = (short) ((result << 8) | (buf[i] & 0xff));
458: }
459: return result;
460: }
461: }
|