001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007:
008: package com.ibm.icu.text;
009:
010: import java.lang.ref.SoftReference;
011: import java.text.CharacterIterator;
012: import java.text.StringCharacterIterator;
013: import java.util.Locale;
014: import java.util.MissingResourceException;
015:
016: import com.ibm.icu.impl.ICUDebug;
017: import com.ibm.icu.util.ULocale;
018:
019: /**
020: * A class that locates boundaries in text. This class defines a protocol for
021: * objects that break up a piece of natural-language text according to a set
022: * of criteria. Instances or subclasses of BreakIterator can be provided, for
023: * example, to break a piece of text into words, sentences, or logical characters
024: * according to the conventions of some language or group of languages.
025: *
026: * We provide five built-in types of BreakIterator:
027: * <ul><li>getTitleInstance() returns a BreakIterator that locates boundaries
028: * between title breaks.
029: * <li>getSentenceInstance() returns a BreakIterator that locates boundaries
030: * between sentences. This is useful for triple-click selection, for example.
031: * <li>getWordInstance() returns a BreakIterator that locates boundaries between
032: * words. This is useful for double-click selection or "find whole words" searches.
033: * This type of BreakIterator makes sure there is a boundary position at the
034: * beginning and end of each legal word. (Numbers count as words, too.) Whitespace
035: * and punctuation are kept separate from real words.
036: * <li>getLineInstance() returns a BreakIterator that locates positions where it is
037: * legal for a text editor to wrap lines. This is similar to word breaking, but
038: * not the same: punctuation and whitespace are generally kept with words (you don't
039: * want a line to start with whitespace, for example), and some special characters
040: * can force a position to be considered a line-break position or prevent a position
041: * from being a line-break position.
042: * <li>getCharacterInstance() returns a BreakIterator that locates boundaries between
043: * logical characters. Because of the structure of the Unicode encoding, a logical
044: * character may be stored internally as more than one Unicode code point. (A with an
045: * umlaut may be stored as an a followed by a separate combining umlaut character,
046: * for example, but the user still thinks of it as one character.) This iterator allows
047: * various processes (especially text editors) to treat as characters the units of text
048: * that a user would think of as characters, rather than the units of text that the
049: * computer sees as "characters".</ul>
050: *
051: * BreakIterator's interface follows an "iterator" model (hence the name), meaning it
052: * has a concept of a "current position" and methods like first(), last(), next(),
053: * and previous() that update the current position. All BreakIterators uphold the
054: * following invariants:
055: * <ul><li>The beginning and end of the text are always treated as boundary positions.
056: * <li>The current position of the iterator is always a boundary position (random-
057: * access methods move the iterator to the nearest boundary position before or
058: * after the specified position, not _to_ the specified position).
059: * <li>DONE is used as a flag to indicate when iteration has stopped. DONE is only
060: * returned when the current position is the end of the text and the user calls next(),
061: * or when the current position is the beginning of the text and the user calls
062: * previous().
063: * <li>Break positions are numbered by the positions of the characters that follow
064: * them. Thus, under normal circumstances, the position before the first character
065: * is 0, the position after the first character is 1, and the position after the
066: * last character is the length of the string.
067: * <li>The client can change the position of an iterator, or the text it analyzes,
068: * at will, but cannot change the behavior. If the user wants different behavior, he
069: * must instantiate a new iterator.</ul>
070: *
071: * BreakIterator accesses the text it analyzes through a CharacterIterator, which makes
072: * it possible to use BreakIterator to analyze text in any text-storage vehicle that
073: * provides a CharacterIterator interface.
074: *
075: * <b>NOTE:</b> Some types of BreakIterator can take a long time to create, and
076: * instances of BreakIterator are not currently cached by the system. For
077: * optimal performance, keep instances of BreakIterator around as long as makes
078: * sense. For example, when word-wrapping a document, don't create and destroy a
079: * new BreakIterator for each line. Create one break iterator for the whole document
080: * (or whatever stretch of text you're wrapping) and use it to do the whole job of
081: * wrapping the text.
082: *
083: * <P>
084: * <strong>Examples</strong>:<P>
085: * Creating and using text boundaries
086: * <blockquote>
087: * <pre>
088: * public static void main(String args[]) {
089: * if (args.length == 1) {
090: * String stringToExamine = args[0];
091: * //print each word in order
092: * BreakIterator boundary = BreakIterator.getWordInstance();
093: * boundary.setText(stringToExamine);
094: * printEachForward(boundary, stringToExamine);
095: * //print each sentence in reverse order
096: * boundary = BreakIterator.getSentenceInstance(Locale.US);
097: * boundary.setText(stringToExamine);
098: * printEachBackward(boundary, stringToExamine);
099: * printFirst(boundary, stringToExamine);
100: * printLast(boundary, stringToExamine);
101: * }
102: * }
103: * </pre>
104: * </blockquote>
105: *
106: * Print each element in order
107: * <blockquote>
108: * <pre>
109: * public static void printEachForward(BreakIterator boundary, String source) {
110: * int start = boundary.first();
111: * for (int end = boundary.next();
112: * end != BreakIterator.DONE;
113: * start = end, end = boundary.next()) {
114: * System.out.println(source.substring(start,end));
115: * }
116: * }
117: * </pre>
118: * </blockquote>
119: *
120: * Print each element in reverse order
121: * <blockquote>
122: * <pre>
123: * public static void printEachBackward(BreakIterator boundary, String source) {
124: * int end = boundary.last();
125: * for (int start = boundary.previous();
126: * start != BreakIterator.DONE;
127: * end = start, start = boundary.previous()) {
128: * System.out.println(source.substring(start,end));
129: * }
130: * }
131: * </pre>
132: * </blockquote>
133: *
134: * Print first element
135: * <blockquote>
136: * <pre>
137: * public static void printFirst(BreakIterator boundary, String source) {
138: * int start = boundary.first();
139: * int end = boundary.next();
140: * System.out.println(source.substring(start,end));
141: * }
142: * </pre>
143: * </blockquote>
144: *
145: * Print last element
146: * <blockquote>
147: * <pre>
148: * public static void printLast(BreakIterator boundary, String source) {
149: * int end = boundary.last();
150: * int start = boundary.previous();
151: * System.out.println(source.substring(start,end));
152: * }
153: * </pre>
154: * </blockquote>
155: *
156: * Print the element at a specified position
157: * <blockquote>
158: * <pre>
159: * public static void printAt(BreakIterator boundary, int pos, String source) {
160: * int end = boundary.following(pos);
161: * int start = boundary.previous();
162: * System.out.println(source.substring(start,end));
163: * }
164: * </pre>
165: * </blockquote>
166: *
167: * Find the next word
168: * <blockquote>
169: * <pre>
170: * public static int nextWordStartAfter(int pos, String text) {
171: * BreakIterator wb = BreakIterator.getWordInstance();
172: * wb.setText(text);
173: * int last = wb.following(pos);
174: * int current = wb.next();
175: * while (current != BreakIterator.DONE) {
176: * for (int p = last; p < current; p++) {
177: * if (Character.isLetter(text.charAt(p)))
178: * return last;
179: * }
180: * last = current;
181: * current = wb.next();
182: * }
183: * return BreakIterator.DONE;
184: * }
185: * </pre>
186: * (The iterator returned by BreakIterator.getWordInstance() is unique in that
187: * the break positions it returns don't represent both the start and end of the
188: * thing being iterated over. That is, a sentence-break iterator returns breaks
189: * that each represent the end of one sentence and the beginning of the next.
190: * With the word-break iterator, the characters between two boundaries might be a
191: * word, or they might be the punctuation or whitespace between two words. The
192: * above code uses a simple heuristic to determine which boundary is the beginning
193: * of a word: If the characters between this boundary and the next boundary
194: * include at least one letter (this can be an alphabetical letter, a CJK ideograph,
195: * a Hangul syllable, a Kana character, etc.), then the text between this boundary
196: * and the next is a word; otherwise, it's the material between words.)
197: * </blockquote>
198: *
199: * @see CharacterIterator
200: * @stable ICU 2.0
201: *
202: */
203:
204: public abstract class BreakIterator implements Cloneable {
205:
206: private static final boolean DEBUG = ICUDebug
207: .enabled("breakiterator");
208:
209: /**
210: * Default constructor. There is no state that is carried by this abstract
211: * base class.
212: * @stable ICU 2.0
213: */
214: protected BreakIterator() {
215: }
216:
217: /**
218: * Clone method. Creates another BreakIterator with the same behavior and
219: * current state as this one.
220: * @return The clone.
221: * @stable ICU 2.0
222: */
223: public Object clone() {
224: try {
225: return super .clone();
226: } catch (CloneNotSupportedException e) {
227: ///CLOVER:OFF
228: throw new IllegalStateException();
229: ///CLOVER:ON
230: }
231: }
232:
233: /**
234: * DONE is returned by previous() and next() after all valid
235: * boundaries have been returned.
236: * @stable ICU 2.0
237: */
238: public static final int DONE = -1;
239:
240: /**
241: * Return the first boundary position. This is always the beginning
242: * index of the text this iterator iterates over. For example, if
243: * the iterator iterates over a whole string, this function will
244: * always return 0. This function also updates the iteration position
245: * to point to the beginning of the text.
246: * @return The character offset of the beginning of the stretch of text
247: * being broken.
248: * @stable ICU 2.0
249: */
250: public abstract int first();
251:
252: /**
253: * Return the last boundary position. This is always the "past-the-end"
254: * index of the text this iterator iterates over. For example, if the
255: * iterator iterates over a whole string (call it "text"), this function
256: * will always return text.length(). This function also updated the
257: * iteration position to point to the end of the text.
258: * @return The character offset of the end of the stretch of text
259: * being broken.
260: * @stable ICU 2.0
261: */
262: public abstract int last();
263:
264: /**
265: * Advances the specified number of steps forward in the text (a negative
266: * number, therefore, advances backwards). If this causes the iterator
267: * to advance off either end of the text, this function returns DONE;
268: * otherwise, this function returns the position of the appropriate
269: * boundary. Calling this function is equivalent to calling next() or
270: * previous() n times.
271: * @param n The number of boundaries to advance over (if positive, moves
272: * forward; if negative, moves backwards).
273: * @return The position of the boundary n boundaries from the current
274: * iteration position, or DONE if moving n boundaries causes the iterator
275: * to advance off either end of the text.
276: * @stable ICU 2.0
277: */
278: public abstract int next(int n);
279:
280: /**
281: * Advances the iterator forward one boundary. The current iteration
282: * position is updated to point to the next boundary position after the
283: * current position, and this is also the value that is returned. If
284: * the current position is equal to the value returned by last(), or to
285: * DONE, this function returns DONE and sets the current position to
286: * DONE.
287: * @return The position of the first boundary position following the
288: * iteration position.
289: * @stable ICU 2.0
290: */
291: public abstract int next();
292:
293: /**
294: * Advances the iterator backward one boundary. The current iteration
295: * position is updated to point to the last boundary position before
296: * the current position, and this is also the value that is returned. If
297: * the current position is equal to the value returned by first(), or to
298: * DONE, this function returns DONE and sets the current position to
299: * DONE.
300: * @return The position of the last boundary position preceding the
301: * iteration position.
302: * @stable ICU 2.0
303: */
304: public abstract int previous();
305:
306: /**
307: * Sets the iterator's current iteration position to be the first
308: * boundary position following the specified position. (Whether the
309: * specified position is itself a boundary position or not doesn't
310: * matter-- this function always moves the iteration position to the
311: * first boundary after the specified position.) If the specified
312: * position is the past-the-end position, returns DONE.
313: * @param offset The character position to start searching from.
314: * @return The position of the first boundary position following
315: * "offset" (whether or not "offset" itself is a boundary position),
316: * or DONE if "offset" is the past-the-end offset.
317: * @stable ICU 2.0
318: */
319: public abstract int following(int offset);
320:
321: /**
322: * Sets the iterator's current iteration position to be the last
323: * boundary position preceding the specified position. (Whether the
324: * specified position is itself a boundary position or not doesn't
325: * matter-- this function always moves the iteration position to the
326: * last boundary before the specified position.) If the specified
327: * position is the starting position, returns DONE.
328: * @param offset The character position to start searching from.
329: * @return The position of the last boundary position preceding
330: * "offset" (whether of not "offset" itself is a boundary position),
331: * or DONE if "offset" is the starting offset of the iterator.
332: * @stable ICU 2.0
333: */
334: public int preceding(int offset) {
335: // NOTE: This implementation is here solely because we can't add new
336: // abstract methods to an existing class. There is almost ALWAYS a
337: // better, faster way to do this.
338: int pos = following(offset);
339: while (pos >= offset && pos != DONE)
340: pos = previous();
341: return pos;
342: }
343:
344: /**
345: * Return true if the specfied position is a boundary position. If the
346: * function returns true, the current iteration position is set to the
347: * specified position; if the function returns false, the current
348: * iteration position is set as though following() had been called.
349: * @param offset the offset to check.
350: * @return True if "offset" is a boundary position.
351: * @stable ICU 2.0
352: */
353: public boolean isBoundary(int offset) {
354: // Again, this is the default implementation, which is provided solely because
355: // we couldn't add a new abstract method to an existing class. The real
356: // implementations will usually need to do a little more work.
357: if (offset == 0) {
358: return true;
359: } else
360: return following(offset - 1) == offset;
361: }
362:
363: /**
364: * Return the iterator's current position.
365: * @return The iterator's current position.
366: * @stable ICU 2.0
367: */
368: public abstract int current();
369:
370: /**
371: * Returns a CharacterIterator over the text being analyzed.
372: * For at least some subclasses of BreakIterator, this is a reference
373: * to the <b>actual iterator being used</b> by the BreakIterator,
374: * and therefore, this function's return value should be treated as
375: * <tt>const</tt>. No guarantees are made about the current position
376: * of this iterator when it is returned. If you need to move that
377: * position to examine the text, clone this function's return value first.
378: * @return A CharacterIterator over the text being analyzed.
379: * @stable ICU 2.0
380: */
381: public abstract CharacterIterator getText();
382:
383: /**
384: * Sets the iterator to analyze a new piece of text. The new
385: * piece of text is passed in as a String, and the current
386: * iteration position is reset to the beginning of the string.
387: * (The old text is dropped.)
388: * @param newText A String containing the text to analyze with
389: * this BreakIterator.
390: * @stable ICU 2.0
391: */
392: public void setText(String newText) {
393: setText(new StringCharacterIterator(newText));
394: }
395:
396: /**
397: * Sets the iterator to analyze a new piece of text. The
398: * BreakIterator is passed a CharacterIterator through which
399: * it will access the text itself. The current iteration
400: * position is reset to the CharacterIterator's start index.
401: * (The old iterator is dropped.)
402: * @param newText A CharacterIterator referring to the text
403: * to analyze with this BreakIterator (the iterator's current
404: * position is ignored, but its other state is significant).
405: * @stable ICU 2.0
406: */
407: public abstract void setText(CharacterIterator newText);
408:
409: /** @stable ICU 2.4 */
410: public static final int KIND_CHARACTER = 0;
411: /** @stable ICU 2.4 */
412: public static final int KIND_WORD = 1;
413: /** @stable ICU 2.4 */
414: public static final int KIND_LINE = 2;
415: /** @stable ICU 2.4 */
416: public static final int KIND_SENTENCE = 3;
417: /** @stable ICU 2.4 */
418: public static final int KIND_TITLE = 4;
419:
420: /** @since ICU 2.8 */
421: private static final int KIND_COUNT = 5;
422:
423: /** @internal */
424: private static final SoftReference[] iterCache = new SoftReference[5];
425:
426: /**
427: * Returns a new instance of BreakIterator that locates word boundaries.
428: * This function assumes that the text being analyzed is in the default
429: * locale's language.
430: * @return An instance of BreakIterator that locates word boundaries.
431: * @stable ICU 2.0
432: */
433: public static BreakIterator getWordInstance() {
434: return getWordInstance(ULocale.getDefault());
435: }
436:
437: /**
438: * Returns a new instance of BreakIterator that locates word boundaries.
439: * @param where A locale specifying the language of the text to be
440: * analyzed.
441: * @return An instance of BreakIterator that locates word boundaries.
442: * @stable ICU 2.0
443: */
444: public static BreakIterator getWordInstance(Locale where) {
445: return getBreakInstance(ULocale.forLocale(where), KIND_WORD);
446: }
447:
448: /**
449: * Returns a new instance of BreakIterator that locates word boundaries.
450: * @param where A locale specifying the language of the text to be
451: * analyzed.
452: * @return An instance of BreakIterator that locates word boundaries.
453: * @draft ICU 3.2
454: * @provisional This API might change or be removed in a future release.
455: */
456: public static BreakIterator getWordInstance(ULocale where) {
457: return getBreakInstance(where, KIND_WORD);
458: }
459:
460: /**
461: * Returns a new instance of BreakIterator that locates legal line-
462: * wrapping positions. This function assumes the text being broken
463: * is in the default locale's language.
464: * @return A new instance of BreakIterator that locates legal
465: * line-wrapping positions.
466: * @stable ICU 2.0
467: */
468: public static BreakIterator getLineInstance() {
469: return getLineInstance(ULocale.getDefault());
470: }
471:
472: /**
473: * Returns a new instance of BreakIterator that locates legal line-
474: * wrapping positions.
475: * @param where A Locale specifying the language of the text being broken.
476: * @return A new instance of BreakIterator that locates legal
477: * line-wrapping positions.
478: * @stable ICU 2.0
479: */
480: public static BreakIterator getLineInstance(Locale where) {
481: return getBreakInstance(ULocale.forLocale(where), KIND_LINE);
482: }
483:
484: /**
485: * Returns a new instance of BreakIterator that locates legal line-
486: * wrapping positions.
487: * @param where A Locale specifying the language of the text being broken.
488: * @return A new instance of BreakIterator that locates legal
489: * line-wrapping positions.
490: * @draft ICU 3.2
491: * @provisional This API might change or be removed in a future release.
492: */
493: public static BreakIterator getLineInstance(ULocale where) {
494: return getBreakInstance(where, KIND_LINE);
495: }
496:
497: /**
498: * Returns a new instance of BreakIterator that locates logical-character
499: * boundaries. This function assumes that the text being analyzed is
500: * in the default locale's language.
501: * @return A new instance of BreakIterator that locates logical-character
502: * boundaries.
503: * @stable ICU 2.0
504: */
505: public static BreakIterator getCharacterInstance() {
506: return getCharacterInstance(ULocale.getDefault());
507: }
508:
509: /**
510: * Returns a new instance of BreakIterator that locates logical-character
511: * boundaries.
512: * @param where A Locale specifying the language of the text being analyzed.
513: * @return A new instance of BreakIterator that locates logical-character
514: * boundaries.
515: * @stable ICU 2.0
516: */
517: public static BreakIterator getCharacterInstance(Locale where) {
518: return getBreakInstance(ULocale.forLocale(where),
519: KIND_CHARACTER);
520: }
521:
522: /**
523: * Returns a new instance of BreakIterator that locates logical-character
524: * boundaries.
525: * @param where A Locale specifying the language of the text being analyzed.
526: * @return A new instance of BreakIterator that locates logical-character
527: * boundaries.
528: * @draft ICU 3.2
529: * @provisional This API might change or be removed in a future release.
530: */
531: public static BreakIterator getCharacterInstance(ULocale where) {
532: return getBreakInstance(where, KIND_CHARACTER);
533: }
534:
535: /**
536: * Returns a new instance of BreakIterator that locates sentence boundaries.
537: * This function assumes the text being analyzed is in the default locale's
538: * language.
539: * @return A new instance of BreakIterator that locates sentence boundaries.
540: * @stable ICU 2.0
541: */
542: public static BreakIterator getSentenceInstance() {
543: return getSentenceInstance(ULocale.getDefault());
544: }
545:
546: /**
547: * Returns a new instance of BreakIterator that locates sentence boundaries.
548: * @param where A Locale specifying the language of the text being analyzed.
549: * @return A new instance of BreakIterator that locates sentence boundaries.
550: * @stable ICU 2.0
551: */
552: public static BreakIterator getSentenceInstance(Locale where) {
553: return getBreakInstance(ULocale.forLocale(where), KIND_SENTENCE);
554: }
555:
556: /**
557: * Returns a new instance of BreakIterator that locates sentence boundaries.
558: * @param where A Locale specifying the language of the text being analyzed.
559: * @return A new instance of BreakIterator that locates sentence boundaries.
560: * @draft ICU 3.2
561: * @provisional This API might change or be removed in a future release.
562: */
563: public static BreakIterator getSentenceInstance(ULocale where) {
564: return getBreakInstance(where, KIND_SENTENCE);
565: }
566:
567: /**
568: * Returns a new instance of BreakIterator that locates title boundaries.
569: * This function assumes the text being analyzed is in the default locale's
570: * language. The iterator returned locates title boundaries as described for
571: * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
572: * please use a word boundary iterator. {@link #getWordInstance}
573: * @return A new instance of BreakIterator that locates title boundaries.
574: * @stable ICU 2.0
575: */
576: public static BreakIterator getTitleInstance() {
577: return getTitleInstance(ULocale.getDefault());
578: }
579:
580: /**
581: * Returns a new instance of BreakIterator that locates title boundaries.
582: * The iterator returned locates title boundaries as described for
583: * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
584: * please use Word Boundary iterator.{@link #getWordInstance}
585: * @param where A Locale specifying the language of the text being analyzed.
586: * @return A new instance of BreakIterator that locates title boundaries.
587: * @stable ICU 2.0
588: */
589: public static BreakIterator getTitleInstance(Locale where) {
590: return getBreakInstance(ULocale.forLocale(where), KIND_TITLE);
591: }
592:
593: /**
594: * Returns a new instance of BreakIterator that locates title boundaries.
595: * The iterator returned locates title boundaries as described for
596: * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
597: * please use Word Boundary iterator.{@link #getWordInstance}
598: * @param where A Locale specifying the language of the text being analyzed.
599: * @return A new instance of BreakIterator that locates title boundaries.
600: * @draft ICU 3.2
601: * @provisional This API might change or be removed in a future release.
602: */
603: public static BreakIterator getTitleInstance(ULocale where) {
604: return getBreakInstance(where, KIND_TITLE);
605: }
606:
607: /**
608: * Register a new break iterator of the indicated kind, to use in the given locale.
609: * Clones of the iterator will be returned
610: * if a request for a break iterator of the given kind matches or falls back to
611: * this locale.
612: * @param iter the BreakIterator instance to adopt.
613: * @param locale the Locale for which this instance is to be registered
614: * @param kind the type of iterator for which this instance is to be registered
615: * @return a registry key that can be used to unregister this instance
616: * @stable ICU 2.4
617: */
618: public static Object registerInstance(BreakIterator iter,
619: Locale locale, int kind) {
620: return registerInstance(iter, ULocale.forLocale(locale), kind);
621: }
622:
623: /**
624: * Register a new break iterator of the indicated kind, to use in the given locale.
625: * Clones of the iterator will be returned
626: * if a request for a break iterator of the given kind matches or falls back to
627: * this locale.
628: * @param iter the BreakIterator instance to adopt.
629: * @param locale the Locale for which this instance is to be registered
630: * @param kind the type of iterator for which this instance is to be registered
631: * @return a registry key that can be used to unregister this instance
632: * @draft ICU 3.2
633: * @provisional This API might change or be removed in a future release.
634: */
635: public static Object registerInstance(BreakIterator iter,
636: ULocale locale, int kind) {
637: // If the registered object matches the one in the cache, then
638: // flush the cached object.
639: if (iterCache[kind] != null) {
640: BreakIteratorCache cache = (BreakIteratorCache) iterCache[kind]
641: .get();
642: if (cache != null) {
643: if (cache.getLocale().equals(locale)) {
644: iterCache[kind] = null;
645: }
646: }
647: }
648: return getShim().registerInstance(iter, locale, kind);
649: }
650:
651: /**
652: * Unregister a previously-registered BreakIterator using the key returned from the
653: * register call. Key becomes invalid after this call and should not be used again.
654: * @param key the registry key returned by a previous call to registerInstance
655: * @return true if the iterator for the key was successfully unregistered
656: * @stable ICU 2.4
657: */
658: public static boolean unregister(Object key) {
659: if (key == null) {
660: throw new IllegalArgumentException(
661: "registry key must not be null");
662: }
663: // TODO: we don't do code coverage for the following lines
664: // because in getBreakInstance we always instantiate the shim,
665: // and test execution is such that we always instantiate a
666: // breakiterator before we get to the break iterator tests.
667: // this is for modularization, and we could remove the
668: // dependencies in getBreakInstance by rewriting part of the
669: // LocaleData code, or perhaps by accepting it into the
670: // module.
671: ///CLOVER:OFF
672: if (shim != null) {
673: // Unfortunately, we don't know what is being unregistered
674: // -- what `kind' and what locale -- so we flush all
675: // caches. This is safe but inefficient if people are
676: // actively registering and unregistering.
677: for (int kind = 0; kind < KIND_COUNT; ++kind) {
678: iterCache[kind] = null;
679: }
680: return shim.unregister(key);
681: }
682: return false;
683: ///CLOVER:ON
684: }
685:
686: // end of registration
687:
688: /**
689: * Get a particular kind of BreakIterator for a locale.
690: * Avoids writing a switch statement with getXYZInstance(where) calls.
691: * @internal
692: * @deprecated This API is ICU internal only.
693: */
694: public static BreakIterator getBreakInstance(ULocale where, int kind) {
695:
696: if (iterCache[kind] != null) {
697: BreakIteratorCache cache = (BreakIteratorCache) iterCache[kind]
698: .get();
699: if (cache != null) {
700: if (cache.getLocale().equals(where)) {
701: return cache.createBreakInstance();
702: }
703: }
704: }
705:
706: // sigh, all to avoid linking in ICULocaleData...
707: BreakIterator result = getShim().createBreakIterator(where,
708: kind);
709:
710: BreakIteratorCache cache = new BreakIteratorCache(where, result);
711: iterCache[kind] = new SoftReference(cache);
712: return result;
713: }
714:
715: /**
716: * Returns a list of locales for which BreakIterators can be used.
717: * @return An array of Locales. All of the locales in the array can
718: * be used when creating a BreakIterator.
719: * @stable ICU 2.6
720: */
721: public static synchronized Locale[] getAvailableLocales() {
722: // to avoid linking ICULocaleData
723: return getShim().getAvailableLocales();
724: }
725:
726: /**
727: * Returns a list of locales for which BreakIterators can be used.
728: * @return An array of Locales. All of the locales in the array can
729: * be used when creating a BreakIterator.
730: * @draft ICU 3.2
731: * @provisional This API might change or be removed in a future release.
732: */
733: public static synchronized ULocale[] getAvailableULocales() {
734: // to avoid linking ICULocaleData
735: return getShim().getAvailableULocales();
736: }
737:
738: private static final class BreakIteratorCache {
739:
740: private BreakIterator iter;
741: private ULocale where;
742:
743: BreakIteratorCache(ULocale where, BreakIterator iter) {
744: this .where = where;
745: this .iter = (BreakIterator) iter.clone();
746: }
747:
748: ULocale getLocale() {
749: return where;
750: }
751:
752: BreakIterator createBreakInstance() {
753: return (BreakIterator) iter.clone();
754: }
755: }
756:
757: static abstract class BreakIteratorServiceShim {
758: public abstract Object registerInstance(BreakIterator iter,
759: ULocale l, int k);
760:
761: public abstract boolean unregister(Object key);
762:
763: public abstract Locale[] getAvailableLocales();
764:
765: public abstract ULocale[] getAvailableULocales();
766:
767: public abstract BreakIterator createBreakIterator(ULocale l,
768: int k);
769: }
770:
771: private static BreakIteratorServiceShim shim;
772:
773: private static BreakIteratorServiceShim getShim() {
774: // Note: this instantiation is safe on loose-memory-model configurations
775: // despite lack of synchronization, since the shim instance has no state--
776: // it's all in the class init. The worst problem is we might instantiate
777: // two shim instances, but they'll share the same state so that's ok.
778: if (shim == null) {
779: try {
780: Class cls = Class
781: .forName("com.ibm.icu.text.BreakIteratorFactory");
782: shim = (BreakIteratorServiceShim) cls.newInstance();
783: } catch (MissingResourceException e) {
784: throw e;
785: } catch (Exception e) {
786: ///CLOVER:OFF
787: if (DEBUG) {
788: e.printStackTrace();
789: }
790: throw new RuntimeException(e.getMessage());
791: ///CLOVER:ON
792: }
793: }
794: return shim;
795: }
796:
797: // -------- BEGIN ULocale boilerplate --------
798:
799: /**
800: * Return the locale that was used to create this object, or null.
801: * This may may differ from the locale requested at the time of
802: * this object's creation. For example, if an object is created
803: * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
804: * drawn from <tt>en</tt> (the <i>actual</i> locale), and
805: * <tt>en_US</tt> may be the most specific locale that exists (the
806: * <i>valid</i> locale).
807: *
808: * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
809: * contains a partial preview implementation. The * <i>actual</i>
810: * locale is returned correctly, but the <i>valid</i> locale is
811: * not, in most cases.
812: * @param type type of information requested, either {@link
813: * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
814: * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
815: * @return the information specified by <i>type</i>, or null if
816: * this object was not constructed from locale data.
817: * @see com.ibm.icu.util.ULocale
818: * @see com.ibm.icu.util.ULocale#VALID_LOCALE
819: * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
820: * @draft ICU 2.8 (retain)
821: * @provisional This API might change or be removed in a future release.
822: */
823: public final ULocale getLocale(ULocale.Type type) {
824: return type == ULocale.ACTUAL_LOCALE ? this .actualLocale
825: : this .validLocale;
826: }
827:
828: /**
829: * Set information about the locales that were used to create this
830: * object. If the object was not constructed from locale data,
831: * both arguments should be set to null. Otherwise, neither
832: * should be null. The actual locale must be at the same level or
833: * less specific than the valid locale. This method is intended
834: * for use by factories or other entities that create objects of
835: * this class.
836: * @param valid the most specific locale containing any resource
837: * data, or null
838: * @param actual the locale containing data used to construct this
839: * object, or null
840: * @see com.ibm.icu.util.ULocale
841: * @see com.ibm.icu.util.ULocale#VALID_LOCALE
842: * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
843: * @internal
844: */
845: final void setLocale(ULocale valid, ULocale actual) {
846: // Change the following to an assertion later
847: if ((valid == null) != (actual == null)) {
848: ///CLOVER:OFF
849: throw new IllegalArgumentException();
850: ///CLOVER:ON
851: }
852: // Another check we could do is that the actual locale is at
853: // the same level or less specific than the valid locale.
854: this .validLocale = valid;
855: this .actualLocale = actual;
856: }
857:
858: /**
859: * The most specific locale containing any resource data, or null.
860: * @see com.ibm.icu.util.ULocale
861: * @internal
862: */
863: private ULocale validLocale;
864:
865: /**
866: * The locale containing data used to construct this object, or
867: * null.
868: * @see com.ibm.icu.util.ULocale
869: * @internal
870: */
871: private ULocale actualLocale;
872:
873: // -------- END ULocale boilerplate --------
874: }
|