001: /*
002: *******************************************************************************
003: * Copyright (C) 2002-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.text;
008:
009: import java.io.IOException;
010: import java.io.InputStream;
011: import java.io.ByteArrayInputStream;
012:
013: import java.util.Locale;
014: import java.util.MissingResourceException;
015: import java.util.ResourceBundle;
016:
017: import com.ibm.icu.impl.ICUData;
018: import com.ibm.icu.impl.ICULocaleData;
019: import com.ibm.icu.impl.ICULocaleService;
020: import com.ibm.icu.impl.ICUResourceBundle;
021: import com.ibm.icu.impl.ICUService;
022: import com.ibm.icu.impl.ICUService.Factory;
023: import com.ibm.icu.util.ULocale;
024: import com.ibm.icu.util.UResourceBundle;
025: import com.ibm.icu.impl.Assert;
026:
027: /**
028: * @author Ram
029: *
030: * To change this generated comment edit the template variable "typecomment":
031: * Window>Preferences>Java>Templates.
032: * To enable and disable the creation of type comments go to
033: * Window>Preferences>Java>Code Generation.
034: */
035: final class BreakIteratorFactory extends
036: BreakIterator.BreakIteratorServiceShim {
037:
038: public Object registerInstance(BreakIterator iter, ULocale locale,
039: int kind) {
040: iter.setText(new java.text.StringCharacterIterator(""));
041: return service.registerObject(iter, locale, kind);
042: }
043:
044: public boolean unregister(Object key) {
045: if (service.isDefault()) {
046: return false;
047: }
048: return service.unregisterFactory((Factory) key);
049: }
050:
051: public Locale[] getAvailableLocales() {
052: if (service == null) {
053: return ICUResourceBundle
054: .getAvailableLocales(ICUResourceBundle.ICU_BASE_NAME);
055: } else {
056: return service.getAvailableLocales();
057: }
058: }
059:
060: public ULocale[] getAvailableULocales() {
061: if (service == null) {
062: return ICUResourceBundle
063: .getAvailableULocales(ICUResourceBundle.ICU_BASE_NAME);
064: } else {
065: return service.getAvailableULocales();
066: }
067: }
068:
069: public BreakIterator createBreakIterator(ULocale locale, int kind) {
070: // TODO: convert to ULocale when service switches over
071: if (service.isDefault()) {
072: return createBreakInstance(locale, kind);
073: }
074: ULocale[] actualLoc = new ULocale[1];
075: BreakIterator iter = (BreakIterator) service.get(locale, kind,
076: actualLoc);
077: iter.setLocale(actualLoc[0], actualLoc[0]); // services make no distinction between actual & valid
078: return iter;
079: }
080:
081: private static class BFService extends ICULocaleService {
082: BFService() {
083: super ("BreakIterator");
084:
085: class RBBreakIteratorFactory extends
086: ICUResourceBundleFactory {
087: protected Object handleCreate(ULocale loc, int kind,
088: ICUService service) {
089: return createBreakInstance(loc, kind);
090: }
091: }
092: registerFactory(new RBBreakIteratorFactory());
093:
094: markDefault();
095: }
096: }
097:
098: static final ICULocaleService service = new BFService();
099:
100: /** KIND_NAMES are the resource key to be used to fetch the name of the
101: * pre-compiled break rules. The resource bundle name is "boundaries".
102: * The value for each key will be the rules to be used for the
103: * specified locale - "word" -> "word_th" for Thai, for example.
104: * DICTIONARY_POSSIBLE indexes in the same way, and indicates whether a
105: * dictionary is a possibility for that type of break. This is just
106: * an optimization to avoid a resource lookup where no dictionary is
107: * ever possible.
108: * @internal
109: */
110: private static final String[] KIND_NAMES = { "grapheme", "word",
111: "line", "sentence", "title" };
112: private static final boolean[] DICTIONARY_POSSIBLE = { false, true,
113: true, false, false };
114:
115: private static BreakIterator createBreakInstance(ULocale locale,
116: int kind) {
117:
118: BreakIterator iter = null;
119: UResourceBundle rb = UResourceBundle.getBundleInstance(
120: ICUResourceBundle.ICU_BRKITR_BASE_NAME, locale);
121:
122: //
123: // Get the binary rules. These are needed for both normal RulesBasedBreakIterators
124: // and for Dictionary iterators.
125: //
126: InputStream ruleStream = null;
127: try {
128: ResourceBundle boundaries = (ResourceBundle) rb
129: .getObject("boundaries");
130: String typeKey = KIND_NAMES[kind];
131: String brkfname = boundaries.getString(typeKey);
132: String rulesFileName = ICUResourceBundle.ICU_BUNDLE
133: + ICUResourceBundle.ICU_BRKITR_NAME + "/"
134: + brkfname;
135: ruleStream = ICUData.getStream(rulesFileName);
136: } catch (Exception e) {
137: throw new MissingResourceException(e.toString(), "", "");
138: }
139:
140: //
141: // Check whether a dictionary exists, and create a DBBI iterator is
142: // one does.
143: //
144: if (DICTIONARY_POSSIBLE[kind]) {
145: // This type of break iterator could potentially use a dictionary.
146: //
147: try {
148: //ICUResourceBundle dictRes = (ICUResourceBundle)rb.getObject("BreakDictionaryData");
149: //byte[] dictBytes = null;
150: //dictBytes = dictRes.getBinary(dictBytes);
151: //TODO: Hard code this for now! fix it once CompactTrieDictionary is ported
152: if (locale.equals("th")) {
153: String fileName = "data/th.brk";
154: InputStream is = ICUData.getStream(fileName);
155: iter = new DictionaryBasedBreakIterator(ruleStream,
156: is);
157: }
158: } catch (MissingResourceException e) {
159: // Couldn't find a dictionary.
160: // This is normal, and will occur whenever creating a word or line
161: // break iterator for a locale that does not have a BreakDictionaryData
162: // resource - meaning for all but Thai.
163: // Fall through to creating a normal RulebasedBreakIterator.
164: } catch (IOException e) {
165: Assert.fail(e);
166: }
167: }
168:
169: if (iter == null) {
170: //
171: // Create a normal RuleBasedBreakIterator.
172: // We have determined that this is not supposed to be a dictionary iterator.
173: //
174: try {
175: iter = RuleBasedBreakIterator
176: .getInstanceFromCompiledRules(ruleStream);
177: } catch (IOException e) {
178: // Shouldn't be possible to get here.
179: // If it happens, the compiled rules are probably corrupted in some way.
180: Assert.fail(e);
181: }
182: }
183: // TODO: Determine valid and actual locale correctly.
184: ULocale uloc = ULocale.forLocale(rb.getLocale());
185: iter.setLocale(uloc, uloc);
186:
187: return iter;
188:
189: }
190:
191: }
|