001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.test.rbbi;
008:
009: import java.io.IOException;
010: import java.io.InputStream;
011: import java.util.ListResourceBundle;
012: import java.util.MissingResourceException;
013:
014: import com.ibm.icu.dev.test.TestFmwk;
015: import com.ibm.icu.text.BreakIterator;
016: import com.ibm.icu.text.DictionaryBasedBreakIterator;
017: import com.ibm.icu.text.RuleBasedBreakIterator;
018:
019: // TODO: {dlf} this test currently doesn't test anything!
020: // You'll notice that the resource that uses the dictionary isn't even on the resource path,
021: // so the dictionary never gets used. Good thing, too, because it would throw a security
022: // exception if run with a security manager. Not that it would matter, the dictionary
023: // resource isn't even in the icu source tree!
024: // In order to fix this:
025: // 1) make sure english.dict matches the current dictionary format required by dbbi
026: // 2) make sure english.dict gets included in icu4jtests.jar
// 3) have this test use getResourceAsStream to get a stream on the dictionary, and
//    directly instantiate a DictionaryBasedBreakIterator. It can use the rules from
//    the appropriate section of BreakIteratorRules_en_US_TEST. I'd suggest just copying
//    the rules into this file.
031: // 4) change the test text by inserting '|' at word breaks, and '||' at line breaks.
032: // 5) process this text to a) create tables of break indices, and b) clean up the test
033: // for the break iterator to work on
034: //
035: // This would NOT test the ability to load dictionary-based break iterators through our
036: // normal resource mechanism. One could install such a break iterator and its
037: // resources into the icu4j jar, and it would work, but there's no way to register entire
038: // resources from outside yet. Even if there were, the access restrictions are a bit
039: // difficult to manage, if one wanted to register a break iterator whose code and data
040: // resides outside the icu4j jar. Since the code to instantiate would be going through
041: // two protection domains, each domain would have to allow access to the data-- but
// icu4j's domain wouldn't know about ours. So we could instantiate before registering
// the break iterator, but this would mean we'd have to fully initialize the dictionaries
// at instantiation time, rather than let this be deferred until they are actually needed.
045: //
046: // I've done items 2 and 3 above. Unfortunately, since I haven't done item 1, the
047: // dictionary builder crashes. So for now I'm disabling this test. This is not
048: // that important, since we have a thai dictionary that we do test thoroughly.
049: //
050:
051: public class SimpleBITest extends TestFmwk {
052: public static final String testText =
053: // "The rain in Spain stays mainly on the plain. The plains in Spain are mainly pained with rain.";
054: //"one-two now-- Hah! You owe me exactly $1,345.67... Pay up, huh? By the way, why don't I send you my re\u0301sume\u0301? This is a line\r\nbreak.";
055: //"nowisthetimeforallgoodmen... tocometothehelpoftheircountry";
056: "When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have "
057: //"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave"
058: + "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws"
059: + "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe"
060: + "causeswhichimpelthemtotheseparation\n"
061: + "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain"
062: + "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare"
063: + "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment"
064: + "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying"
065: + "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety"
066: + "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient"
067: + "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than"
068: + "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations,"
069: + "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty,"
070: + "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof"
071: + "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory"
072: + "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe"
073: + "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n"
074: + "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n"
075: + "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill"
076: + "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n"
077: + "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish"
078: + "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n"
079: + "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic"
080: + "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n"
081: + "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n"
082: + "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers,"
083: + "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed"
084: + "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n"
085: + "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof"
086: + "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof"
087: + "lands.\n"
088: + "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n"
089: + "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n"
090: + "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir"
091: + "substance.\n"
092: + "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n"
093: + "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n"
094: + "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;"
095: + "givinghisassenttotheiractsofpretendedlegislation:\n"
096: + "Forquarteringlargebodiesofarmedtroopsamongus:\n"
097: + "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese"
098: + "states:\n"
099: + "Forcuttingoffourtradewithallpartsoftheworld:\n"
100: + "Forimposingtaxesonuswithoutourconsent:\n"
101: + "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n"
102: + "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n"
103: + "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and"
104: + "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese"
105: + "colonies:\n"
106: + "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n"
107: + "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n"
108: + "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n"
109: + "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n"
110: + "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny,"
111: + "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth"
112: + "theheadofacivilizednation.\n"
113: + "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe"
114: + "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n"
115: + "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the"
116: + "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n"
117: + "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave"
118: + "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is"
119: + "unfittobetherulerofafreepeople.\n"
120: + "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir"
121: + "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration"
122: + "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour"
123: + "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We"
124: + "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind,"
125: + "enemiesinwar,inpeacefriends.\n"
126: + "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe"
127: + "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof"
128: + "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent"
129: + "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe"
130: + "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto"
131: + "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent"
132: + "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we"
133: + "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n";
134:
135: public static void main(String[] args) throws Exception {
136: new SimpleBITest().run(args);
137: }
138:
139: protected boolean validate() {
140: // TODO: remove when english.dict gets fixed
141: return false;
142: }
143:
144: private BreakIterator createTestIterator(int kind) {
145: final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";
146:
147: BreakIterator iter = null;
148:
149: ListResourceBundle bundle = null;
150: try {
151: Class cls = Class.forName(bname);
152: bundle = (ListResourceBundle) cls.newInstance();
153: } catch (Exception e) {
154: ///CLOVER:OFF
155: errln("could not create bundle: " + bname + "exception: "
156: + e.getMessage());
157: ///CLOVER:ON
158: return null;
159: }
160:
161: final String[] kindNames = { "Character", "Word", "Line",
162: "Sentence" };
163: String rulesName = kindNames[kind] + "BreakRules";
164: String dictionaryName = kindNames[kind] + "BreakDictionary";
165:
166: String[] classNames = bundle
167: .getStringArray("BreakIteratorClasses");
168: String rules = bundle.getString(rulesName);
169: if (classNames[kind].equals("RuleBasedBreakIterator")) {
170: iter = new RuleBasedBreakIterator(rules);
171: } else if (classNames[kind]
172: .equals("DictionaryBasedBreakIterator")) {
173: try {
174: String dictionaryPath = bundle
175: .getString(dictionaryName);
176: InputStream dictionary = bundle.getClass()
177: .getResourceAsStream(dictionaryPath);
178: System.out.println("looking for " + dictionaryPath
179: + " from " + bundle.getClass() + " returned "
180: + dictionary);
181: iter = new DictionaryBasedBreakIterator(rules,
182: dictionary);
183: } catch (IOException e) {
184: e.printStackTrace();
185: errln(e.getMessage());
186: System.out.println(e); // debug
187: } catch (MissingResourceException e) {
188: errln(e.getMessage());
189: System.out.println(e); // debug
190: }
191: }
192: if (iter == null) {
193: errln("could not create iterator");
194: }
195:
196: return iter;
197: }
198:
199: public void testWordBreak() throws Exception {
200: BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);
201: int breaks = doTest(wordBreak);
202: logln(String.valueOf(breaks));
203: }
204:
205: public void testLineBreak() throws Exception {
206: BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);
207: int breaks = doTest(lineBreak);
208: logln(String.valueOf(breaks));
209: }
210:
211: public void testSentenceBreak() throws Exception {
212: BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);
213: int breaks = doTest(sentenceBreak);
214: logln(String.valueOf(breaks));
215: }
216:
217: private int doTest(BreakIterator bi) {
218: // forward
219: bi.setText(testText);
220: int p = bi.first();
221: int lastP = p;
222: String fragment;
223: int breaks = 0;
224: logln("Forward...");
225: while (p != BreakIterator.DONE) {
226: p = bi.next();
227: if (p != BreakIterator.DONE) {
228: fragment = testText.substring(lastP, p);
229: } else {
230: fragment = testText.substring(lastP);
231: }
232: debugPrintln(": >" + fragment + "<");
233: ++breaks;
234: lastP = p;
235: }
236: return breaks;
237: }
238:
239: private void debugPrintln(String s) {
240: final String zeros = "0000";
241: String temp;
242: StringBuffer out = new StringBuffer();
243: for (int i = 0; i < s.length(); i++) {
244: char c = s.charAt(i);
245: if (c >= ' ' && c < '\u007f')
246: out.append(c);
247: else {
248: out.append("\\u");
249: temp = Integer.toHexString((int) c);
250: out.append(zeros.substring(0, 4 - temp.length()));
251: out.append(temp);
252: }
253: }
254: logln(out.toString());
255: }
256:
257: private void debugPrintln2(String s) {
258: StringBuffer out = new StringBuffer();
259: for (int i = 0; i < s.length(); i++) {
260: char c = s.charAt(i);
261: if (c >= '\u0100')
262: out.append("<" + ((int) c - 0x100) + ">");
263: else
264: out.append(c);
265: }
266: logln(out.toString());
267: }
268: }
|