0001: //##header
0002: /*
0003: *******************************************************************************
0004: * Copyright (C) 1996-2006, International Business Machines Corporation and *
0005: * others. All Rights Reserved. *
0006: *******************************************************************************
0007: */
0008:
0009: package com.ibm.icu.text;
0010:
0011: //import com.ibm.icu.impl.ICULocaleData;
0012: import com.ibm.icu.impl.ICUDebug;
0013: import com.ibm.icu.impl.ICUResourceBundle;
0014: import com.ibm.icu.impl.UCharacterProperty;
0015: import com.ibm.icu.impl.Utility;
0016: import com.ibm.icu.util.ULocale;
0017: import com.ibm.icu.util.UResourceBundle;
0018:
0019: import java.math.BigInteger;
0020: import java.text.FieldPosition;
0021: import java.text.ParsePosition;
0022: import java.util.Arrays;
0023: import java.util.HashMap;
0024: import java.util.Locale;
0025: import java.util.Map;
0026: import java.util.MissingResourceException;
0027: import java.util.Set;
0028:
0029: //import java.util.ResourceBundle;
0030:
0031: /**
0032: * <p>A class that formats numbers according to a set of rules. This number formatter is
0033: * typically used for spelling out numeric values in words (e.g., 25,376 as
0034: * "twenty-five thousand three hundred seventy-six" or "vingt-cinq mille trois
0035: * cents soixante-seize" or
0036: * "f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig"), but can also be used for
0037: * other complicated formatting tasks, such as formatting a number of seconds as hours,
0038: * minutes and seconds (e.g., 3,730 as "1:02:10").</p>
0039: *
0040: * <p>The resources contain three predefined formatters for each locale: spellout, which
0041: * spells out a value in words (123 is "one hundred twenty-three"); ordinal, which
0042: * appends an ordinal suffix to the end of a numeral (123 is "123rd"); and
0043: * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
0044: * "2:03"). The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
0045: * by supplying programmer-defined rule sets.</p>
0046: *
0047: * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
0048: * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
0049: * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
0050: * Each rule has a string of output text and a value or range of values it is applicable to.
0051: * In a typical spellout rule set, the first twenty rules are the words for the numbers from
0052: * 0 to 19:</p>
0053: *
0054: * <pre>zero; one; two; three; four; five; six; seven; eight; nine;
0055: * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
0056: *
0057: * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
0058: * we only have to supply the words for the multiples of 10:</p>
0059: *
0060: * <pre>20: twenty[->>];
0061: * 30: thirty[->>];
0062: * 40: forty[->>];
0063: * 50: fifty[->>];
0064: * 60: sixty[->>];
0065: * 70: seventy[->>];
0066: * 80: eighty[->>];
0067: * 90: ninety[->>];</pre>
0068: *
0069: * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
0070: * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
0071: * to all numbers from its own base value to one less than the next rule's base value. The
0072: * ">>" token is called a <em>substitution</em> and tells the formatter to
0073: * isolate the number's ones digit, format it using this same set of rules, and place the
0074: * result at the position of the ">>" token. Text in brackets is omitted if
0075: * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
0076: * is "twenty-four," not "twenty four").</p>
0077: *
0078: * <p>For even larger numbers, we can actually look up several parts of the number in the
0079: * list:</p>
0080: *
0081: * <pre>100: << hundred[ >>];</pre>
0082: *
0083: * <p>The "<<" represents a new kind of substitution. The << isolates
0084: * the hundreds digit (and any digits to its left), formats it using this same rule set, and
0085: * places the result where the "<<" was. Notice also that the meaning of
0086: * >> has changed: it now refers to both the tens and the ones digits. The meaning of
0087: * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
0088: * which is the highest power of 10 that is less than or equal to the base value (the user
0089: * can change this). To fill in the substitutions, the formatter divides the number being
0090: * formatted by the divisor. The integral quotient is used to fill in the <<
0091: * substitution, and the remainder is used to fill in the >> substitution. The meaning
0092: * of the brackets changes similarly: text in brackets is omitted if the value being
0093: * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
0094: * if a substitution is filled in with text that includes another substitution, that
0095: * substitution is also filled in.</p>
0096: *
0097: * <p>This rule covers values up to 999, at which point we add another rule:</p>
0098: *
0099: * <pre>1000: << thousand[ >>];</pre>
0100: *
0101: * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
0102: * base value is a higher power of 10, changing the rule's divisor. This rule can actually be
0103: * used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
0104: *
0105: * <pre>1,000,000: << million[ >>];
0106: * 1,000,000,000: << billion[ >>];
0107: * 1,000,000,000,000: << trillion[ >>];
0108: * 1,000,000,000,000,000: OUT OF RANGE!;</pre>
0109: *
0110: * <p>Commas, periods, and spaces can be used in the base values to improve legibility and
0111: * are ignored by the rule parser. The last rule in the list is customarily treated as an
0112: * "overflow rule," applying to everything from its base value on up, and often (as
0113: * in this example) being used to print out an error message or default representation.
0114: * Notice also that the size of the major groupings in large numbers is controlled by the
0115: * spacing of the rules: because in English we group numbers by thousand, the higher rules
0116: * are separated from each other by a factor of 1,000.</p>
0117: *
0118: * <p>To see how these rules actually work in practice, consider the following example:
0119: * Formatting 25,340 with this rule set would work like this:</p>
0120: *
0121: * <table border="0" width="630">
0122: * <tr>
0123: * <td width="21"></td>
0124: * <td width="257" valign="top"><strong><< thousand >></strong></td>
0125: * <td width="340" valign="top">[the rule whose base value is 1,000 is applicable to 25,340]</td>
0126: * </tr>
0127: * <tr>
0128: * <td width="21"></td>
0129: * <td width="257" valign="top"><strong>twenty->></strong> thousand >></td>
0130: * <td width="340" valign="top">[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
0131: * </tr>
0132: * <tr>
0133: * <td width="21"></td>
0134: * <td width="257" valign="top">twenty-<strong>five</strong> thousand >></td>
0135: * <td width="340" valign="top">[25 mod 10 is 5. The rule for 5 is "five."]</td>
0136: * </tr>
0137: * <tr>
0138: * <td width="21"></td>
0139: * <td width="257" valign="top">twenty-five thousand <strong><< hundred >></strong></td>
0140: * <td width="340" valign="top">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
0141: * </tr>
0142: * <tr>
0143: * <td width="21"></td>
0144: * <td width="257" valign="top">twenty-five thousand <strong>three</strong> hundred >></td>
0145: * <td width="340" valign="top">[340 over 100 is 3. The rule for 3 is "three."]</td>
0146: * </tr>
0147: * <tr>
0148: * <td width="21"></td>
0149: * <td width="257" valign="top">twenty-five thousand three hundred <strong>forty</strong></td>
0150: * <td width="340" valign="top">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
0151: * evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
0152: * </tr>
0153: * </table>
0154: *
0155: * <p>The above syntax suffices only to format positive integers. To format negative numbers,
0156: * we add a special rule:</p>
0157: *
0158: * <pre>-x: minus >>;</pre>
0159: *
0160: * <p>This is called a <em>negative-number rule,</em> and is identified by "-x"
0161: * where the base value would be. This rule is used to format all negative numbers. The
0162: * >> token here means "find the number's absolute value, format it with these
0163: * rules, and put the result here."</p>
0164: *
0165: * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
0166: * parts:</p>
0167: *
0168: * <pre>x.x: << point >>;</pre>
0169: *
0170: * <p>This rule is used for all positive non-integers (negative non-integers pass through the
0171: * negative-number rule first and then through this rule). Here, the << token refers to
0172: * the number's integral part, and the >> to the number's fractional part. The
0173: * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
0174: * formatted as "one hundred twenty-three point four five six").</p>
0175: *
0176: * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
0177: *
0178: * <p>There is actually much more flexibility built into the rule language than the
0179: * description above shows. A formatter may own multiple rule sets, which can be selected by
0180: * the caller, and which can use each other to fill in their substitutions. Substitutions can
0181: * also be filled in with digits, using a DecimalFormat object. There is syntax that can be
0182: * used to alter a rule's divisor in various ways. And there is provision for much more
0183: * flexible fraction handling. A complete description of the rule syntax follows:</p>
0184: *
0185: * <hr>
0186: *
0187: * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
0188: * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
0189: * set name must begin with a % sign. Rule sets with names that begin with a single % sign
0190: * are <em>public:</em> the caller can specify that they be used to format and parse numbers.
0191: * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
0192: * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
0193: *
0194: * <p>The user can also specify a special "rule set" named <tt>%%lenient-parse</tt>.
0195: * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
0196: * description which is used to define equivalences for lenient parsing. For more information
0197: * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
0198: * see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning
0199: * in collation rules, such as '&', have no particular meaning when appearing outside
0200: * of the <tt>lenient-parse</tt> rule set.</p>
0201: *
0202: * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
0203: * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
0204: * These parameters are controlled by the description syntax, which consists of a <em>rule
0205: * descriptor,</em> a colon, and a <em>rule body.</em></p>
0206: *
0207: * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
0208: * name of a token):</p>
0209: *
0210: * <table border="0" width="100%">
0211: * <tr>
0212: * <td width="5%" valign="top"></td>
0213: * <td width="8%" valign="top"><em>bv</em>:</td>
0214: * <td valign="top"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
0215: * number expressed using ASCII digits. <em>bv</em> may contain spaces, periods, and commas,
0216: * which are ignored. The rule's divisor is the highest power of 10 less than or equal to
0217: * the base value.</td>
0218: * </tr>
0219: * <tr>
0220: * <td width="5%" valign="top"></td>
0221: * <td width="8%" valign="top"><em>bv</em>/<em>rad</em>:</td>
0222: * <td valign="top"><em>bv</em> specifies the rule's base value. The rule's divisor is the
0223: * highest power of <em>rad</em> less than or equal to the base value.</td>
0224: * </tr>
0225: * <tr>
0226: * <td width="5%" valign="top"></td>
0227: * <td width="8%" valign="top"><em>bv</em>>:</td>
0228: * <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
0229: * let the radix be 10, and the exponent be the highest exponent of the radix that yields a
0230: * result less than or equal to the base value. Every > character after the base value
0231: * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
0232: * raised to the power of the exponent; otherwise, the divisor is 1.</td>
0233: * </tr>
0234: * <tr>
0235: * <td width="5%" valign="top"></td>
0236: * <td width="8%" valign="top"><em>bv</em>/<em>rad</em>>:</td>
0237: * <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
0238: * let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
0239: * yields a result less than or equal to the base value. Every > character after the radix
0240: * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
0241: * raised to the power of the exponent; otherwise, the divisor is 1.</td>
0242: * </tr>
0243: * <tr>
0244: * <td width="5%" valign="top"></td>
0245: * <td width="8%" valign="top">-x:</td>
0246: * <td valign="top">The rule is a negative-number rule.</td>
0247: * </tr>
0248: * <tr>
0249: * <td width="5%" valign="top"></td>
0250: * <td width="8%" valign="top">x.x:</td>
0251: * <td valign="top">The rule is an <em>improper fraction rule.</em></td>
0252: * </tr>
0253: * <tr>
0254: * <td width="5%" valign="top"></td>
0255: * <td width="8%" valign="top">0.x:</td>
0256: * <td valign="top">The rule is a <em>proper fraction rule.</em></td>
0257: * </tr>
0258: * <tr>
0259: * <td width="5%" valign="top"></td>
0260: * <td width="8%" valign="top">x.0:</td>
0261: * <td valign="top">The rule is a <em>master rule.</em></td>
0262: * </tr>
0263: * <tr>
0264: * <td width="5%" valign="top"></td>
0265: * <td width="8%" valign="top"><em>nothing</em></td>
0266: * <td valign="top">If the rule's rule descriptor is left out, the base value is one plus the
0267: * preceding rule's base value (or zero if this is the first rule in the list) in a normal
0268: * rule set. In a fraction rule set, the base value is the same as the preceding rule's
0269: * base value.</td>
0270: * </tr>
0271: * </table>
0272: *
0273: * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
0274: * on whether it is used to format a number's integral part (or the whole number) or a
0275: * number's fractional part. Using a rule set to format a number's fractional part makes it a
0276: * fraction rule set.</p>
0277: *
0278: * <p>Which rule is used to format a number is defined according to one of the following
0279: * algorithms: If the rule set is a regular rule set, do the following:
0280: *
0281: * <ul>
0282: * <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
0283: * use the master rule. (If the number being formatted was passed in as a <tt>long</tt>,
0284: * the master rule is ignored.)</li>
0285: * <li>If the number is negative, use the negative-number rule.</li>
0286: * <li>If the number has a fractional part and is greater than 1, use the improper fraction
0287: * rule.</li>
0288: * <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
0289: * rule.</li>
0290: * <li>Binary-search the rule list for the rule with the highest base value less than or equal
0291: * to the number. If that rule has two substitutions, its base value is not an even multiple
0292: * of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
0293: * rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
0294: * </ul>
0295: *
0296: * <p>If the rule set is a fraction rule set, do the following:
0297: *
0298: * <ul>
0299: * <li>Ignore negative-number and fraction rules.</li>
0300: * <li>For each rule in the list, multiply the number being formatted (which will always be
0301: * between 0 and 1) by the rule's base value. Keep track of the distance between the result
0302: * and the nearest integer.</li>
0303: * <li>Use the rule that produced the result closest to zero in the above calculation. In the
0304: * event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
0305: * to try each rule's base value as a possible denominator of a fraction. Whichever
0306: * denominator produces the fraction closest in value to the number being formatted wins.) If
0307: * the rule following the matching rule has the same base value, use it if the numerator of
0308: * the fraction is anything other than 1; if the numerator is 1, use the original matching
0309: * rule. (This is to allow singular and plural forms of the rule text without a lot of extra
0310: * hassle.)</li>
0311: * </ul>
0312: *
0313: * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
0314: * may include zero, one, or two <em>substitution tokens,</em> and a range of text in
0315: * brackets. The brackets denote optional text (and may also include one or both
0316: * substitutions). The exact meanings of the substitution tokens, and under what conditions
0317: * optional text is omitted, depend on the syntax of the substitution token and the context.
0318: * The rest of the text in a rule body is literal text that is output when the rule matches
0319: * the number being formatted.</p>
0320: *
0321: * <p>A substitution token begins and ends with a <em>token character.</em> The token
0322: * character and the context together specify a mathematical operation to be performed on the
0323: * number being formatted. An optional <em>substitution descriptor </em>specifies how the
0324: * value resulting from that operation is used to fill in the substitution. The position of
0325: * the substitution token in the rule body specifies the location of the resultant text in
0326: * the original rule text.</p>
0327: *
0328: * <p>The meanings of the substitution token characters are as follows:</p>
0329: *
0330: * <table border="0" width="100%">
0331: * <tr>
0332: * <td width="37"></td>
0333: * <td width="23">>></td>
0334: * <td width="165" valign="top">in normal rule</td>
0335: * <td>Divide the number by the rule's divisor and format the remainder</td>
0336: * </tr>
0337: * <tr>
0338: * <td width="37"></td>
0339: * <td width="23"></td>
0340: * <td width="165" valign="top">in negative-number rule</td>
0341: * <td>Find the absolute value of the number and format the result</td>
0342: * </tr>
0343: * <tr>
0344: * <td width="37"></td>
0345: * <td width="23"></td>
0346: * <td width="165" valign="top">in fraction or master rule</td>
0347: * <td>Isolate the number's fractional part and format it.</td>
0348: * </tr>
0349: * <tr>
0350: * <td width="37"></td>
0351: * <td width="23"></td>
0352: * <td width="165" valign="top">in rule in fraction rule set</td>
0353: * <td>Not allowed.</td>
0354: * </tr>
0355: * <tr>
0356: * <td width="37"></td>
0357: * <td width="23">>>></td>
0358: * <td width="165" valign="top">in normal rule</td>
0359: * <td>Divide the number by the rule's divisor and format the remainder,
0360: * but bypass the normal rule-selection process and just use the
0361: * rule that precedes this one in this rule list.</td>
0362: * </tr>
0363: * <tr>
0364: * <td width="37"></td>
0365: * <td width="23"></td>
0366: * <td width="165" valign="top">in all other rules</td>
0367: * <td>Not allowed.</td>
0368: * </tr>
0369: * <tr>
0370: * <td width="37"></td>
0371: * <td width="23"><<</td>
0372: * <td width="165" valign="top">in normal rule</td>
0373: * <td>Divide the number by the rule's divisor and format the quotient</td>
0374: * </tr>
0375: * <tr>
0376: * <td width="37"></td>
0377: * <td width="23"></td>
0378: * <td width="165" valign="top">in negative-number rule</td>
0379: * <td>Not allowed.</td>
0380: * </tr>
0381: * <tr>
0382: * <td width="37"></td>
0383: * <td width="23"></td>
0384: * <td width="165" valign="top">in fraction or master rule</td>
0385: * <td>Isolate the number's integral part and format it.</td>
0386: * </tr>
0387: * <tr>
0388: * <td width="37"></td>
0389: * <td width="23"></td>
0390: * <td width="165" valign="top">in rule in fraction rule set</td>
0391: * <td>Multiply the number by the rule's base value and format the result.</td>
0392: * </tr>
0393: * <tr>
0394: * <td width="37"></td>
0395: * <td width="23">==</td>
0396: * <td width="165" valign="top">in all rule sets</td>
0397: * <td>Format the number unchanged</td>
0398: * </tr>
0399: * <tr>
0400: * <td width="37"></td>
0401: * <td width="23">[]</td>
0402: * <td width="165" valign="top">in normal rule</td>
0403: * <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
0404: * </tr>
0405: * <tr>
0406: * <td width="37"></td>
0407: * <td width="23"></td>
0408: * <td width="165" valign="top">in negative-number rule</td>
0409: * <td>Not allowed.</td>
0410: * </tr>
0411: * <tr>
0412: * <td width="37"></td>
0413: * <td width="23"></td>
0414: * <td width="165" valign="top">in improper-fraction rule</td>
0415: * <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
0416: * x.x rule and a 0.x rule)</td>
0417: * </tr>
0418: * <tr>
0419: * <td width="37"></td>
0420: * <td width="23"></td>
0421: * <td width="165" valign="top">in master rule</td>
0422: * <td>Omit the optional text if the number is an integer (same as specifying both an x.x
0423: * rule and an x.0 rule)</td>
0424: * </tr>
0425: * <tr>
0426: * <td width="37"></td>
0427: * <td width="23"></td>
0428: * <td width="165" valign="top">in proper-fraction rule</td>
0429: * <td>Not allowed.</td>
0430: * </tr>
0431: * <tr>
0432: * <td width="37"></td>
0433: * <td width="23"></td>
0434: * <td width="165" valign="top">in rule in fraction rule set</td>
0435: * <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
0436: * </tr>
0437: * </table>
0438: *
0439: * <p>The substitution descriptor (i.e., the text between the token characters) may take one
0440: * of three forms:</p>
0441: *
0442: * <table border="0" width="100%">
0443: * <tr>
0444: * <td width="42"></td>
0445: * <td width="166" valign="top">a rule set name</td>
0446: * <td>Perform the mathematical operation on the number, and format the result using the
0447: * named rule set.</td>
0448: * </tr>
0449: * <tr>
0450: * <td width="42"></td>
0451: * <td width="166" valign="top">a DecimalFormat pattern</td>
0452: * <td>Perform the mathematical operation on the number, and format the result using a
0453: * DecimalFormat with the specified pattern. The pattern must begin with 0 or #.</td>
0454: * </tr>
0455: * <tr>
0456: * <td width="42"></td>
0457: * <td width="166" valign="top">nothing</td>
0458: * <td>Perform the mathematical operation on the number, and format the result using the rule
0459: * set containing the current rule, except:<ul>
0460: * <li>You can't have an empty substitution descriptor with a == substitution.</li>
0461: * <li>If you omit the substitution descriptor in a >> substitution in a fraction rule,
0462: * format the result one digit at a time using the rule set containing the current rule.</li>
0463: * <li>If you omit the substitution descriptor in a << substitution in a rule in a
0464: * fraction rule set, format the result using the default rule set for this formatter.</li>
0465: * </ul>
0466: * </td>
0467: * </tr>
0468: * </table>
0469: *
0470: * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
0471: * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
0472: * the apostrophe is ignored, but all text after it becomes significant (this is how you can
0473: * have a rule's rule text begin with whitespace). There is no escape function: the semicolon
0474: * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
0475: * names. The characters beginning a substitution token are always treated as the beginning
0476: * of a substitution token.</p>
0477: *
0478: * <p>See the resource data and the demo program for annotated examples of real rule sets
0479: * using these features.</p>
0480: *
0481: * @author Richard Gillam
0482: * @see NumberFormat
0483: * @see DecimalFormat
0484: * @stable ICU 2.0
0485: */
0486: public class RuleBasedNumberFormat extends NumberFormat {
0487:
0488: //-----------------------------------------------------------------------
0489: // constants
0490: //-----------------------------------------------------------------------
0491:
0492: // Serialization version identifier. Generated by serialver from JDK 1.4.1_01
0493: static final long serialVersionUID = -7664252765575395068L;
0494:
0495: /**
0496: * Copyright notice stored as a constant so that the text ends up in the compiled .class file.
0497: */
0498: private static final String copyrightNotice = "Copyright \u00a91997-2004 IBM Corp. All rights reserved.";
0499:
0500: /**
0501: * Selector code that tells the constructor to create a spellout formatter (one that spells a value out in words).
0502: * @stable ICU 2.0
0503: */
0504: public static final int SPELLOUT = 1;
0505:
0506: /**
0507: * Selector code that tells the constructor to create an ordinal formatter (one that appends an ordinal suffix to a numeral).
0508: * @stable ICU 2.0
0509: */
0510: public static final int ORDINAL = 2;
0511:
0512: /**
0513: * Selector code that tells the constructor to create a duration formatter (one that shows seconds as hours, minutes, and seconds).
0514: * @stable ICU 2.0
0515: */
0516: public static final int DURATION = 3;
0517:
0518: //-----------------------------------------------------------------------
0519: // data members
0520: //-----------------------------------------------------------------------
0521:
0522: /**
0523: * The formatter's rule sets. Declared transient, so default serialization does not write it.
0524: */
0525: private transient NFRuleSet[] ruleSets = null;
0526:
0527: /**
0528: * A reference to the formatter's default rule set. This is always one of
0529: * the entries in ruleSets.
0530: */
0531: private transient NFRuleSet defaultRuleSet = null;
0532:
0533: /**
0534: * The formatter's locale. This is used to create DecimalFormatSymbols and
0535: * Collator objects. The description-based constructors assign it before calling init().
0536: * @serial
0537: */
0538: private ULocale locale = null;
0539:
0540: /**
0541: * Collator to be used in lenient parsing. This variable is lazy-evaluated:
0542: * the collator is actually created the first time the client does a parse
0543: * with lenient-parse mode turned on.
0544: */
0545: private transient Collator collator = null;
0546:
0547: /**
0548: * The DecimalFormatSymbols object that any DecimalFormat objects this
0549: * formatter uses should use. This variable is lazy-evaluated: it isn't
0550: * filled in if the rule set never uses a DecimalFormat pattern.
0551: */
0552: private transient DecimalFormatSymbols decimalFormatSymbols = null;
0553:
0554: /**
0555: * Flag specifying whether lenient parse mode is on or off. Off by default.
0556: * @serial
0557: */
0558: private boolean lenientParse = false;
0559:
0560: /**
0561: * If the description specifies lenient-parse rules, they're stored here until
0562: * the collator is created.
0563: */
0564: private transient String lenientParseRules;
0565:
0566: /**
0567: * If the description specifies post-process rules, they're stored here until
0568: * post-processing is required.
0569: */
0570: private transient String postProcessRules;
0571:
0572: /**
0573: * Post processor lazily constructed from the postProcessRules.
0574: */
0575: private transient RBNFPostProcessor postProcessor;
0576:
0577: /**
0578: * Localizations for rule set names. NOTE(review): raw Map; key and value types are not visible here, confirm where it is populated.
0579: * @serial
0580: */
0581: private Map ruleSetDisplayNames;
0582:
0583: /**
0584: * The public rule set names.
0585: * @serial
0586: */
0587: private String[] publicRuleSetNames;
0588: // Debug switch; its value is whatever ICUDebug.enabled("rbnf") returns when this class is initialized.
0589: private static final boolean DEBUG = ICUDebug.enabled("rbnf");
0590:
0591: //-----------------------------------------------------------------------
0592: // constructors
0593: //-----------------------------------------------------------------------
0594:
0595: /**
0596: * Constructs a formatter from a textual rule description. The formatter's
0597: * locale is the default <tt>ULocale</tt>, and no localized rule set names
0598: * are supplied.
0599: * @param description The rules that define this formatter's behavior; the
0600: * description syntax is explained in full in the class documentation.
0601: * @stable ICU 2.0
0602: */
0603: public RuleBasedNumberFormat(String description) {
0604: // Chain to the (String, String[][]) constructor; the cast selects that overload for null.
0605: this(description, (String[][]) null);
0606: }
0607:
0608: /**
0609: * Constructs a formatter from a textual rule description together with
0610: * localized display names for its public rule sets. The formatter's locale
0611: * is the default <tt>ULocale</tt>.
0612: * <p>
0613: * The localizations array describes the public rule sets and how their
0614: * names are displayed in different locales:
0615: * <ul>
0616: * <li>Element 0 is an array holding the names of the public rule sets; its
0617: * first entry names the initial default rule set.</li>
0618: * <li>Every later element is an array one entry longer than element 0: a
0619: * ULocale ID followed by the localized rule set names, given in the same
0620: * order as in element 0.</li>
0621: * </ul>
0622: * @param description The rules that define this formatter's behavior; the
0623: * description syntax is explained in full in the class documentation.
0624: * @param localizations Localized names for the rule sets declared in the
0625: * description, laid out as described above.
0626: * @draft ICU 3.2
0627: * @provisional This API might change or be removed in a future release.
0628: */
0629: public RuleBasedNumberFormat(String description,
0630: String[][] localizations) {
0631: // Chain to the three-argument constructor, passing the default locale
0632: // explicitly instead of assigning it here.
0633: this(description, localizations, ULocale.getDefault());
0634: }
0635:
0636: /**
0637: * Constructs a formatter from a textual rule description for a
0638: * <tt>java.util.Locale</tt>. The locale is converted with
0639: * <tt>ULocale.forLocale()</tt>; it determines the characters used when
0640: * formatting in numerals and the equivalences used for lenient parsing.
0641: * @param description The rules that define this formatter's behavior; the
0642: * description syntax is explained in full in the class documentation.
0643: * @param locale The locale that governs which characters are used for
0644: * formatting values in numerals, and which characters are equivalent in
0645: * lenient parsing.
0646: * @stable ICU 2.0
0647: */
0648: public RuleBasedNumberFormat(String description, Locale locale) {
0649: // Convert the locale, then chain to the three-argument constructor with no localizations.
0650: this(description, (String[][]) null, ULocale.forLocale(locale));
0651: }
0652:
0653: /**
0654: * Constructs a formatter from a textual rule description for the given
0655: * <tt>ULocale</tt>, without localized rule set names. The locale determines
0656: * the characters used when formatting in numerals and defines the
0657: * equivalences used for lenient parsing.
0658: * @param description The rules that define this formatter's behavior; the
0659: * description syntax is explained in full in the class documentation.
0660: * @param locale The locale that governs which characters are used for
0661: * formatting values in numerals, and which characters are equivalent in
0662: * lenient parsing.
0663: * @draft ICU 3.2
0664: * @provisional This API might change or be removed in a future release.
0665: */
0666: public RuleBasedNumberFormat(String description, ULocale locale) {
0667: // Chain to the three-argument constructor; the cast marks the null as
0668: // the (absent) localizations array.
0669: this(description, (String[][]) null, locale);
0670: }
0671:
/**
 * Creates a RuleBasedNumberFormat that behaves according to the supplied
 * description, with localized rule set names, for a ULocale. The locale
 * determines the characters to use when formatting in numerals and defines
 * the equivalences used for lenient parsing.
 * <p>
 * The localizations data describes the public rule sets and their localized
 * display names for different locales:
 * <ul>
 * <li>The first element of the list is an array of the names of the public
 * rule sets; the first name in that array is the initial default rule
 * set.</li>
 * <li>Each remaining element is an array of localizations of those names.
 * It is one longer than the initial array: its first String is the ULocale
 * ID, and the remaining Strings are the localized rule set names, in the
 * same order as the initial array.</li>
 * </ul>
 * @param description A description of the formatter's desired behavior.
 * See the class documentation for a complete explanation of the description
 * syntax.
 * @param localizations a list of localizations for the rule set names in
 * the description.
 * @param locale A ulocale that governs which characters are used for
 * formatting values in numerals, and determines which characters are
 * equivalent in lenient parsing.
 * @draft ICU 3.2
 * @provisional This API might change or be removed in a future release.
 */
public RuleBasedNumberFormat(String description,
        String[][] localizations, ULocale locale) {
    // the locale is recorded first; init() then builds the formatter
    this.locale = locale;
    init(description, localizations);
}
0703:
/**
 * Creates a RuleBasedNumberFormat from a predefined description, for a
 * <code>java.util.Locale</code>. The selector code chooses among three
 * possible predefined formats: spellout, ordinal, and duration. The locale
 * is converted with <code>ULocale.forLocale</code> and construction is
 * delegated to the ULocale-based constructor.
 * @param locale The locale for the formatter.
 * @param format A selector code specifying which kind of formatter to
 * create for that locale. There are three legal values: SPELLOUT, which
 * creates a formatter that spells out a value in words in the desired
 * language; ORDINAL, which attaches an ordinal suffix from the desired
 * language to the end of a number (e.g. "123rd"); and DURATION, which
 * formats a duration in seconds as hours, minutes, and seconds.
 * @stable ICU 2.0
 */
public RuleBasedNumberFormat(Locale locale, int format) {
    this(ULocale.forLocale(locale), format);
}
0719:
0720: /**
0721: * Creates a RuleBasedNumberFormat from a predefined description. The selector
0722: * code choosed among three possible predefined formats: spellout, ordinal,
0723: * and duration.
0724: * @param locale The locale for the formatter.
0725: * @param format A selector code specifying which kind of formatter to create for that
0726: * locale. There are three legal values: SPELLOUT, which creates a formatter that
0727: * spells out a value in words in the desired language, ORDINAL, which attaches
0728: * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
0729: * and DURATION, which formats a duration in seconds as hours, minutes, and seconds.
0730: * @draft ICU 3.2
0731: * @provisional This API might change or be removed in a future release.
0732: */
0733: public RuleBasedNumberFormat(ULocale locale, int format) {
0734: this .locale = locale;
0735:
0736: ICUResourceBundle bundle = (ICUResourceBundle) UResourceBundle
0737: .getBundleInstance(
0738: ICUResourceBundle.ICU_RBNF_BASE_NAME, locale);
0739:
0740: // TODO: determine correct actual/valid locale. Note ambiguity
0741: // here -- do actual/valid refer to pattern, DecimalFormatSymbols,
0742: // or Collator?
0743: ULocale uloc = bundle.getULocale();
0744: setLocale(uloc, uloc);
0745:
0746: String description = "";
0747: String[][] localizations = null;
0748:
0749: try {
0750: description = bundle.getString(rulenames[format - 1]);
0751: ICUResourceBundle locb = bundle.get(locnames[format - 1]);
0752: localizations = new String[locb.getSize()][];
0753: for (int i = 0; i < localizations.length; ++i) {
0754: localizations[i] = locb.get(i).getStringArray();
0755: }
0756: } catch (MissingResourceException e) {
0757: // might have description and no localizations, or no description...
0758: }
0759:
0760: init(description, localizations);
0761: }
0762:
0763: private static final String[] rulenames = { "SpelloutRules",
0764: "OrdinalRules", "DurationRules", };
0765: private static final String[] locnames = { "SpelloutLocalizations",
0766: "OrdinalLocalizations", "DurationLocalizations", };
0767:
/**
 * Creates a RuleBasedNumberFormat from a predefined description, using the
 * default locale (<code>ULocale.getDefault()</code>).
 * @param format A selector code specifying which kind of formatter to
 * create. There are three legal values: SPELLOUT, which creates a formatter
 * that spells out a value in words in the default locale's language;
 * ORDINAL, which attaches an ordinal suffix from the default locale's
 * language to a numeral; and DURATION, which formats a duration in seconds
 * as hours, minutes, and seconds.
 * @stable ICU 2.0
 */
public RuleBasedNumberFormat(int format) {
    this(ULocale.getDefault(), format);
}
0781:
0782: //-----------------------------------------------------------------------
0783: // boilerplate
0784: //-----------------------------------------------------------------------
0785:
0786: /**
0787: * Duplicates this formatter.
0788: * @return A RuleBasedNumberFormat that is equal to this one.
0789: * @stable ICU 2.0
0790: */
0791: public Object clone() {
0792: return super .clone();
0793: }
0794:
0795: /**
0796: * Tests two RuleBasedNumberFormats for equality.
0797: * @param that The formatter to compare against this one.
0798: * @return true if the two formatters have identical behavior.
0799: * @stable ICU 2.0
0800: */
0801: public boolean equals(Object that) {
0802: // if the other object isn't a RuleBasedNumberFormat, that's
0803: // all we need to know
0804: if (!(that instanceof RuleBasedNumberFormat)) {
0805: return false;
0806: } else {
0807: // cast the other object's pointer to a pointer to a
0808: // RuleBasedNumberFormat
0809: RuleBasedNumberFormat that2 = (RuleBasedNumberFormat) that;
0810:
0811: // compare their locales and lenient-parse modes
0812: if (!locale.equals(that2.locale)
0813: || lenientParse != that2.lenientParse) {
0814: return false;
0815: }
0816:
0817: // if that succeeds, then compare their rule set lists
0818: if (ruleSets.length != that2.ruleSets.length) {
0819: return false;
0820: }
0821: for (int i = 0; i < ruleSets.length; i++) {
0822: if (!ruleSets[i].equals(that2.ruleSets[i])) {
0823: return false;
0824: }
0825: }
0826:
0827: return true;
0828: }
0829: }
0830:
0831: /**
0832: * Generates a textual description of this formatter.
0833: * @return a String containing a rule set that will produce a RuleBasedNumberFormat
0834: * with identical behavior to this one. This won't necessarily be identical
0835: * to the rule set description that was originally passed in, but will produce
0836: * the same result.
0837: * @stable ICU 2.0
0838: */
0839: public String toString() {
0840:
0841: // accumulate the descriptions of all the rule sets in a
0842: // StringBuffer, then cast it to a String and return it
0843: StringBuffer result = new StringBuffer();
0844: for (int i = 0; i < ruleSets.length; i++) {
0845: result.append(ruleSets[i].toString());
0846: }
0847: return result.toString();
0848: }
0849:
0850: /**
0851: * Writes this object to a stream.
0852: * @param out The stream to write to.
0853: */
0854: private void writeObject(java.io.ObjectOutputStream out)
0855: throws java.io.IOException {
0856: // we just write the textual description to the stream, so we
0857: // have an implementation-independent streaming format
0858: out.writeUTF(this .toString());
0859: out.writeObject(this .locale);
0860: }
0861:
0862: /**
0863: * Reads this object in from a stream.
0864: * @param in The stream to read from.
0865: */
0866: private void readObject(java.io.ObjectInputStream in)
0867: throws java.io.IOException,
0868: java.lang.ClassNotFoundException {
0869:
0870: // read the description in from the stream
0871: String description = in.readUTF();
0872: ULocale loc;
0873:
0874: try {
0875: loc = (ULocale) in.readObject();
0876: } catch (Exception e) {
0877: loc = ULocale.getDefault();
0878: }
0879:
0880: // build a brand-new RuleBasedNumberFormat from the description,
0881: // then steal its substructure. This object's substructure and
0882: // the temporary RuleBasedNumberFormat drop on the floor and
0883: // get swept up by the garbage collector
0884: RuleBasedNumberFormat temp = new RuleBasedNumberFormat(
0885: description, loc);
0886: ruleSets = temp.ruleSets;
0887: defaultRuleSet = temp.defaultRuleSet;
0888: publicRuleSetNames = temp.publicRuleSetNames;
0889: decimalFormatSymbols = temp.decimalFormatSymbols;
0890: locale = temp.locale;
0891: }
0892:
0893: //-----------------------------------------------------------------------
0894: // public API functions
0895: //-----------------------------------------------------------------------
0896:
0897: /**
0898: * Returns a list of the names of all of this formatter's public rule sets.
0899: * @return A list of the names of all of this formatter's public rule sets.
0900: * @stable ICU 2.0
0901: */
0902: public String[] getRuleSetNames() {
0903: return (String[]) publicRuleSetNames.clone();
0904: }
0905:
0906: /**
0907: * Return a list of locales for which there are locale-specific display names
0908: * for the rule sets in this formatter. If there are no localized display names, return null.
0909: * @return an array of the ulocales for which there is rule set display name information
0910: * @draft ICU 3.2
0911: * @provisional This API might change or be removed in a future release.
0912: */
0913: public ULocale[] getRuleSetDisplayNameLocales() {
0914: if (ruleSetDisplayNames != null) {
0915: Set s = ruleSetDisplayNames.keySet();
0916: String[] locales = (String[]) s
0917: .toArray(new String[s.size()]);
0918: Arrays.sort(locales, String.CASE_INSENSITIVE_ORDER);
0919: ULocale[] result = new ULocale[locales.length];
0920: for (int i = 0; i < locales.length; ++i) {
0921: result[i] = new ULocale(locales[i]);
0922: }
0923: return result;
0924: }
0925: return null;
0926: }
0927:
0928: private String[] getNameListForLocale(ULocale locale) {
0929: if (locale != null && ruleSetDisplayNames != null) {
0930: String[] localeNames = { locale.getBaseName(),
0931: ULocale.getDefault().getBaseName() };
0932: for (int i = 0; i < localeNames.length; ++i) {
0933: String lname = localeNames[i];
0934: while (lname.length() > 0) {
0935: String[] names = (String[]) ruleSetDisplayNames
0936: .get(lname);
0937: if (names != null) {
0938: return names;
0939: }
0940: lname = ULocale.getFallback(lname);
0941: }
0942: }
0943: }
0944: return null;
0945: }
0946:
0947: /**
0948: * Return the rule set display names for the provided locale. These are in the same order
0949: * as those returned by getRuleSetNames. The locale is matched against the locales for
0950: * which there is display name data, using normal fallback rules. If no locale matches,
0951: * the default display names are returned. (These are the internal rule set names minus
0952: * the leading '%'.)
0953: * @return an array of the locales that have display name information
0954: * @see #getRuleSetNames
0955: * @draft ICU 3.2
0956: * @provisional This API might change or be removed in a future release.
0957: */
0958: public String[] getRuleSetDisplayNames(ULocale locale) {
0959: String[] names = getNameListForLocale(locale);
0960: if (names != null) {
0961: return (String[]) names.clone();
0962: }
0963: names = getRuleSetNames();
0964: for (int i = 0; i < names.length; ++i) {
0965: names[i] = names[i].substring(1);
0966: }
0967: return names;
0968: }
0969:
0970: /**
0971: * Return the rule set display names for the current default locale.
0972: * @return an array of the display names
0973: * @draft ICU 3.2
0974: * @see #getRuleSetDisplayNames(ULocale)
0975: * @provisional This API might change or be removed in a future release.
0976: */
0977: public String[] getRuleSetDisplayNames() {
0978: return getRuleSetDisplayNames(ULocale.getDefault());
0979: }
0980:
0981: /**
0982: * Return the rule set display name for the provided rule set and locale.
0983: * The locale is matched against the locales for which there is display name data, using
0984: * normal fallback rules. If no locale matches, the default display name is returned.
0985: * @return the display name for the rule set
0986: * @draft ICU 3.2
0987: * @see #getRuleSetDisplayNames
0988: * @throws IllegalArgumentException if ruleSetName is not a valid rule set name for this format
0989: * @provisional This API might change or be removed in a future release.
0990: */
0991: public String getRuleSetDisplayName(String ruleSetName,
0992: ULocale locale) {
0993: String[] rsnames = publicRuleSetNames;
0994: for (int ix = 0; ix < rsnames.length; ++ix) {
0995: if (rsnames[ix].equals(ruleSetName)) {
0996: String[] names = getNameListForLocale(locale);
0997: if (names != null) {
0998: return names[ix];
0999: }
1000: return rsnames[ix].substring(1);
1001: }
1002: }
1003: throw new IllegalArgumentException(
1004: "unrecognized rule set name: " + ruleSetName);
1005: }
1006:
1007: /**
1008: * Return the rule set display name for the provided rule set in the current default locale.
1009: * @return the display name for the rule set
1010: * @draft ICU 3.2
1011: * @see #getRuleSetDisplayName(String,ULocale)
1012: * @provisional This API might change or be removed in a future release.
1013: */
1014: public String getRuleSetDisplayName(String ruleSetName) {
1015: return getRuleSetDisplayName(ruleSetName, ULocale.getDefault());
1016: }
1017:
1018: /**
1019: * Formats the specified number according to the specified rule set.
1020: * @param number The number to format.
1021: * @param ruleSet The name of the rule set to format the number with.
1022: * This must be the name of a valid public rule set for this formatter.
1023: * @return A textual representation of the number.
1024: * @stable ICU 2.0
1025: */
1026: public String format(double number, String ruleSet)
1027: throws IllegalArgumentException {
1028: if (ruleSet.startsWith("%%")) {
1029: throw new IllegalArgumentException(
1030: "Can't use internal rule set");
1031: }
1032: return format(number, findRuleSet(ruleSet));
1033: }
1034:
1035: /**
1036: * Formats the specified number according to the specified rule set.
1037: * (If the specified rule set specifies a master ["x.0"] rule, this function
1038: * ignores it. Convert the number to a double first if you ned it.) This
1039: * function preserves all the precision in the long-- it doesn't convert it
1040: * to a double.
1041: * @param number The number to format.
1042: * @param ruleSet The name of the rule set to format the number with.
1043: * This must be the name of a valid public rule set for this formatter.
1044: * @return A textual representation of the number.
1045: * @stable ICU 2.0
1046: */
1047: public String format(long number, String ruleSet)
1048: throws IllegalArgumentException {
1049: if (ruleSet.startsWith("%%")) {
1050: throw new IllegalArgumentException(
1051: "Can't use internal rule set");
1052: }
1053: return format(number, findRuleSet(ruleSet));
1054: }
1055:
1056: /**
1057: * Formats the specified number using the formatter's default rule set.
1058: * (The default rule set is the last public rule set defined in the description.)
1059: * @param number The number to format.
1060: * @param toAppendTo A StringBuffer that the result should be appended to.
1061: * @param ignore This function doesn't examine or update the field position.
1062: * @return toAppendTo
1063: * @stable ICU 2.0
1064: */
1065: public StringBuffer format(double number, StringBuffer toAppendTo,
1066: FieldPosition ignore) {
1067: // this is one of the inherited format() methods. Since it doesn't
1068: // have a way to select the rule set to use, it just uses the
1069: // default one
1070: toAppendTo.append(format(number, defaultRuleSet));
1071: return toAppendTo;
1072: }
1073:
1074: /**
1075: * Formats the specified number using the formatter's default rule set.
1076: * (The default rule set is the last public rule set defined in the description.)
1077: * (If the specified rule set specifies a master ["x.0"] rule, this function
1078: * ignores it. Convert the number to a double first if you ned it.) This
1079: * function preserves all the precision in the long-- it doesn't convert it
1080: * to a double.
1081: * @param number The number to format.
1082: * @param toAppendTo A StringBuffer that the result should be appended to.
1083: * @param ignore This function doesn't examine or update the field position.
1084: * @return toAppendTo
1085: * @stable ICU 2.0
1086: */
1087: public StringBuffer format(long number, StringBuffer toAppendTo,
1088: FieldPosition ignore) {
1089: // this is one of the inherited format() methods. Since it doesn't
1090: // have a way to select the rule set to use, it just uses the
1091: // default one
1092: toAppendTo.append(format(number, defaultRuleSet));
1093: return toAppendTo;
1094: }
1095:
1096: /**
1097: * <strong><font face=helvetica color=red>NEW</font></strong>
1098: * Implement com.ibm.icu.text.NumberFormat:
1099: * Format a BigInteger.
1100: * @stable ICU 2.0
1101: */
1102: public StringBuffer format(BigInteger number,
1103: StringBuffer toAppendTo, FieldPosition pos) {
1104: return format(new com.ibm.icu.math.BigDecimal(number),
1105: toAppendTo, pos);
1106: }
1107:
1108: //#ifndef FOUNDATION
1109: /**
1110: * <strong><font face=helvetica color=red>NEW</font></strong>
1111: * Implement com.ibm.icu.text.NumberFormat:
1112: * Format a BigDecimal.
1113: * @stable ICU 2.0
1114: */
1115: public StringBuffer format(java.math.BigDecimal number,
1116: StringBuffer toAppendTo, FieldPosition pos) {
1117: return format(new com.ibm.icu.math.BigDecimal(number),
1118: toAppendTo, pos);
1119: }
1120:
1121: //#endif
1122:
1123: /**
1124: * <strong><font face=helvetica color=red>NEW</font></strong>
1125: * Implement com.ibm.icu.text.NumberFormat:
1126: * Format a BigDecimal.
1127: * @stable ICU 2.0
1128: */
1129: public StringBuffer format(com.ibm.icu.math.BigDecimal number,
1130: StringBuffer toAppendTo, FieldPosition pos) {
1131: // TEMPORARY:
1132: return format(number.doubleValue(), toAppendTo, pos);
1133: }
1134:
1135: /**
1136: * Parses the specfied string, beginning at the specified position, according
1137: * to this formatter's rules. This will match the string against all of the
1138: * formatter's public rule sets and return the value corresponding to the longest
1139: * parseable substring. This function's behavior is affected by the lenient
1140: * parse mode.
1141: * @param text The string to parse
1142: * @param parsePosition On entry, contains the position of the first character
1143: * in "text" to examine. On exit, has been updated to contain the position
1144: * of the first character in "text" that wasn't consumed by the parse.
1145: * @return The number that corresponds to the parsed text. This will be an
1146: * instance of either Long or Double, depending on whether the result has a
1147: * fractional part.
1148: * @see #setLenientParseMode
1149: * @stable ICU 2.0
1150: */
1151: public Number parse(String text, ParsePosition parsePosition) {
1152:
1153: // parsePosition tells us where to start parsing. We copy the
1154: // text in the string from here to the end inro a new string,
1155: // and create a new ParsePosition and result variable to use
1156: // for the duration of the parse operation
1157: String workingText = text.substring(parsePosition.getIndex());
1158: ParsePosition workingPos = new ParsePosition(0);
1159: Number tempResult = null;
1160:
1161: // keep track of the largest number of characters consumed in
1162: // the various trials, and the result that corresponds to it
1163: Number result = new Long(0);
1164: ParsePosition highWaterMark = new ParsePosition(workingPos
1165: .getIndex());
1166:
1167: // iterate over the public rule sets (beginning with the default one)
1168: // and try parsing the text with each of them. Keep track of which
1169: // one consumes the most characters: that's the one that determines
1170: // the result we return
1171: for (int i = ruleSets.length - 1; i >= 0; i--) {
1172: // skip private rule sets
1173: if (ruleSets[i].getName().startsWith("%%")) {
1174: continue;
1175: }
1176:
1177: // try parsing the string with the rule set. If it gets past the
1178: // high-water mark, update the high-water mark and the result
1179: tempResult = ruleSets[i].parse(workingText, workingPos,
1180: Double.MAX_VALUE);
1181: if (workingPos.getIndex() > highWaterMark.getIndex()) {
1182: result = tempResult;
1183: highWaterMark.setIndex(workingPos.getIndex());
1184: }
1185: // commented out because this API on ParsePosition doesn't exist in 1.1.x
1186: // if (workingPos.getErrorIndex() > highWaterMark.getErrorIndex()) {
1187: // highWaterMark.setErrorIndex(workingPos.getErrorIndex());
1188: // }
1189:
1190: // if we manage to use up all the characters in the string,
1191: // we don't have to try any more rule sets
1192: if (highWaterMark.getIndex() == workingText.length()) {
1193: break;
1194: }
1195:
1196: // otherwise, reset our internal parse position to the
1197: // beginning and try again with the next rule set
1198: workingPos.setIndex(0);
1199: }
1200:
1201: // add the high water mark to our original parse position and
1202: // return the result
1203: parsePosition.setIndex(parsePosition.getIndex()
1204: + highWaterMark.getIndex());
1205: // commented out because this API on ParsePosition doesn't exist in 1.1.x
1206: // if (highWaterMark.getIndex() == 0) {
1207: // parsePosition.setErrorIndex(parsePosition.getIndex() + highWaterMark.getErrorIndex());
1208: // }
1209: return result;
1210: }
1211:
1212: /**
1213: * Turns lenient parse mode on and off.
1214: *
1215: * When in lenient parse mode, the formatter uses a Collator for parsing the text.
1216: * Only primary differences are treated as significant. This means that case
1217: * differences, accent differences, alternate spellings of the same letter
1218: * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
1219: * matching the text. In many cases, numerals will be accepted in place of words
1220: * or phrases as well.
1221: *
1222: * For example, all of the following will correctly parse as 255 in English in
1223: * lenient-parse mode:
1224: * <br>"two hundred fifty-five"
1225: * <br>"two hundred fifty five"
1226: * <br>"TWO HUNDRED FIFTY-FIVE"
1227: * <br>"twohundredfiftyfive"
1228: * <br>"2 hundred fifty-5"
1229: *
1230: * The Collator used is determined by the locale that was
1231: * passed to this object on construction. The description passed to this object
1232: * on construction may supply additional collation rules that are appended to the
1233: * end of the default collator for the locale, enabling additional equivalences
1234: * (such as adding more ignorable characters or permitting spelled-out version of
1235: * symbols; see the demo program for examples).
1236: *
1237: * It's important to emphasize that even strict parsing is relatively lenient: it
1238: * will accept some text that it won't produce as output. In English, for example,
1239: * it will correctly parse "two hundred zero" and "fifteen hundred".
1240: *
1241: * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
1242: * @see RuleBasedCollator
1243: * @stable ICU 2.0
1244: */
1245: public void setLenientParseMode(boolean enabled) {
1246: lenientParse = enabled;
1247:
1248: // if we're leaving lenient-parse mode, throw away the collator
1249: // we've been using
1250: if (!enabled) {
1251: collator = null;
1252: }
1253: }
1254:
1255: /**
1256: * Returns true if lenient-parse mode is turned on. Lenient parsing is off
1257: * by default.
1258: * @return true if lenient-parse mode is turned on.
1259: * @see #setLenientParseMode
1260: * @stable ICU 2.0
1261: */
1262: public boolean lenientParseEnabled() {
1263: return lenientParse;
1264: }
1265:
1266: /**
1267: * Override the default rule set to use. If ruleSetName is null, reset
1268: * to the initial default rule set.
1269: * @param ruleSetName the name of the rule set, or null to reset the initial default.
1270: * @throws IllegalArgumentException if ruleSetName is not the name of a public ruleset.
1271: * @stable ICU 2.0
1272: */
1273: public void setDefaultRuleSet(String ruleSetName) {
1274: if (ruleSetName == null) {
1275: if (publicRuleSetNames.length > 0) {
1276: defaultRuleSet = findRuleSet(publicRuleSetNames[0]);
1277: } else {
1278: defaultRuleSet = null;
1279: int n = ruleSets.length;
1280: while (--n >= 0) {
1281: if (ruleSets[n].isPublic()) {
1282: defaultRuleSet = ruleSets[n];
1283: break;
1284: }
1285: }
1286: }
1287: } else if (ruleSetName.startsWith("%%")) {
1288: throw new IllegalArgumentException(
1289: "cannot use private rule set: " + ruleSetName);
1290: } else {
1291: defaultRuleSet = findRuleSet(ruleSetName);
1292: }
1293: }
1294:
1295: /**
1296: * Return the name of the current default rule set.
1297: * @return the name of the current default rule set, if it is public, else the empty string.
1298: * @stable ICU 3.0
1299: */
1300: public String getDefaultRuleSetName() {
1301: if (defaultRuleSet != null && defaultRuleSet.isPublic()) {
1302: return defaultRuleSet.getName();
1303: }
1304: return "";
1305: }
1306:
1307: //-----------------------------------------------------------------------
1308: // package-internal API
1309: //-----------------------------------------------------------------------
1310:
1311: /**
1312: * Returns a reference to the formatter's default rule set. The default
1313: * rule set is the last public rule set in the description, or the one
1314: * most recently set by setDefaultRuleSet.
1315: * @return The formatter's default rule set.
1316: */
1317: NFRuleSet getDefaultRuleSet() {
1318: return defaultRuleSet;
1319: }
1320:
1321: /**
1322: * Returns the collator to use for lenient parsing. The collator is lazily created:
1323: * this function creates it the first time it's called.
1324: * @return The collator to use for lenient parsing, or null if lenient parsing
1325: * is turned off.
1326: */
1327: Collator getCollator() {
1328: // lazy-evaulate the collator
1329: if (collator == null && lenientParse) {
1330: try {
1331: // create a default collator based on the formatter's locale,
1332: // then pull out that collator's rules, append any additional
1333: // rules specified in the description, and create a _new_
1334: // collator based on the combinaiton of those rules
1335: RuleBasedCollator temp = (RuleBasedCollator) Collator
1336: .getInstance(locale);
1337: String rules = temp.getRules() + lenientParseRules;
1338:
1339: collator = new RuleBasedCollator(rules);
1340: collator
1341: .setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1342: } catch (Exception e) {
1343: // If we get here, it means we have a malformed set of
1344: // collation rules, which hopefully won't happen
1345: if (DEBUG) {
1346: e.printStackTrace();
1347: }
1348: collator = null;
1349: }
1350: }
1351:
1352: // if lenient-parse mode is off, this will be null
1353: // (see setLenientParseMode())
1354: return collator;
1355: }
1356:
1357: /**
1358: * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1359: * instances owned by this formatter. This object is lazily created: this function
1360: * creates it the first time it's called.
1361: * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1362: * instances owned by this formatter.
1363: */
1364: DecimalFormatSymbols getDecimalFormatSymbols() {
1365: // lazy-evaluate the DecimalFormatSymbols object. This object
1366: // is shared by all DecimalFormat instances belonging to this
1367: // formatter
1368: if (decimalFormatSymbols == null) {
1369: decimalFormatSymbols = new DecimalFormatSymbols(locale);
1370: }
1371: return decimalFormatSymbols;
1372: }
1373:
1374: //-----------------------------------------------------------------------
1375: // construction implementation
1376: //-----------------------------------------------------------------------
1377:
1378: /**
1379: * This extracts the special information from the rule sets before the
1380: * main parsing starts. Extra whitespace must have already been removed
1381: * from the description. If found, the special information is removed from the
1382: * description and returned, otherwise the description is unchanged and null
1383: * is returned. Note: the trailing semicolon at the end of the special
1384: * rules is stripped.
1385: * @param description the rbnf description with extra whitespace removed
1386: * @param specialName the name of the special rule text to extract
1387: * @return the special rule text, or null if the rule was not found
1388: */
1389: private String extractSpecial(StringBuffer description,
1390: String specialName) {
1391: String result = null;
1392: int lp = Utility.indexOf(description, specialName);
1393: if (lp != -1) {
1394: // we've got to make sure we're not in the middle of a rule
1395: // (where specialName would actually get treated as
1396: // rule text)
1397: if (lp == 0 || description.charAt(lp - 1) == ';') {
1398: // locate the beginning and end of the actual special
1399: // rules (there may be whitespace between the name and
1400: // the first token in the description)
1401: int lpEnd = Utility.indexOf(description, ";%", lp);
1402:
1403: if (lpEnd == -1) {
1404: lpEnd = description.length() - 1; // later we add 1 back to get the '%'
1405: }
1406: int lpStart = lp + specialName.length();
1407: while (lpStart < lpEnd
1408: && UCharacterProperty
1409: .isRuleWhiteSpace(description
1410: .charAt(lpStart))) {
1411: ++lpStart;
1412: }
1413:
1414: // copy out the special rules
1415: result = description.substring(lpStart, lpEnd);
1416:
1417: // remove the special rule from the description
1418: description.delete(lp, lpEnd + 1); // delete the semicolon but not the '%'
1419: }
1420: }
1421: return result;
1422: }
1423:
/**
 * This function parses the description and uses it to build all of the
 * internal data structures that the formatter uses to do formatting.
 * @param description The description of the formatter's desired behavior.
 * This is either passed in by the caller or loaded out of a resource
 * by one of the constructors, and is in the description format specified
 * in the class docs.
 * @param localizations The rule set name localization data, which is
 * handed to initLocalizations; null when the constructor has none.
 */
1432: private void init(String description, String[][] localizations) {
1433: initLocalizations(localizations);
1434:
// start by stripping the trailing whitespace from all the rules
// (this is all the whitespace following each semicolon in the
// description). This allows us to look for rule-set boundaries
// by searching for ";%" without having to worry about whitespace
// between the ; and the %
1440: StringBuffer descBuf = stripWhitespace(description);
1441:
// check to see if there's a set of lenient-parse rules. If there
// is, pull them out into our temporary holding place for them,
// and delete them from the description before the real
// description-parsing code sees them
1446:
1447: lenientParseRules = extractSpecial(descBuf, "%%lenient-parse:");
1448: postProcessRules = extractSpecial(descBuf, "%%post-process:");
1449:
1450: // pre-flight parsing the description and count the number of
1451: // rule sets (";%" marks the end of one rule set and the beginning
1452: // of the next)
1453: int numRuleSets = 0;
1454: for (int p = Utility.indexOf(descBuf, ";%"); p != -1; p = Utility
1455: .indexOf(descBuf, ";%", p)) {
1456: ++numRuleSets;
1457: ++p;
1458: }
1459: ++numRuleSets;
1460:
1461: // our rule list is an array of the apprpriate size
1462: ruleSets = new NFRuleSet[numRuleSets];
1463:
1464: // divide up the descriptions into individual rule-set descriptions
1465: // and store them in a temporary array. At each step, we also
1466: // new up a rule set, but all this does is initialize its name
1467: // and remove it from its description. We can't actually parse
1468: // the rest of the descriptions and finish initializing everything
1469: // because we have to know the names and locations of all the rule
1470: // sets before we can actually set everything up
1471: String[] ruleSetDescriptions = new String[numRuleSets];
1472:
1473: int curRuleSet = 0;
1474: int start = 0;
1475: for (int p = Utility.indexOf(descBuf, ";%"); p != -1; p = Utility
1476: .indexOf(descBuf, ";%", start)) {
1477: ruleSetDescriptions[curRuleSet] = descBuf.substring(start,
1478: p + 1);
1479: ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions,
1480: curRuleSet);
1481: ++curRuleSet;
1482: start = p + 1;
1483: }
1484: ruleSetDescriptions[curRuleSet] = descBuf.substring(start);
1485: ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions,
1486: curRuleSet);
1487:
1488: // now we can take note of the formatter's default rule set, which
1489: // is the last public rule set in the description (it's the last
1490: // rather than the first so that a user can create a new formatter
1491: // from an existing formatter and change its default bevhaior just
1492: // by appending more rule sets to the end)
1493:
1494: // {dlf} Initialization of a fraction rule set requires the default rule
1495: // set to be known. For purposes of initialization, this is always the
1496: // last public rule set, no matter what the localization data says.
1497: defaultRuleSet = ruleSets[ruleSets.length - 1];
1498: for (int i = ruleSets.length - 1; i >= 0; --i) {
1499: if (!ruleSets[i].getName().startsWith("%%")) {
1500: defaultRuleSet = ruleSets[i];
1501: break;
1502: }
1503: }
1504:
1505: // finally, we can go back through the temporary descriptions
1506: // list and finish seting up the substructure (and we throw
1507: // away the temporary descriptions as we go)
1508: for (int i = 0; i < ruleSets.length; i++) {
1509: ruleSets[i].parseRules(ruleSetDescriptions[i], this );
1510: ruleSetDescriptions[i] = null;
1511: }
1512:
1513: // Now that the rules are initialized, the 'real' default rule
1514: // set can be adjusted by the localization data.
1515:
1516: // count the number of public rule sets
1517: // (public rule sets have names that begin with % instead of %%)
1518: int publicRuleSetCount = 0;
1519: for (int i = 0; i < ruleSets.length; i++) {
1520: if (!ruleSets[i].getName().startsWith("%%")) {
1521: ++publicRuleSetCount;
1522: }
1523: }
1524:
1525: // prepare an array of the proper size and copy the names into it
1526: String[] publicRuleSetTemp = new String[publicRuleSetCount];
1527: publicRuleSetCount = 0;
1528: for (int i = ruleSets.length - 1; i >= 0; i--) {
1529: if (!ruleSets[i].getName().startsWith("%%")) {
1530: publicRuleSetTemp[publicRuleSetCount++] = ruleSets[i]
1531: .getName();
1532: }
1533: }
1534:
1535: if (publicRuleSetNames != null) {
1536: // confirm the names, if any aren't in the rules, that's an error
1537: // it is ok if the rules contain public rule sets that are not in this list
1538: loop: for (int i = 0; i < publicRuleSetNames.length; ++i) {
1539: String name = publicRuleSetNames[i];
1540: for (int j = 0; j < publicRuleSetTemp.length; ++j) {
1541: if (name.equals(publicRuleSetTemp[j])) {
1542: continue loop;
1543: }
1544: }
1545: throw new IllegalArgumentException(
1546: "did not find public rule set: " + name);
1547: }
1548:
1549: defaultRuleSet = findRuleSet(publicRuleSetNames[0]); // might be different
1550: } else {
1551: publicRuleSetNames = publicRuleSetTemp;
1552: }
1553: }
1554:
1555: /**
1556: * Take the localizations array and create a Map from the locale strings to
1557: * the localization arrays.
1558: */
1559: private void initLocalizations(String[][] localizations) {
1560: if (localizations != null) {
1561: publicRuleSetNames = (String[]) localizations[0].clone();
1562:
1563: Map m = new HashMap();
1564: for (int i = 1; i < localizations.length; ++i) {
1565: String[] data = localizations[i];
1566: String locale = data[0];
1567: String[] names = new String[data.length - 1];
1568: if (names.length != publicRuleSetNames.length) {
1569: throw new IllegalArgumentException(
1570: "public name length: "
1571: + publicRuleSetNames.length
1572: + " != localized names[" + i
1573: + "] length: " + names.length);
1574: }
1575: System.arraycopy(data, 1, names, 0, names.length);
1576: m.put(locale, names);
1577: }
1578:
1579: if (!m.isEmpty()) {
1580: ruleSetDisplayNames = m;
1581: }
1582: }
1583: }
1584:
1585: /**
1586: * This function is used by init() to strip whitespace between rules (i.e.,
1587: * after semicolons).
1588: * @param description The formatter description
1589: * @return The description with all the whitespace that follows semicolons
1590: * taken out.
1591: */
1592: private StringBuffer stripWhitespace(String description) {
1593: // since we don't have a method that deletes characters (why?!!)
1594: // create a new StringBuffer to copy the text into
1595: StringBuffer result = new StringBuffer();
1596:
1597: // iterate through the characters...
1598: int start = 0;
1599: while (start != -1 && start < description.length()) {
1600: // seek to the first non-whitespace character...
1601: while (start < description.length()
1602: && UCharacterProperty.isRuleWhiteSpace(description
1603: .charAt(start))) {
1604: ++start;
1605: }
1606:
1607: //if the first non-whitespace character is semicolon, skip it and continue
1608: if (start < description.length()
1609: && description.charAt(start) == ';') {
1610: start += 1;
1611: continue;
1612: }
1613:
1614: // locate the next semicolon in the text and copy the text from
1615: // our current position up to that semicolon into the result
1616: int p;
1617: p = description.indexOf(';', start);
1618: if (p == -1) {
1619: // or if we don't find a semicolon, just copy the rest of
1620: // the string into the result
1621: result.append(description.substring(start));
1622: start = -1;
1623: } else if (p < description.length()) {
1624: result.append(description.substring(start, p + 1));
1625: start = p + 1;
1626: }
1627:
1628: // when we get here, we've seeked off the end of the sring, and
1629: // we terminate the loop (we continue until *start* is -1 rather
1630: // than until *p* is -1, because otherwise we'd miss the last
1631: // rule in the description)
1632: else {
1633: start = -1;
1634: }
1635: }
1636: return result;
1637: }
1638:
1639: /**
1640: * This function is called ONLY DURING CONSTRUCTION to fill in the
1641: * defaultRuleSet variable once we've set up all the rule sets.
1642: * The default rule set is the last public rule set in the description.
1643: * (It's the last rather than the first so that a caller can append
1644: * text to the end of an existing formatter description to change its
1645: * behavior.)
1646: */
1647: private void initDefaultRuleSet() {
1648: // seek backward from the end of the list until we reach a rule set
1649: // whose name DOESN'T begin with %%. That's the default rule set
1650: for (int i = ruleSets.length - 1; i >= 0; --i) {
1651: if (!ruleSets[i].getName().startsWith("%%")) {
1652: defaultRuleSet = ruleSets[i];
1653: return;
1654: }
1655: }
1656: defaultRuleSet = ruleSets[ruleSets.length - 1];
1657: }
1658:
1659: //-----------------------------------------------------------------------
1660: // formatting implementation
1661: //-----------------------------------------------------------------------
1662:
1663: /**
1664: * Bottleneck through which all the public format() methods
1665: * that take a double pass. By the time we get here, we know
1666: * which rule set we're using to do the formatting.
1667: * @param number The number to format
1668: * @param ruleSet The rule set to use to format the number
1669: * @return The text that resulted from formatting the number
1670: */
1671: private String format(double number, NFRuleSet ruleSet) {
1672: // all API format() routines that take a double vector through
1673: // here. Create an empty string buffer where the result will
1674: // be built, and pass it to the rule set (along with an insertion
1675: // position of 0 and the number being formatted) to the rule set
1676: // for formatting
1677: StringBuffer result = new StringBuffer();
1678: ruleSet.format(number, result, 0);
1679: postProcess(result, ruleSet);
1680: return result.toString();
1681: }
1682:
1683: /**
1684: * Bottleneck through which all the public format() methods
1685: * that take a long pass. By the time we get here, we know
1686: * which rule set we're using to do the formatting.
1687: * @param number The number to format
1688: * @param ruleSet The rule set to use to format the number
1689: * @return The text that resulted from formatting the number
1690: */
1691: private String format(long number, NFRuleSet ruleSet) {
1692: // all API format() routines that take a double vector through
1693: // here. We have these two identical functions-- one taking a
1694: // double and one taking a long-- the couple digits of precision
1695: // that long has but double doesn't (both types are 8 bytes long,
1696: // but double has to borrow some of the mantissa bits to hold
1697: // the exponent).
1698: // Create an empty string buffer where the result will
1699: // be built, and pass it to the rule set (along with an insertion
1700: // position of 0 and the number being formatted) to the rule set
1701: // for formatting
1702: StringBuffer result = new StringBuffer();
1703: ruleSet.format(number, result, 0);
1704: postProcess(result, ruleSet);
1705: return result.toString();
1706: }
1707:
1708: /**
1709: * Post-process the rules if we have a post-processor.
1710: */
1711: private void postProcess(StringBuffer result, NFRuleSet ruleSet) {
1712: if (postProcessRules != null) {
1713: if (postProcessor == null) {
1714: int ix = postProcessRules.indexOf(";");
1715: if (ix == -1) {
1716: ix = postProcessRules.length();
1717: }
1718: String ppClassName = postProcessRules.substring(0, ix)
1719: .trim();
1720: try {
1721: Class cls = Class.forName(ppClassName);
1722: postProcessor = (RBNFPostProcessor) cls
1723: .newInstance();
1724: postProcessor.init(this , postProcessRules);
1725: } catch (Exception e) {
1726: // if debug, print it out
1727: System.out.println("could not locate "
1728: + ppClassName + ", error "
1729: + e.getClass().getName() + ", "
1730: + e.getMessage());
1731: postProcessor = null;
1732: postProcessRules = null; // don't try again
1733: return;
1734: }
1735: }
1736:
1737: postProcessor.process(result, ruleSet);
1738: }
1739: }
1740:
1741: /**
1742: * Returns the named rule set. Throws an IllegalArgumentException
1743: * if this formatter doesn't have a rule set with that name.
1744: * @param name The name of the desired rule set
1745: * @return The rule set with that name
1746: */
1747: NFRuleSet findRuleSet(String name) throws IllegalArgumentException {
1748: for (int i = 0; i < ruleSets.length; i++) {
1749: if (ruleSets[i].getName().equals(name)) {
1750: return ruleSets[i];
1751: }
1752: }
1753: throw new IllegalArgumentException("No rule set named " + name);
1754: }
1755: }
|