001: /*
002: **********************************************************************
003: * Copyright (c) 2002, International Business Machines Corporation
004: * and others. All Rights Reserved.
005: **********************************************************************
006: * Date Name Description
007: * 01/14/2002 aliu Creation.
008: **********************************************************************
009: */
010:
011: package com.ibm.icu.text;
012:
013: import com.ibm.icu.impl.Utility;
014:
015: /**
016: * A replacer that produces static text as its output. The text may
017: * contain transliterator stand-in characters that represent nested
018: * UnicodeReplacer objects, making it possible to encode a tree of
019: * replacers in a StringReplacer. A StringReplacer that contains such
020: * stand-ins is called a <em>complex</em> StringReplacer. A complex
021: * StringReplacer has a slower processing loop than a non-complex one.
022: * @author Alan Liu
023: */
024: class StringReplacer implements UnicodeReplacer {
025:
026: /**
027: * Output text, possibly containing stand-in characters that
028: * represent nested UnicodeReplacers.
029: */
030: private String output;
031:
032: /**
033: * Cursor position. Value is ignored if hasCursor is false.
034: */
035: private int cursorPos;
036:
037: /**
038: * True if this object outputs a cursor position.
039: */
040: private boolean hasCursor;
041:
042: /**
043: * A complex object contains nested replacers and requires more
044: * complex processing. StringReplacers are initially assumed to
045: * be complex. If no nested replacers are seen during processing,
046: * then isComplex is set to false, and future replacements are
047: * short circuited for better performance.
048: */
049: private boolean isComplex;
050:
051: /**
052: * Object that translates stand-in characters in 'output' to
053: * UnicodeReplacer objects.
054: */
055: private final RuleBasedTransliterator.Data data;
056:
057: /**
058: * Construct a StringReplacer that sets the emits the given output
059: * text and sets the cursor to the given position.
060: * @param theOutput text that will replace input text when the
061: * replace() method is called. May contain stand-in characters
062: * that represent nested replacers.
063: * @param theCursorPos cursor position that will be returned by
064: * the replace() method
065: * @param theData transliterator context object that translates
066: * stand-in characters to UnicodeReplacer objects
067: */
068: public StringReplacer(String theOutput, int theCursorPos,
069: RuleBasedTransliterator.Data theData) {
070: output = theOutput;
071: cursorPos = theCursorPos;
072: hasCursor = true;
073: data = theData;
074: isComplex = true;
075: }
076:
077: /**
078: * Construct a StringReplacer that sets the emits the given output
079: * text and does not modify the cursor.
080: * @param theOutput text that will replace input text when the
081: * replace() method is called. May contain stand-in characters
082: * that represent nested replacers.
083: * @param theData transliterator context object that translates
084: * stand-in characters to UnicodeReplacer objects
085: */
086: public StringReplacer(String theOutput,
087: RuleBasedTransliterator.Data theData) {
088: output = theOutput;
089: cursorPos = 0;
090: hasCursor = false;
091: data = theData;
092: isComplex = true;
093: }
094:
095: //= public static UnicodeReplacer valueOf(String output,
096: //= int cursorPos,
097: //= RuleBasedTransliterator.Data data) {
098: //= if (output.length() == 1) {
099: //= char c = output.charAt(0);
100: //= UnicodeReplacer r = data.lookupReplacer(c);
101: //= if (r != null) {
102: //= return r;
103: //= }
104: //= }
105: //= return new StringReplacer(output, cursorPos, data);
106: //= }
107:
108: /**
109: * UnicodeReplacer API
110: */
111: public int replace(Replaceable text, int start, int limit,
112: int[] cursor) {
113: int outLen;
114: int newStart = 0;
115:
116: // NOTE: It should be possible to _always_ run the complex
117: // processing code; just slower. If not, then there is a bug
118: // in the complex processing code.
119:
120: // Simple (no nested replacers) Processing Code :
121: if (!isComplex) {
122: text.replace(start, limit, output);
123: outLen = output.length();
124:
125: // Setup default cursor position (for cursorPos within output)
126: newStart = cursorPos;
127: }
128:
129: // Complex (nested replacers) Processing Code :
130: else {
131: /* When there are segments to be copied, use the Replaceable.copy()
132: * API in order to retain out-of-band data. Copy everything to the
133: * end of the string, then copy them back over the key. This preserves
134: * the integrity of indices into the key and surrounding context while
135: * generating the output text.
136: */
137: StringBuffer buf = new StringBuffer();
138: int oOutput; // offset into 'output'
139: isComplex = false;
140:
141: // The temporary buffer starts at tempStart, and extends
142: // to destLimit + tempExtra. The start of the buffer has a single
143: // character from before the key. This provides style
144: // data when addition characters are filled into the
145: // temporary buffer. If there is nothing to the left, use
146: // the non-character U+FFFF, which Replaceable subclasses
147: // should treat specially as a "no-style character."
148: // destStart points to the point after the style context
149: // character, so it is tempStart+1 or tempStart+2.
150: int tempStart = text.length(); // start of temp buffer
151: int destStart = tempStart; // copy new text to here
152: if (start > 0) {
153: int len = UTF16.getCharCount(text.char32At(start - 1));
154: text.copy(start - len, start, tempStart);
155: destStart += len;
156: } else {
157: text.replace(tempStart, tempStart, "\uFFFF");
158: destStart++;
159: }
160: int destLimit = destStart;
161: int tempExtra = 0; // temp chars after destLimit
162:
163: for (oOutput = 0; oOutput < output.length();) {
164: if (oOutput == cursorPos) {
165: // Record the position of the cursor
166: newStart = destLimit - destStart; // relative to start
167: }
168: int c = UTF16.charAt(output, oOutput);
169:
170: // When we are at the last position copy the right style
171: // context character into the temporary buffer. We don't
172: // do this before because it will provide an incorrect
173: // right context for previous replace() operations.
174: int nextIndex = oOutput + UTF16.getCharCount(c);
175: if (nextIndex == output.length()) {
176: tempExtra = UTF16
177: .getCharCount(text.char32At(limit));
178: text.copy(limit, limit + tempExtra, destLimit);
179: }
180:
181: UnicodeReplacer r = data.lookupReplacer(c);
182: if (r == null) {
183: // Accumulate straight (non-segment) text.
184: UTF16.append(buf, c);
185: } else {
186: isComplex = true;
187:
188: // Insert any accumulated straight text.
189: if (buf.length() > 0) {
190: text.replace(destLimit, destLimit, buf
191: .toString());
192: destLimit += buf.length();
193: buf.setLength(0);
194: }
195:
196: // Delegate output generation to replacer object
197: int len = r.replace(text, destLimit, destLimit,
198: cursor);
199: destLimit += len;
200: }
201: oOutput = nextIndex;
202: }
203: // Insert any accumulated straight text.
204: if (buf.length() > 0) {
205: text.replace(destLimit, destLimit, buf.toString());
206: destLimit += buf.length();
207: }
208: if (oOutput == cursorPos) {
209: // Record the position of the cursor
210: newStart = destLimit - destStart; // relative to start
211: }
212:
213: outLen = destLimit - destStart;
214:
215: // Copy new text to start, and delete it
216: text.copy(destStart, destLimit, start);
217: text.replace(tempStart + outLen, destLimit + tempExtra
218: + outLen, "");
219:
220: // Delete the old text (the key)
221: text.replace(start + outLen, limit + outLen, "");
222: }
223:
224: if (hasCursor) {
225: // Adjust the cursor for positions outside the key. These
226: // refer to code points rather than code units. If cursorPos
227: // is within the output string, then use newStart, which has
228: // already been set above.
229: if (cursorPos < 0) {
230: newStart = start;
231: int n = cursorPos;
232: // Outside the output string, cursorPos counts code points
233: while (n < 0 && newStart > 0) {
234: newStart -= UTF16.getCharCount(text
235: .char32At(newStart - 1));
236: ++n;
237: }
238: newStart += n;
239: } else if (cursorPos > output.length()) {
240: newStart = start + outLen;
241: int n = cursorPos - output.length();
242: // Outside the output string, cursorPos counts code points
243: while (n > 0 && newStart < text.length()) {
244: newStart += UTF16.getCharCount(text
245: .char32At(newStart));
246: --n;
247: }
248: newStart += n;
249: } else {
250: // Cursor is within output string. It has been set up above
251: // to be relative to start.
252: newStart += start;
253: }
254:
255: cursor[0] = newStart;
256: }
257:
258: return outLen;
259: }
260:
261: /**
262: * UnicodeReplacer API
263: */
264: public String toReplacerPattern(boolean escapeUnprintable) {
265: StringBuffer rule = new StringBuffer();
266: StringBuffer quoteBuf = new StringBuffer();
267:
268: int cursor = cursorPos;
269:
270: // Handle a cursor preceding the output
271: if (hasCursor && cursor < 0) {
272: while (cursor++ < 0) {
273: Utility.appendToRule(rule, '@', true,
274: escapeUnprintable, quoteBuf);
275: }
276: // Fall through and append '|' below
277: }
278:
279: for (int i = 0; i < output.length(); ++i) {
280: if (hasCursor && i == cursor) {
281: Utility.appendToRule(rule, '|', true,
282: escapeUnprintable, quoteBuf);
283: }
284: char c = output.charAt(i); // Ok to use 16-bits here
285:
286: UnicodeReplacer r = data.lookupReplacer(c);
287: if (r == null) {
288: Utility.appendToRule(rule, c, false, escapeUnprintable,
289: quoteBuf);
290: } else {
291: StringBuffer buf = new StringBuffer(" ");
292: buf.append(r.toReplacerPattern(escapeUnprintable));
293: buf.append(' ');
294: Utility.appendToRule(rule, buf.toString(), true,
295: escapeUnprintable, quoteBuf);
296: }
297: }
298:
299: // Handle a cursor after the output. Use > rather than >= because
300: // if cursor == output.length() it is at the end of the output,
301: // which is the default position, so we need not emit it.
302: if (hasCursor && cursor > output.length()) {
303: cursor -= output.length();
304: while (cursor-- > 0) {
305: Utility.appendToRule(rule, '@', true,
306: escapeUnprintable, quoteBuf);
307: }
308: Utility.appendToRule(rule, '|', true, escapeUnprintable,
309: quoteBuf);
310: }
311: // Flush quoteBuf out to result
312: Utility.appendToRule(rule, -1, true, escapeUnprintable,
313: quoteBuf);
314:
315: return rule.toString();
316: }
317:
318: /**
319: * Union the set of all characters that may output by this object
320: * into the given set.
321: * @param toUnionTo the set into which to union the output characters
322: */
323: public void addReplacementSetTo(UnicodeSet toUnionTo) {
324: int ch;
325: for (int i = 0; i < output.length(); i += UTF16
326: .getCharCount(ch)) {
327: ch = UTF16.charAt(output, i);
328: UnicodeReplacer r = data.lookupReplacer(ch);
329: if (r == null) {
330: toUnionTo.add(ch);
331: } else {
332: r.addReplacementSetTo(toUnionTo);
333: }
334: }
335: }
336: }
337:
338: //eof
|