001: /* ====================================================================
002: * Tea - Copyright (c) 1997-2000 Walt Disney Internet Group
003: * ====================================================================
004: * The Tea Software License, Version 1.1
005: *
006: * Copyright (c) 2000 Walt Disney Internet Group. All rights reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Walt Disney Internet Group (http://opensource.go.com/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Tea", "TeaServlet", "Kettle", "Trove" and "BeanDoc" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact opensource@dig.com.
031: *
032: * 5. Products derived from this software may not be called "Tea",
033: * "TeaServlet", "Kettle" or "Trove", nor may "Tea", "TeaServlet",
034: * "Kettle", "Trove" or "BeanDoc" appear in their name, without prior
035: * written permission of the Walt Disney Internet Group.
036: *
037: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
038: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
039: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
040: * DISCLAIMED. IN NO EVENT SHALL THE WALT DISNEY INTERNET GROUP OR ITS
041: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
042: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
043: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
044: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
045: * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
046: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
047: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
048: * ====================================================================
049: *
050: * For more information about Tea, please see http://opensource.go.com/.
051: */
052:
053: package com.go.tea.io;
054:
055: import java.io.*;
056:
057: /******************************************************************************
058: * This reader handles unicode escapes in a character stream as defined by
059: * <i>The Java Language Specification</i>.
060: *
061: * <p>A unicode escape consists of six characters: '\' and 'u' followed by
062: * four hexadecimal digits. If the format of the escape is not correct, then
063: * the escape is unprocessed. To prevent a correctly formatted unicode escape
064: * from being processed, precede it with another '\'.
065: *
066: * @author Brian S O'Neill
067: * @version
068: * <!--$$Revision:--> 19 <!-- $-->, <!--$$JustDate:--> 12/11/00 <!-- $-->
069: * @deprecated Moved to com.go.trove.io package.
070: */
071: public class UnicodeReader extends EscapeReader {
072: /** Just a temporary buffer for holding the four hexadecimal digits. */
073: private char[] mMinibuf = new char[4];
074:
075: private boolean mEscaped;
076:
077: /**
078: * A UnicodeReader needs an underlying source Reader.
079: *
080: * @param source the source PositionReader
081: */
082: public UnicodeReader(Reader source) {
083: super (source, 6);
084: }
085:
086: public int read() throws IOException {
087: int c = mSource.read();
088:
089: if (c != '\\' || !mEscapesEnabled) {
090: mEscaped = false;
091: return c;
092: }
093:
094: c = mSource.read();
095:
096: // Have scanned "\\"? (two backslashes)
097: if (c == '\\') {
098: mEscaped = !mEscaped;
099: mSource.unread();
100: return '\\';
101: }
102:
103: // Have not scanned '\', 'u'?
104: if (c != 'u') {
105: mSource.unread();
106: return '\\';
107: }
108:
109: // At this point, have scanned '\', 'u'.
110:
111: // If previously escaped, then don't process unicode escape.
112: if (mEscaped) {
113: mEscaped = false;
114: mSource.unread();
115: return '\\';
116: }
117:
118: int len = mSource.read(mMinibuf, 0, 4);
119:
120: if (len == 4) {
121: try {
122: int val = Integer.valueOf(new String(mMinibuf, 0, 4),
123: 16).intValue();
124:
125: return val;
126: } catch (NumberFormatException e) {
127: // If the number is not a parseable as hexadecimal, then
128: // treat this as a bad format and do not process the
129: // unicode escape.
130: }
131: }
132:
133: // Unread the four hexadecimal characters and the leading 'u'.
134: if (len >= 0) {
135: mSource.unread(len + 1);
136: }
137:
138: return '\\';
139: }
140:
141: public static void main(String[] arg) throws Exception {
142: Tester.test(arg);
143: }
144:
145: private static class Tester {
146: public static void test(String[] arg) throws Exception {
147: String str = "This is \\" + "u0061 test.\n" + "This is \\"
148: + "u00612 test.\n" + "This is \\" + "u0061" + "\\"
149: + "u0061" + " test.\n" + "This is \\"
150: + "u061 test.\n" + "This is \\\\" + "u0061 test.\n"
151: + "This is \\" + "a test.\n";
152:
153: System.out.println("\nOriginal:\n");
154:
155: Reader reader = new StringReader(str);
156:
157: int c;
158: while ((c = reader.read()) >= 0) {
159: System.out.print((char) c);
160: }
161:
162: System.out.println("\nConverted:\n");
163:
164: reader = new StringReader(str);
165: reader = new UnicodeReader(reader);
166:
167: while ((c = reader.read()) != -1) {
168: System.out.print((char) c);
169: }
170:
171: System.out.println("\nUnread test 1:\n");
172:
173: reader = new StringReader(str);
174: PushbackPositionReader pr = new PushbackPositionReader(
175: new UnicodeReader(reader), 1);
176:
177: while ((c = pr.read()) != -1) {
178: pr.unread();
179: c = pr.read();
180: System.out.print((char) c);
181: }
182:
183: System.out.println("\nUnread test 2:\n");
184:
185: reader = new StringReader(str);
186: pr = new PushbackPositionReader(new UnicodeReader(reader),
187: 2);
188:
189: int i = 0;
190: while ((c = pr.read()) != -1) {
191: if ((i++ % 5) == 0) {
192: c = pr.read();
193: pr.unread();
194: pr.unread();
195: c = pr.read();
196: }
197:
198: System.out.print((char) c);
199: }
200:
201: System.out.println("\nUnread position test:\n");
202:
203: reader = new StringReader(str);
204: pr = new PushbackPositionReader(new UnicodeReader(reader),
205: 2);
206:
207: System.out.print(pr.getNextPosition() + "\t");
208: i = 0;
209: while ((c = pr.read()) != -1) {
210: if ((i++ % 5) == 0) {
211: c = pr.read();
212: pr.unread();
213: pr.unread();
214: c = pr.read();
215: }
216:
217: System.out.println((char) c);
218: System.out.print(pr.getNextPosition() + "\t");
219: }
220: }
221: }
222: }
|