001: /*
002: * Janino - An embedded Java[TM] compiler
003: *
004: * Copyright (c) 2006, Arno Unkrig
005: * All rights reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: * 2. Redistributions in binary form must reproduce the above
014: * copyright notice, this list of conditions and the following
015: * disclaimer in the documentation and/or other materials
016: * provided with the distribution.
017: * 3. The name of the author may not be used to endorse or promote
018: * products derived from this software without specific prior
019: * written permission.
020: *
021: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
022: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
023: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
025: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
026: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
027: * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
029: * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
030: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
031: * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
032: */
033:
034: package org.codehaus.janino;
035:
036: import java.io.*;
037:
038: /**
039: * A {@link FilterReader} that unescapes the "Unicode Escapes"
040: * as described in
041: * <a href="http://java.sun.com/docs/books/jls/second_edition/html/lexical.doc.html#100850">the
042: * Java Language Specification, 2nd edition</a>.
043: * <p>
044: * Notice that it is possible to formulate invalid escape sequences, e.g.
045: * "\u123g" ("g" is not a valid hex character). This is handled by
046: * throwing a {@link java.lang.RuntimeException}-derived
047: * {@link org.codehaus.janino.UnicodeUnescapeException}.
048: */
049: public class UnicodeUnescapeReader extends FilterReader {
050:
051: /**
052: * @param in
053: */
054: public UnicodeUnescapeReader(Reader in) {
055: super (in);
056: }
057:
058: /**
059: * Override {@link FilterReader#read()}.
060: *
061: * @throws UnicodeUnescapeException Invalid escape sequence encountered
062: */
063: public int read() throws IOException {
064: int c;
065:
066: // Read next character.
067: if (this .unreadChar == -1) {
068: c = this .in.read();
069: } else {
070: c = this .unreadChar;
071: this .unreadChar = -1;
072: }
073:
074: // Check for backslash-u escape sequence, preceeded with an even number
075: // of backslashes.
076: if (c != '\\' || this .oddPrecedingBackslashes) {
077: this .oddPrecedingBackslashes = false;
078: return c;
079: }
080:
081: // Read one character ahead and check if it is a "u".
082: c = this .in.read();
083: if (c != 'u') {
084: this .unreadChar = c;
085: this .oddPrecedingBackslashes = true;
086: return '\\';
087: }
088:
089: // Skip redundant "u"s.
090: do {
091: c = this .in.read();
092: if (c == -1)
093: throw new UnicodeUnescapeException(
094: "Incomplete escape sequence");
095: } while (c == 'u');
096:
097: // Decode escape sequence.
098: char[] ca = new char[4];
099: ca[0] = (char) c;
100: if (this .in.read(ca, 1, 3) != 3)
101: throw new UnicodeUnescapeException(
102: "Incomplete escape sequence");
103: try {
104: return 0xffff & Integer.parseInt(new String(ca), 16);
105: } catch (NumberFormatException ex) {
106: throw new UnicodeUnescapeException(
107: "Invalid escape sequence \"\\u" + new String(ca)
108: + "\"");
109: }
110: }
111:
112: /**
113: * Override {@link FilterReader#read(char[], int, int)}.
114: */
115: public int read(char[] cbuf, int off, int len) throws IOException {
116: if (len == 0)
117: return 0;
118: int res = 0;
119: do {
120: int c = this .read();
121: if (c == -1)
122: break;
123: cbuf[off++] = (char) c;
124: } while (++res < len);
125: return res == 0 ? -1 : res;
126: }
127:
128: /**
129: * Simple unit testing.
130: */
131: public static void main(String[] args) throws IOException {
132: Reader r = new UnicodeUnescapeReader(new StringReader(args[0]));
133: for (;;) {
134: int c = r.read();
135: if (c == -1)
136: break;
137: System.out.print((char) c);
138: }
139: System.out.println();
140: }
141:
142: private int unreadChar = -1; // -1 == none
143: private boolean oddPrecedingBackslashes = false;
144: }
|