001: /*
002: * Copyright 1995-2003 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package sun.tools.java;
027:
028: import java.io.IOException;
029: import java.io.InputStream;
030: import java.io.InputStreamReader;
031: import java.io.BufferedReader;
032: import java.io.FilterReader;
033: import java.io.UnsupportedEncodingException;
034:
035: /**
036: * An input stream for java programs. The stream treats either "\n", "\r"
037: * or "\r\n" as the end of a line, it always returns \n. It also parses
038: * UNICODE characters expressed as \uffff. However, if it sees "\\", the
039: * second slash cannot begin a unicode sequence. It keeps track of the current
040: * position in the input stream.
041: *
042: * WARNING: The contents of this source file are not part of any
043: * supported API. Code that depends on them does so at its own risk:
044: * they are subject to change or removal without notice.
045: *
046: * @author Arthur van Hoff
047: * @version 1.12, 01/17/97
048: */
049:
050: public class ScannerInputReader extends FilterReader implements
051: Constants {
052: // A note. This class does not really properly subclass FilterReader.
053: // Since this class only overrides the single character read method,
054: // and not the multi-character read method, any use of the latter
055: // will not work properly. Any attempt to use this code outside of
056: // the compiler should take that into account.
057: //
058: // For efficiency, it might be worth moving this code to Scanner and
059: // getting rid of this class.
060:
061: Environment env;
062: long pos;
063:
064: private long chpos;
065: private int pushBack = -1;
066:
067: public ScannerInputReader(Environment env, InputStream in)
068: throws UnsupportedEncodingException {
069: // ScannerInputStream has been modified to no longer use
070: // BufferedReader. It now does its own buffering for
071: // performance.
072: super (
073: env.getCharacterEncoding() != null ? new InputStreamReader(
074: in, env.getCharacterEncoding())
075: : new InputStreamReader(in));
076:
077: // Start out the buffer empty.
078: currentIndex = 0;
079: numChars = 0;
080:
081: this .env = env;
082: chpos = Scanner.LINEINC;
083: }
084:
085: //------------------------------------------------------------
086: // Buffering code.
087:
088: // The size of our buffer.
089: private static final int BUFFERLEN = 10 * 1024;
090:
091: // A character buffer.
092: private final char[] buffer = new char[BUFFERLEN];
093:
094: // The index of the next character to be "read" from the buffer.
095: private int currentIndex;
096:
097: // The number of characters in the buffer. -1 if EOF is reached.
098: private int numChars;
099:
100: /**
101: * Get the next character from our buffer.
102: * Note: this method has been inlined by hand in the `read' method
103: * below. Any changes made to this method should be equally applied
104: * to that code.
105: */
106: private int getNextChar() throws IOException {
107: // Check to see if we have either run out of characters in our
108: // buffer or gotten to EOF on a previous call.
109: if (currentIndex >= numChars) {
110: numChars = in.read(buffer);
111: if (numChars == -1) {
112: // We have reached EOF.
113: return -1;
114: }
115:
116: // No EOF. currentIndex points to first char in buffer.
117: currentIndex = 0;
118: }
119:
120: return buffer[currentIndex++];
121: }
122:
123: //------------------------------------------------------------
124:
125: public int read(char[] buffer, int off, int len) {
126: throw new CompilerError(
127: "ScannerInputReader is not a fully implemented reader.");
128: }
129:
130: public int read() throws IOException {
131: pos = chpos;
132: chpos += Scanner.OFFSETINC;
133:
134: int c = pushBack;
135: if (c == -1) {
136: getchar: try {
137: // Here the call...
138: // c = getNextChar();
139: // has been inlined by hand for performance.
140:
141: if (currentIndex >= numChars) {
142: numChars = in.read(buffer);
143: if (numChars == -1) {
144: // We have reached EOF.
145: c = -1;
146: break getchar;
147: }
148:
149: // No EOF. currentIndex points to first char in buffer.
150: currentIndex = 0;
151: }
152: c = buffer[currentIndex++];
153:
154: } catch (java.io.CharConversionException e) {
155: env.error(pos, "invalid.encoding.char");
156: // this is fatal error
157: return -1;
158: }
159: } else {
160: pushBack = -1;
161: }
162:
163: // parse special characters
164: switch (c) {
165: case -2:
166: // -2 is a special code indicating a pushback of a backslash that
167: // definitely isn't the start of a unicode sequence.
168: return '\\';
169:
170: case '\\':
171: if ((c = getNextChar()) != 'u') {
172: pushBack = (c == '\\' ? -2 : c);
173: return '\\';
174: }
175: // we have a unicode sequence
176: chpos += Scanner.OFFSETINC;
177: while ((c = getNextChar()) == 'u') {
178: chpos += Scanner.OFFSETINC;
179: }
180:
181: // unicode escape sequence
182: int d = 0;
183: for (int i = 0; i < 4; i++, chpos += Scanner.OFFSETINC, c = getNextChar()) {
184: switch (c) {
185: case '0':
186: case '1':
187: case '2':
188: case '3':
189: case '4':
190: case '5':
191: case '6':
192: case '7':
193: case '8':
194: case '9':
195: d = (d << 4) + c - '0';
196: break;
197:
198: case 'a':
199: case 'b':
200: case 'c':
201: case 'd':
202: case 'e':
203: case 'f':
204: d = (d << 4) + 10 + c - 'a';
205: break;
206:
207: case 'A':
208: case 'B':
209: case 'C':
210: case 'D':
211: case 'E':
212: case 'F':
213: d = (d << 4) + 10 + c - 'A';
214: break;
215:
216: default:
217: env.error(pos, "invalid.escape.char");
218: pushBack = c;
219: return d;
220: }
221: }
222: pushBack = c;
223:
224: // To read the following line, switch \ and /...
225: // Handle /u000a, /u000A, /u000d, /u000D properly as
226: // line terminators as per JLS 3.4, even though they are encoded
227: // (this properly respects the order given in JLS 3.2).
228: switch (d) {
229: case '\n':
230: chpos += Scanner.LINEINC;
231: return '\n';
232: case '\r':
233: if ((c = getNextChar()) != '\n') {
234: pushBack = c;
235: } else {
236: chpos += Scanner.OFFSETINC;
237: }
238: chpos += Scanner.LINEINC;
239: return '\n';
240: default:
241: return d;
242: }
243:
244: case '\n':
245: chpos += Scanner.LINEINC;
246: return '\n';
247:
248: case '\r':
249: if ((c = getNextChar()) != '\n') {
250: pushBack = c;
251: } else {
252: chpos += Scanner.OFFSETINC;
253: }
254: chpos += Scanner.LINEINC;
255: return '\n';
256:
257: default:
258: return c;
259: }
260: }
261: }
|