001: /*
002: * Java HTML Tidy - JTidy
003: * HTML parser and pretty printer
004: *
005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
006: * Institute of Technology, Institut National de Recherche en
007: * Informatique et en Automatique, Keio University). All Rights
008: * Reserved.
009: *
010: * Contributing Author(s):
011: *
012: * Dave Raggett <dsr@w3.org>
013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
014: * Gary L Peskin <garyp@firstech.com> (Java development)
015: * Sami Lempinen <sami@lempinen.net> (release management)
016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
017: *
018: * The contributing author(s) would like to thank all those who
019: * helped with testing, bug fixes, and patience. This wouldn't
020: * have been possible without all of you.
021: *
022: * COPYRIGHT NOTICE:
023: *
024: * This software and documentation is provided "as is," and
025: * the copyright holders and contributing author(s) make no
026: * representations or warranties, express or implied, including
027: * but not limited to, warranties of merchantability or fitness
028: * for any particular purpose or that the use of the software or
029: * documentation will not infringe any third party patents,
030: * copyrights, trademarks or other rights.
031: *
032: * The copyright holders and contributing author(s) will not be
033: * liable for any direct, indirect, special or consequential damages
034: * arising out of any use of the software or documentation, even if
035: * advised of the possibility of such damage.
036: *
037: * Permission is hereby granted to use, copy, modify, and distribute
038: * this source code, or portions hereof, documentation and executables,
039: * for any purpose, without fee, subject to the following restrictions:
040: *
041: * 1. The origin of this source code must not be misrepresented.
042: * 2. Altered versions must be plainly marked as such and must
043: * not be misrepresented as being the original source.
044: * 3. This Copyright notice may not be removed or altered from any
045: * source or altered source distribution.
046: *
047: * The copyright holders and contributing author(s) specifically
048: * permit, without fee, and encourage the use of this source code
049: * as a component for supporting the Hypertext Markup Language in
050: * commercial products. If you use this source code in a product,
051: * acknowledgment is not required but would be appreciated.
052: *
053: */
054: package org.w3c.tidy;
055:
056: import java.io.IOException;
057: import java.io.InputStream;
058: import java.io.InputStreamReader;
059: import java.io.Reader;
060: import java.io.UnsupportedEncodingException;
061:
062: /**
063: * StreamIn Implementation using java writers.
064: * @author Fabrizio Giustina
065: * @version $Revision: 1.5 $ ($Author: fgiust $)
066: */
067: public class StreamInJavaImpl implements StreamIn {
068:
069: /**
070: * number of characters kept in buffer.
071: */
072: private static final int CHARBUF_SIZE = 5;
073:
074: /**
075: * character buffer.
076: */
077: private int[] charbuf = new int[CHARBUF_SIZE];
078:
079: /**
080: * actual position in buffer.
081: */
082: private int bufpos;
083:
084: /**
085: * Java input stream reader.
086: */
087: private Reader reader;
088:
089: /**
090: * has end of stream been reached?
091: */
092: private boolean endOfStream;
093:
094: /**
095: * Is char pushed?
096: */
097: private boolean pushed;
098:
099: /**
100: * current column number.
101: */
102: private int curcol;
103:
104: /**
105: * last column.
106: */
107: private int lastcol;
108:
109: /**
110: * current line number.
111: */
112: private int curline;
113:
114: /**
115: * tab size in chars.
116: */
117: private int tabsize;
118:
119: private int tabs;
120:
121: /**
122: * Instantiates a new StreamInJavaImpl.
123: * @param stream
124: * @param encoding
125: * @param tabsize
126: * @throws UnsupportedEncodingException
127: */
128: public StreamInJavaImpl(InputStream stream, String encoding,
129: int tabsize) throws UnsupportedEncodingException {
130: reader = new InputStreamReader(stream, encoding);
131: this .pushed = false;
132: this .tabsize = tabsize;
133: this .curline = 1;
134: this .curcol = 1;
135: this .endOfStream = false;
136: }
137:
138: /**
139: * @see org.w3c.tidy.StreamIn#readCharFromStream()
140: */
141: public int readCharFromStream() {
142: int c;
143: try {
144: c = reader.read();
145: if (c < 0) {
146: endOfStream = true;
147: }
148:
149: } catch (IOException e) {
150: // @todo how to handle?
151: endOfStream = true;
152: return END_OF_STREAM;
153: }
154:
155: return c;
156: }
157:
158: /**
159: * @see org.w3c.tidy.StreamIn#readChar()
160: */
161: public int readChar() {
162: int c;
163:
164: if (this .pushed) {
165: c = this .charbuf[--(this .bufpos)];
166: if ((this .bufpos) == 0) {
167: this .pushed = false;
168: }
169:
170: if (c == '\n') {
171: this .curcol = 1;
172: this .curline++;
173: return c;
174: }
175:
176: this .curcol++;
177: return c;
178: }
179:
180: this .lastcol = this .curcol;
181:
182: if (this .tabs > 0) {
183: this .curcol++;
184: this .tabs--;
185: return ' ';
186: }
187:
188: c = readCharFromStream();
189:
190: if (c < 0) {
191: endOfStream = true;
192: return END_OF_STREAM;
193: }
194:
195: if (c == '\n') {
196: this .curcol = 1;
197: this .curline++;
198: return c;
199: } else if (c == '\r') // \r\n
200: {
201: c = readCharFromStream();
202: if (c != '\n') {
203: if (c != END_OF_STREAM) {
204: ungetChar(c);
205: }
206: c = '\n';
207: }
208: this .curcol = 1;
209: this .curline++;
210: return c;
211: }
212:
213: if (c == '\t') {
214: this .tabs = this .tabsize
215: - ((this .curcol - 1) % this .tabsize) - 1;
216: this .curcol++;
217: c = ' ';
218: return c;
219: }
220:
221: this .curcol++;
222:
223: return c;
224: }
225:
226: /**
227: * @see org.w3c.tidy.StreamIn#ungetChar(int)
228: */
229: public void ungetChar(int c) {
230: this .pushed = true;
231: if (this .bufpos >= CHARBUF_SIZE) {
232: // pop last element
233: System.arraycopy(this .charbuf, 0, this .charbuf, 1,
234: CHARBUF_SIZE - 1);
235: this .bufpos--;
236: }
237: this .charbuf[(this .bufpos)++] = c;
238:
239: if (c == '\n') {
240: --this .curline;
241: }
242:
243: this .curcol = this .lastcol;
244: }
245:
246: /**
247: * @see org.w3c.tidy.StreamIn#isEndOfStream()
248: */
249: public boolean isEndOfStream() {
250: return endOfStream;
251: }
252:
253: /**
254: * Getter for <code>curcol</code>.
255: * @return Returns the curcol.
256: */
257: public int getCurcol() {
258: return this .curcol;
259: }
260:
261: /**
262: * Getter for <code>curline</code>.
263: * @return Returns the curline.
264: */
265: public int getCurline() {
266: return this .curline;
267: }
268:
269: /**
270: * @see org.w3c.tidy.StreamIn#setLexer(org.w3c.tidy.Lexer)
271: */
272: public void setLexer(Lexer lexer) {
273: // unused in the java implementation
274: }
275:
276: }
|