001: /* StdXMLReader.java NanoXML/Java
002: *
003: * $Revision: 1.1 $
004: * $Date: 2004/01/30 14:51:58 $
005: * $Name: $
006: *
007: * This file is part of NanoXML 2 for Java.
008: * Copyright (C) 2000-2002 Marc De Scheemaecker, All Rights Reserved.
009: *
010: * This software is provided 'as-is', without any express or implied warranty.
011: * In no event will the authors be held liable for any damages arising from the
012: * use of this software.
013: *
014: * Permission is granted to anyone to use this software for any purpose,
015: * including commercial applications, and to alter it and redistribute it
016: * freely, subject to the following restrictions:
017: *
018: * 1. The origin of this software must not be misrepresented; you must not
019: * claim that you wrote the original software. If you use this software in
020: * a product, an acknowledgment in the product documentation would be
021: * appreciated but is not required.
022: *
023: * 2. Altered source versions must be plainly marked as such, and must not be
024: * misrepresented as being the original software.
025: *
026: * 3. This notice may not be removed or altered from any source distribution.
027: */
028:
029: package net.n3.nanoxml;
030:
031: import java.io.InputStream;
032: import java.io.InputStreamReader;
033: import java.io.IOException;
034: import java.io.File;
035: import java.io.FileInputStream;
036: import java.io.FileNotFoundException;
037: import java.io.LineNumberReader;
038: import java.io.PushbackReader;
039: import java.io.PushbackInputStream;
040: import java.io.Reader;
041: import java.io.StringReader;
042: import java.io.UnsupportedEncodingException;
043: import java.net.MalformedURLException;
044: import java.net.URL;
045: import java.util.Stack;
046:
047: /**
048: * StdXMLReader reads the data to be parsed.
049: *
050: * @author Marc De Scheemaecker
051: * @version $Name: $, $Revision: 1.1 $
052: */
053: public class StdXMLReader implements IXMLReader {
054:
055: /**
056: * A stacked reader.
057: *
058: * @author Marc De Scheemaecker
059: * @version $Name: $, $Revision: 1.1 $
060: */
061: private class StackedReader {
062:
063: PushbackReader pbReader;
064:
065: LineNumberReader lineReader;
066:
067: URL systemId;
068:
069: String publicId;
070:
071: }
072:
073: /**
074: * The stack of readers.
075: */
076: private Stack readers;
077:
078: /**
079: * The current push-back reader.
080: */
081: private StackedReader currentReader;
082:
083: /**
084: * Creates a new reader using a string as input.
085: *
086: * @param str the string containing the XML data
087: */
088: public static IXMLReader stringReader(String str) {
089: return new StdXMLReader(new StringReader(str));
090: }
091:
092: /**
093: * Creates a new reader using a file as input.
094: *
095: * @param filename the name of the file containing the XML data
096: *
097: * @throws java.io.FileNotFoundException
098: * if the file could not be found
099: * @throws java.io.IOException
100: * if an I/O error occurred
101: */
102: public static IXMLReader fileReader(String filename)
103: throws FileNotFoundException, IOException {
104: StdXMLReader r = new StdXMLReader(new FileInputStream(filename));
105: r.setSystemID(filename);
106:
107: for (int i = 0; i < r.readers.size(); i++) {
108: StackedReader sr = (StackedReader) r.readers.elementAt(i);
109: sr.systemId = r.currentReader.systemId;
110: }
111:
112: return r;
113: }
114:
115: /**
116: * Initializes the reader from a system and public ID.
117: *
118: * @param publicID the public ID which may be null.
119: * @param systemID the non-null system ID.
120: *
121: * @throws MalformedURLException
122: * if the system ID does not contain a valid URL
123: * @throws FileNotFoundException
124: * if the system ID refers to a local file which does not exist
125: * @throws IOException
126: * if an error occurred opening the stream
127: */
128: public StdXMLReader(String publicID, String systemID)
129: throws MalformedURLException, FileNotFoundException,
130: IOException {
131: URL systemIDasURL = null;
132:
133: try {
134: systemIDasURL = new URL(systemID);
135: } catch (MalformedURLException e) {
136: systemID = "file:" + systemID;
137:
138: try {
139: systemIDasURL = new URL(systemID);
140: } catch (MalformedURLException e2) {
141: throw e;
142: }
143: }
144:
145: this .currentReader = new StackedReader();
146: this .readers = new Stack();
147: Reader reader = this .openStream(publicID, systemIDasURL
148: .toString());
149: this .currentReader.lineReader = new LineNumberReader(reader);
150: this .currentReader.pbReader = new PushbackReader(
151: this .currentReader.lineReader, 2);
152: }
153:
154: /**
155: * Initializes the XML reader.
156: *
157: * @param reader the input for the XML data.
158: */
159: public StdXMLReader(Reader reader) {
160: this .currentReader = new StackedReader();
161: this .readers = new Stack();
162: this .currentReader.lineReader = new LineNumberReader(reader);
163: this .currentReader.pbReader = new PushbackReader(
164: this .currentReader.lineReader, 2);
165: this .currentReader.publicId = "";
166:
167: try {
168: this .currentReader.systemId = new URL("file:.");
169: } catch (MalformedURLException e) {
170: // never happens
171: }
172: }
173:
174: /**
175: * Cleans up the object when it's destroyed.
176: */
177: protected void finalize() throws Throwable {
178: this .currentReader.lineReader = null;
179: this .currentReader.pbReader = null;
180: this .currentReader.systemId = null;
181: this .currentReader.publicId = null;
182: this .currentReader = null;
183: this .readers.removeAllElements();
184: super .finalize();
185: }
186:
187: /**
188: * Scans the encoding from an <?xml...?> tag.
189: *
190: * @param str the first tag in the XML data.
191: *
192: * @return the encoding, or null if no encoding has been specified.
193: */
194: protected String getEncoding(String str) {
195: if (!str.startsWith("<?xml")) {
196: return null;
197: }
198:
199: int index = 5;
200:
201: while (index < str.length()) {
202: StringBuffer key = new StringBuffer();
203:
204: while ((index < str.length()) && (str.charAt(index) <= ' ')) {
205: index++;
206: }
207:
208: while ((index < str.length()) && (str.charAt(index) >= 'a')
209: && (str.charAt(index) <= 'z')) {
210: key.append(str.charAt(index));
211: index++;
212: }
213:
214: while ((index < str.length()) && (str.charAt(index) <= ' ')) {
215: index++;
216: }
217:
218: if ((index >= str.length()) || (str.charAt(index) != '=')) {
219: break;
220: }
221:
222: while ((index < str.length())
223: && (str.charAt(index) != '\'')
224: && (str.charAt(index) != '"')) {
225: index++;
226: }
227:
228: if (index >= str.length()) {
229: break;
230: }
231:
232: char delimiter = str.charAt(index);
233: index++;
234: int index2 = str.indexOf(delimiter, index);
235:
236: if (index2 < 0) {
237: break;
238: }
239:
240: if (key.toString().equals("encoding")) {
241: return str.substring(index, index2);
242: }
243:
244: index = index2 + 1;
245: }
246:
247: return null;
248: }
249:
250: /**
251: * Converts a stream to a reader while detecting the encoding.
252: *
253: * @param stream the input for the XML data.
254: * @param charsRead buffer where to put characters that have been read
255: *
256: * @throws java.io.IOException
257: * if an I/O error occurred
258: */
259: protected Reader stream2reader(InputStream stream,
260: StringBuffer charsRead) throws IOException {
261: PushbackInputStream pbstream = new PushbackInputStream(stream);
262: int b = pbstream.read();
263:
264: switch (b) {
265: case 0x00:
266: case 0xFE:
267: case 0xFF:
268: pbstream.unread(b);
269: return new InputStreamReader(pbstream, "UTF-16");
270:
271: case 0xEF:
272: for (int i = 0; i < 2; i++) {
273: pbstream.read();
274: }
275:
276: return new InputStreamReader(pbstream, "UTF-8");
277:
278: case 0x3C:
279: b = pbstream.read();
280: charsRead.append('<');
281:
282: while ((b > 0) && (b != 0x3E)) {
283: charsRead.append((char) b);
284: b = pbstream.read();
285: }
286:
287: if (b > 0) {
288: charsRead.append((char) b);
289: }
290:
291: String encoding = this .getEncoding(charsRead.toString());
292:
293: if (encoding == null) {
294: return new InputStreamReader(pbstream, "UTF-8");
295: }
296:
297: charsRead.setLength(0);
298:
299: try {
300: return new InputStreamReader(pbstream, encoding);
301: } catch (UnsupportedEncodingException e) {
302: return new InputStreamReader(pbstream, "UTF-8");
303: }
304:
305: default:
306: charsRead.append((char) b);
307: return new InputStreamReader(pbstream, "UTF-8");
308: }
309: }
310:
311: /**
312: * Initializes the XML reader.
313: *
314: * @param stream the input for the XML data.
315: *
316: * @throws java.io.IOException
317: * if an I/O error occurred
318: */
319: public StdXMLReader(InputStream stream) throws IOException {
320: PushbackInputStream pbstream = new PushbackInputStream(stream);
321: StringBuffer charsRead = new StringBuffer();
322: Reader reader = this .stream2reader(stream, charsRead);
323: this .currentReader = new StackedReader();
324: this .readers = new Stack();
325: this .currentReader.lineReader = new LineNumberReader(reader);
326: this .currentReader.pbReader = new PushbackReader(
327: this .currentReader.lineReader, 2);
328: this .currentReader.publicId = "";
329:
330: try {
331: this .currentReader.systemId = new URL("file:.");
332: } catch (MalformedURLException e) {
333: // never happens
334: }
335:
336: this .startNewStream(new StringReader(charsRead.toString()));
337: }
338:
339: /**
340: * Reads a character.
341: *
342: * @return the character
343: *
344: * @throws java.io.IOException
345: * if no character could be read
346: */
347: public char read() throws IOException {
348: int ch = this .currentReader.pbReader.read();
349:
350: while (ch < 0) {
351: if (this .readers.empty()) {
352: throw new IOException("Unexpected EOF");
353: }
354:
355: this .currentReader.pbReader.close();
356: this .currentReader = (StackedReader) this .readers.pop();
357: ch = this .currentReader.pbReader.read();
358: }
359:
360: return (char) ch;
361: }
362:
363: /**
364: * Returns true if the current stream has no more characters left to be
365: * read.
366: *
367: * @throws java.io.IOException
368: * if an I/O error occurred
369: */
370: public boolean atEOFOfCurrentStream() throws IOException {
371: int ch = this .currentReader.pbReader.read();
372:
373: if (ch < 0) {
374: return true;
375: } else {
376: this .currentReader.pbReader.unread(ch);
377: return false;
378: }
379: }
380:
381: /**
382: * Returns true if there are no more characters left to be read.
383: *
384: * @throws java.io.IOException
385: * if an I/O error occurred
386: */
387: public boolean atEOF() throws IOException {
388: int ch = this .currentReader.pbReader.read();
389:
390: while (ch < 0) {
391: if (this .readers.empty()) {
392: return true;
393: }
394:
395: this .currentReader.pbReader.close();
396: this .currentReader = (StackedReader) this .readers.pop();
397: ch = this .currentReader.pbReader.read();
398: }
399:
400: this .currentReader.pbReader.unread(ch);
401: return false;
402: }
403:
404: /**
405: * Pushes the last character read back to the stream.
406: *
407: * @param ch the character to push back.
408: *
409: * @throws java.io.IOException
410: * if an I/O error occurred
411: */
412: public void unread(char ch) throws IOException {
413: this .currentReader.pbReader.unread(ch);
414: }
415:
416: /**
417: * Opens a stream from a public and system ID.
418: *
419: * @param publicID the public ID, which may be null
420: * @param systemID the system ID, which is never null
421: *
422: * @throws java.net.MalformedURLException
423: * if the system ID does not contain a valid URL
424: * @throws java.io.FileNotFoundException
425: * if the system ID refers to a local file which does not exist
426: * @throws java.io.IOException
427: * if an error occurred opening the stream
428: */
429: public Reader openStream(String publicID, String systemID)
430: throws MalformedURLException, FileNotFoundException,
431: IOException {
432: URL url = new URL(this .currentReader.systemId, systemID);
433:
434: if (url.getRef() != null) {
435: String ref = url.getRef();
436:
437: if (url.getFile().length() > 0) {
438: url = new URL(url.getProtocol(), url.getHost(), url
439: .getPort(), url.getFile());
440: url = new URL("jar:" + url + '!' + ref);
441: } else {
442: url = StdXMLReader.class.getResource(ref);
443: }
444: }
445:
446: this .currentReader.publicId = publicID;
447: this .currentReader.systemId = url;
448: StringBuffer charsRead = new StringBuffer();
449: Reader reader = this .stream2reader(url.openStream(), charsRead);
450:
451: if (charsRead.length() == 0) {
452: return reader;
453: }
454:
455: String charsReadStr = charsRead.toString();
456: PushbackReader pbreader = new PushbackReader(reader,
457: charsReadStr.length());
458:
459: for (int i = charsReadStr.length() - 1; i >= 0; i--) {
460: pbreader.unread(charsReadStr.charAt(i));
461: }
462:
463: return pbreader;
464: }
465:
466: /**
467: * Starts a new stream from a Java reader. The new stream is used
468: * temporary to read data from. If that stream is exhausted, control
469: * returns to the parent stream.
470: *
471: * @param reader the non-null reader to read the new data from
472: */
473: public void startNewStream(Reader reader) {
474: this .startNewStream(reader, false);
475: }
476:
477: /**
478: * Starts a new stream from a Java reader. The new stream is used
479: * temporary to read data from. If that stream is exhausted, control
480: * returns to the parent stream.
481: *
482: * @param reader the non-null reader to read the new data from
483: * @param isInternalEntity true if the reader is produced by resolving
484: * an internal entity
485: */
486: public void startNewStream(Reader reader, boolean isInternalEntity) {
487: StackedReader oldReader = this .currentReader;
488: this .readers.push(this .currentReader);
489: this .currentReader = new StackedReader();
490:
491: if (isInternalEntity) {
492: this .currentReader.lineReader = null;
493: this .currentReader.pbReader = new PushbackReader(reader, 2);
494: } else {
495: this .currentReader.lineReader = new LineNumberReader(reader);
496: this .currentReader.pbReader = new PushbackReader(
497: this .currentReader.lineReader, 2);
498: }
499:
500: this .currentReader.systemId = oldReader.systemId;
501: this .currentReader.publicId = oldReader.publicId;
502: }
503:
504: /**
505: * Returns the current "level" of the stream on the stack of streams.
506: */
507: public int getStreamLevel() {
508: return this .readers.size();
509: }
510:
511: /**
512: * Returns the line number of the data in the current stream.
513: */
514: public int getLineNr() {
515: if (this .currentReader.lineReader == null) {
516: StackedReader sr = (StackedReader) this .readers.peek();
517:
518: if (sr.lineReader == null) {
519: return 0;
520: } else {
521: return sr.lineReader.getLineNumber() + 1;
522: }
523: }
524:
525: return this .currentReader.lineReader.getLineNumber() + 1;
526: }
527:
528: /**
529: * Sets the system ID of the current stream.
530: *
531: * @param systemID the system ID
532: *
533: * @throws java.net.MalformedURLException
534: * if the system ID does not contain a valid URL
535: */
536: public void setSystemID(String systemID)
537: throws MalformedURLException {
538: this .currentReader.systemId = new URL(
539: this .currentReader.systemId, systemID);
540: }
541:
542: /**
543: * Sets the public ID of the current stream.
544: *
545: * @param publicID the public ID
546: */
547: public void setPublicID(String publicID) {
548: this .currentReader.publicId = publicID;
549: }
550:
551: /**
552: * Returns the current system ID.
553: */
554: public String getSystemID() {
555: return this .currentReader.systemId.toString();
556: }
557:
558: /**
559: * Returns the current public ID.
560: */
561: public String getPublicID() {
562: return this.currentReader.publicId;
563: }
564:
565: }
|