001: /* StdXMLReader.java NanoXML/Java
002: *
003: * $Revision: 2056 $
004: * $Date: 2008-02-25 00:29:28 -0800 (Mon, 25 Feb 2008) $
005: * $Name$
006: *
007: * This file is part of NanoXML 2 for Java.
008: * Copyright (C) 2001 Marc De Scheemaecker, All Rights Reserved.
009: *
010: * This software is provided 'as-is', without any express or implied warranty.
011: * In no event will the authors be held liable for any damages arising from the
012: * use of this software.
013: *
014: * Permission is granted to anyone to use this software for any purpose,
015: * including commercial applications, and to alter it and redistribute it
016: * freely, subject to the following restrictions:
017: *
018: * 1. The origin of this software must not be misrepresented; you must not
019: * claim that you wrote the original software. If you use this software in
020: * a product, an acknowledgment in the product documentation would be
021: * appreciated but is not required.
022: *
023: * 2. Altered source versions must be plainly marked as such, and must not be
024: * misrepresented as being the original software.
025: *
026: * 3. This notice may not be removed or altered from any source distribution.
027: */
028:
029: package net.n3.nanoxml;
030:
031: import java.io.FileInputStream;
032: import java.io.FileNotFoundException;
033: import java.io.IOException;
034: import java.io.InputStream;
035: import java.io.InputStreamReader;
036: import java.io.LineNumberReader;
037: import java.io.PushbackInputStream;
038: import java.io.PushbackReader;
039: import java.io.Reader;
040: import java.io.StringReader;
041: import java.io.UnsupportedEncodingException;
042: import java.net.MalformedURLException;
043: import java.net.URL;
044: import java.util.Stack;
045:
046: /**
047: * StdXMLReader reads the data to be parsed.
048: *
049: * @author Marc De Scheemaecker
050: * @version $Name$, $Revision: 2056 $
051: */
052: public class StdXMLReader implements IXMLReader {
053:
054: /**
055: * The stack of push-back readers.
056: */
057: private Stack<PushbackReader> pbreaders;
058:
059: /**
060: * The stack of line-number readers.
061: */
062: private Stack<LineNumberReader> linereaders;
063:
064: /**
065: * The stack of system ids.
066: */
067: private Stack<URL> systemIds;
068:
069: /**
070: * The stack of public ids.
071: */
072: private Stack<String> publicIds;
073:
074: /**
075: * The current push-back reader.
076: */
077: private PushbackReader currentPbReader;
078:
079: /**
080: * The current line-number reader.
081: */
082: private LineNumberReader currentLineReader;
083:
084: /**
085: * The current system ID.
086: */
087: private URL currentSystemID;
088:
089: /**
090: * The current public ID.
091: */
092: private String currentPublicID;
093:
094: /**
095: * Creates a new reader using a string as input.
096: *
097: * @param str the string containing the XML data
098: */
099: public static IXMLReader stringReader(String str) {
100: return new StdXMLReader(new StringReader(str));
101: }
102:
103: /**
104: * Creates a new reader using a file as input.
105: *
106: * @param filename the name of the file containing the XML data
107: *
108: * @throws java.io.FileNotFoundException if the file could not be found
109: * @throws java.io.IOException if an I/O error occurred
110: */
111: public static IXMLReader fileReader(String filename)
112: throws FileNotFoundException, IOException {
113: IXMLReader reader = new StdXMLReader(new FileInputStream(
114: filename));
115: reader.setSystemID(filename);
116: return reader;
117: }
118:
119: /**
120: * Initializes the reader from a system and public ID.
121: *
122: * @param publicID the public ID which may be null.
123: * @param systemID the non-null system ID.
124: *
125: * @throws MalformedURLException if the system ID does not contain a valid URL
126: * @throws FileNotFoundException if the system ID refers to a local file which does not exist
127: * @throws IOException if an error occurred opening the stream
128: */
129: public StdXMLReader(String publicID, String systemID)
130: throws MalformedURLException, FileNotFoundException,
131: IOException {
132: URL systemIDasURL = null;
133:
134: try {
135: systemIDasURL = new URL(systemID);
136: } catch (MalformedURLException e) {
137: systemID = "file://" + systemID;
138:
139: try {
140: systemIDasURL = new URL(systemID);
141: } catch (MalformedURLException e2) {
142: throw e;
143: }
144: }
145:
146: Reader reader = this .openStream(publicID, systemIDasURL
147: .toString());
148: this .currentLineReader = new LineNumberReader(reader);
149: this .currentPbReader = new PushbackReader(
150: this .currentLineReader, 2);
151: this .pbreaders = new Stack<PushbackReader>();
152: this .linereaders = new Stack<LineNumberReader>();
153: this .publicIds = new Stack<String>();
154: this .systemIds = new Stack<URL>();
155: this .currentPublicID = publicID;
156: this .currentSystemID = systemIDasURL;
157: }
158:
159: /**
160: * Initializes the XML reader.
161: *
162: * @param reader the input for the XML data.
163: */
164: public StdXMLReader(Reader reader) {
165: this .currentLineReader = new LineNumberReader(reader);
166: this .currentPbReader = new PushbackReader(
167: this .currentLineReader, 2);
168: this .pbreaders = new Stack<PushbackReader>();
169: this .linereaders = new Stack<LineNumberReader>();
170: this .publicIds = new Stack<String>();
171: this .systemIds = new Stack<URL>();
172: this .currentPublicID = "";
173:
174: try {
175: this .currentSystemID = new URL("file:.");
176: } catch (MalformedURLException e) {
177: // never happens
178: }
179: }
180:
181: /**
182: * Cleans up the object when it's destroyed.
183: */
184: protected void finalize() throws Throwable {
185: this .currentLineReader = null;
186: this .currentPbReader = null;
187: this .pbreaders.clear();
188: this .pbreaders = null;
189: this .linereaders.clear();
190: this .linereaders = null;
191: this .publicIds.clear();
192: this .publicIds = null;
193: this .systemIds.clear();
194: this .systemIds = null;
195: this .currentPublicID = null;
196: super .finalize();
197: }
198:
199: /**
200: * Scans the encoding from an <?xml?> tag.
201: *
202: * @param str the first tag in the XML data.
203: *
204: * @return the encoding, or null if no encoding has been specified.
205: */
206: protected String getEncoding(String str) {
207: if (!str.startsWith("<?xml")) {
208: return null;
209: }
210:
211: int index = 5;
212:
213: while (index < str.length()) {
214: StringBuffer key = new StringBuffer();
215:
216: while ((index < str.length()) && (str.charAt(index) <= ' ')) {
217: index++;
218: }
219:
220: while ((index < str.length()) && (str.charAt(index) >= 'a')
221: && (str.charAt(index) <= 'z')) {
222: key.append(str.charAt(index));
223: index++;
224: }
225:
226: while ((index < str.length()) && (str.charAt(index) <= ' ')) {
227: index++;
228: }
229:
230: if ((index >= str.length()) || (str.charAt(index) != '=')) {
231: break;
232: }
233:
234: while ((index < str.length())
235: && (str.charAt(index) != '\'')
236: && (str.charAt(index) != '"')) {
237: index++;
238: }
239:
240: if (index >= str.length()) {
241: break;
242: }
243:
244: char delimiter = str.charAt(index);
245: index++;
246: int index2 = str.indexOf(delimiter, index);
247:
248: if (index2 < 0) {
249: break;
250: }
251:
252: if ("encoding".equals(key.toString())) {
253: return str.substring(index, index2);
254: }
255:
256: index = index2 + 1;
257: }
258:
259: return null;
260: }
261:
262: /**
263: * Converts a stream to a reader while detecting the encoding.
264: *
265: * @param stream the input for the XML data.
266: * @param charsRead buffer where to put characters that have been read
267: *
268: * @throws java.io.IOException if an I/O error occurred
269: */
270: protected Reader stream2reader(InputStream stream,
271: StringBuffer charsRead) throws IOException {
272: PushbackInputStream pbstream = new PushbackInputStream(stream);
273: int b = pbstream.read();
274:
275: switch (b) {
276: case 0x00:
277: case 0xFE:
278: case 0xFF:
279: pbstream.unread(b);
280: return new InputStreamReader(pbstream, "UTF-16");
281:
282: case 0xEF:
283: for (int i = 0; i < 2; i++) {
284: pbstream.read();
285: }
286:
287: return new InputStreamReader(pbstream, "UTF-8");
288:
289: case 0x3C:
290: b = pbstream.read();
291: charsRead.append('<');
292:
293: while ((b > 0) && (b != 0x3E)) {
294: charsRead.append((char) b);
295: b = pbstream.read();
296: }
297:
298: if (b > 0) {
299: charsRead.append((char) b);
300: }
301:
302: String encoding = this .getEncoding(charsRead.toString());
303:
304: if (encoding == null) {
305: return new InputStreamReader(pbstream, "UTF-8");
306: }
307:
308: charsRead.setLength(0);
309:
310: try {
311: return new InputStreamReader(pbstream, encoding);
312: } catch (UnsupportedEncodingException e) {
313: return new InputStreamReader(pbstream, "UTF-8");
314: }
315:
316: default:
317: charsRead.append((char) b);
318: return new InputStreamReader(pbstream, "UTF-8");
319: }
320: }
321:
322: /**
323: * Initializes the XML reader.
324: *
325: * @param stream the input for the XML data.
326: *
327: * @throws java.io.IOException if an I/O error occurred
328: */
329: public StdXMLReader(InputStream stream) throws IOException {
330: StringBuffer charsRead = new StringBuffer();
331: Reader reader = this .stream2reader(stream, charsRead);
332: this .currentLineReader = new LineNumberReader(reader);
333: this .currentPbReader = new PushbackReader(
334: this .currentLineReader, 2);
335: this .pbreaders = new Stack<PushbackReader>();
336: this .linereaders = new Stack<LineNumberReader>();
337: this .publicIds = new Stack<String>();
338: this .systemIds = new Stack<URL>();
339: this .currentPublicID = "";
340:
341: try {
342: this .currentSystemID = new URL("file:.");
343: } catch (MalformedURLException e) {
344: // never happens
345: }
346: this .startNewStream(new StringReader(charsRead.toString()));
347: }
348:
349: /**
350: * Reads a character.
351: *
352: * @return the character
353: *
354: * @throws java.io.IOException if no character could be read
355: */
356: public char read() throws IOException {
357: int ch = this .currentPbReader.read();
358:
359: while (ch < 0) {
360: if (this .pbreaders.empty()) {
361: throw new IOException("Unexpected EOF");
362: }
363:
364: this .currentPbReader.close();
365: this .currentPbReader = this .pbreaders.pop();
366: this .currentLineReader = this .linereaders.pop();
367: this .currentSystemID = this .systemIds.pop();
368: this .currentPublicID = this .publicIds.pop();
369: ch = this .currentPbReader.read();
370: }
371:
372: if (ch == 0x0D) { // CR
373: // using recursion could convert "\r\r\n" to "\n" (wrong),
374: // newline combo "\r\n" isn't normalized if it spans streams
375: // next 'read()' will pop pbreaders stack appropriately
376: ch = this .currentPbReader.read();
377:
378: if (ch != 0x0A && ch > 0) { // LF
379: this .currentPbReader.unread(ch);
380: }
381: return (char) 0x0A; // normalized: always LF
382: }
383:
384: return (char) ch;
385: }
386:
387: /**
388: * Returns true if the current stream has no more characters left to be read.
389: *
390: * @throws java.io.IOException if an I/O error occurred
391: */
392: public boolean atEOFOfCurrentStream() throws IOException {
393: int ch = this .currentPbReader.read();
394:
395: if (ch < 0) {
396: return true;
397: } else {
398: this .currentPbReader.unread(ch);
399: return false;
400: }
401: }
402:
403: /**
404: * Returns true if there are no more characters left to be read.
405: *
406: * @throws java.io.IOException if an I/O error occurred
407: */
408: public boolean atEOF() throws IOException {
409: int ch = this .currentPbReader.read();
410:
411: while (ch < 0) {
412: if (this .pbreaders.empty()) {
413: return true;
414: }
415:
416: this .currentPbReader.close();
417: this .currentPbReader = this .pbreaders.pop();
418: this .currentLineReader = this .linereaders.pop();
419: this .currentSystemID = this .systemIds.pop();
420: this .currentPublicID = this .publicIds.pop();
421: ch = this .currentPbReader.read();
422: }
423:
424: this .currentPbReader.unread(ch);
425: return false;
426: }
427:
428: /**
429: * Pushes the last character read back to the stream.
430: *
431: * @throws java.io.IOException if an I/O error occurred
432: */
433: public void unread(char ch) throws IOException {
434: this .currentPbReader.unread(ch);
435: }
436:
437: /**
438: * Opens a stream from a public and system ID.
439: *
440: * @param publicID the public ID, which may be null
441: * @param systemID the system ID, which is never null
442: *
443: * @throws java.net.MalformedURLException if the system ID does not contain a valid URL
444: * @throws java.io.FileNotFoundException if the system ID refers to a local file which does not
445: * exist
446: * @throws java.io.IOException if an error occurred opening the stream
447: */
448: public Reader openStream(String publicID, String systemID)
449: throws MalformedURLException, FileNotFoundException,
450: IOException {
451: URL url = new URL(this .currentSystemID, systemID);
452: StringBuffer charsRead = new StringBuffer();
453: Reader reader = this .stream2reader(url.openStream(), charsRead);
454:
455: if (charsRead.length() == 0) {
456: return reader;
457: }
458:
459: String charsReadStr = charsRead.toString();
460: PushbackReader pbreader = new PushbackReader(reader,
461: charsReadStr.length());
462: for (int i = charsReadStr.length() - 1; i >= 0; i--) {
463: pbreader.unread(charsReadStr.charAt(i));
464: }
465:
466: return pbreader;
467: }
468:
469: /**
470: * Starts a new stream from a Java reader. The new stream is used temporary to read data from.
471: * If that stream is exhausted, control returns to the parent stream.
472: *
473: * @param reader the non-null reader to read the new data from
474: */
475: public void startNewStream(Reader reader) {
476: this .pbreaders.push(this .currentPbReader);
477: this .linereaders.push(this .currentLineReader);
478: this .systemIds.push(this .currentSystemID);
479: this .publicIds.push(this .currentPublicID);
480: this .currentLineReader = new LineNumberReader(reader);
481: this .currentPbReader = new PushbackReader(
482: this .currentLineReader, 2);
483: }
484:
485: /**
486: * Returns the line number of the data in the current stream.
487: */
488: public int getLineNr() {
489: return this .currentLineReader.getLineNumber() + 1;
490: }
491:
492: /**
493: * Sets the system ID of the current stream.
494: *
495: * @param systemID the system ID
496: *
497: * @throws java.net.MalformedURLException if the system ID does not contain a valid URL
498: */
499: public void setSystemID(String systemID)
500: throws MalformedURLException {
501: this .currentSystemID = new URL(this .currentSystemID, systemID);
502: if (!this .systemIds.isEmpty()) {
503: this .systemIds.pop();
504: this .systemIds.push(this .currentSystemID);
505: }
506: }
507:
508: /**
509: * Sets the public ID of the current stream.
510: *
511: * @param publicID the public ID
512: */
513: public void setPublicID(String publicID) {
514: this .currentPublicID = publicID;
515: }
516:
517: /**
518: * Returns the current system ID.
519: */
520: public String getSystemID() {
521: return this .currentSystemID.toString();
522: }
523:
524: /**
525: * Returns the current public ID.
526: */
527: public String getPublicID() {
528: return this.currentPublicID;
529: }
530:
531: }
|