001: /*
002: License $Id: Parser.java,v 1.9 2005/07/06 18:31:00 hendriks73 Exp $
003:
004: Copyright (c) 2001-2005 tagtraum industries.
005:
006: LGPL
007: ====
008:
009: jo! is free software; you can redistribute it and/or
010: modify it under the terms of the GNU Lesser General Public
011: License as published by the Free Software Foundation; either
012: version 2.1 of the License, or (at your option) any later version.
013:
014: jo! is distributed in the hope that it will be useful,
015: but WITHOUT ANY WARRANTY; without even the implied warranty of
016: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017: Lesser General Public License for more details.
018:
019: You should have received a copy of the GNU Lesser General Public
020: License along with this library; if not, write to the Free Software
021: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022:
023: For LGPL see <http://www.fsf.org/copyleft/lesser.txt>
024:
025:
026: Sun license
027: ===========
028:
029: This release contains software by Sun Microsystems. Therefore
030: the following conditions have to be met, too. They apply to the
031: files
032:
033: - lib/mail.jar
034: - lib/activation.jar
035: - lib/jsse.jar
036: - lib/jcert.jar
037: - lib/jaxp.jar
038: - lib/crimson.jar
039: - lib/servlet.jar
040: - lib/jnet.jar
041: - lib/jaas.jar
042: - lib/jaasmod.jar
043:
044: contained in this release.
045:
046: a. Licensee may not modify the Java Platform
047: Interface (JPI, identified as classes contained within the javax
048: package or any subpackages of the javax package), by creating additional
049: classes within the JPI or otherwise causing the addition to or modification
050: of the classes in the JPI. In the event that Licensee creates any
051: Java-related API and distribute such API to others for applet or
052: application development, you must promptly publish broadly, an accurate
053: specification for such API for free use by all developers of Java-based
054: software.
055:
056: b. Software is confidential copyrighted information of Sun and
057: title to all copies is retained by Sun and/or its licensors. Licensee
058: shall not modify, decompile, disassemble, decrypt, extract, or otherwise
059: reverse engineer Software. Software may not be leased, assigned, or
060: sublicensed, in whole or in part. Software is not designed or intended
061: for use in on-line control of aircraft, air traffic, aircraft navigation
062: or aircraft communications; or in the design, construction, operation or
063: maintenance of any nuclear facility. Licensee warrants that it will not
064: use or redistribute the Software for such purposes.
065:
066: c. Software is provided "AS IS," without a warranty
067: of any kind. ALL EXPRESS OR IMPLIED REPRESENTATIONS AND WARRANTIES,
068: INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
069: PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED.
070:
071: d. This License is effective until terminated. Licensee may
072: terminate this License at any time by destroying all copies of Software.
073: This License will terminate immediately without notice from Sun if Licensee
074: fails to comply with any provision of this License. Upon such termination,
075: Licensee must destroy all copies of Software.
076:
077: e. Software, including technical data, is subject to U.S.
078: export control laws, including the U.S. Export Administration Act and its
079: associated regulations, and may be subject to export or import regulations
080: in other countries. Licensee agrees to comply strictly with all such
081: regulations and acknowledges that it has the responsibility to obtain
082: licenses to export, re-export, or import Software. Software may not be
083: downloaded, or otherwise exported or re-exported (i) into, or to a national
084: or resident of, Cuba, Iraq, Iran, North Korea, Libya, Sudan, Syria or any
085: country to which the U.S. has embargoed goods; or (ii) to anyone on the
086: U.S. Treasury Department's list of Specially Designated Nations or the U.S.
087: Commerce Department's Table of Denial Orders.
088:
089:
090: Feedback
091: ========
092:
093: We encourage your feedback and suggestions and want to use your feedback to
094: improve the Software. Send all such feedback to:
095: <feedback@tagtraum.com>
096:
097: For more information on tagtraum industries and jo!
098: please see <http://www.tagtraum.com/>.
099:
100:
101: */
102: package com.tagtraum.framework.markup;
103:
104: import com.tagtraum.framework.util.FileLocation;
105: import com.tagtraum.framework.util.StackedReader;
106:
107: import java.io.*;
108: import java.lang.reflect.Constructor;
109: import java.lang.reflect.Method;
110: import java.net.URL;
111: import java.util.Arrays;
112: import java.util.HashMap;
113:
114: /**
115: * This class is capable of reading textual data and recognizing certain
116: * parts of that text beginning with a 'magic' character. The default
117: * character used is '<'. All parts of the text are returned as
118: * {@link I_Element}s. The class allows to register certain element classes
119: * for tag starts.<p>
120: * Not recognized parts are returned as an instance of the default
121: * element class.
122: *
123: * @author <a href="mailto:hs@tagtraum.com">Hendrik Schreiber</a>
124: * @version 1.1beta1 $Id: Parser.java,v 1.9 2005/07/06 18:31:00 hendriks73 Exp $
125: */
126: public class Parser {
127:
128: /**
129: * Source-Version
130: */
131: public static String vcid = "$Id: Parser.java,v 1.9 2005/07/06 18:31:00 hendriks73 Exp $";
132:
133: /**
134: * Internal Reader.
135: */
136: protected PushbackReader myReader;
137: protected Reader myRawReader;
138:
139: // data organization. first shot. this should be a tree
140: protected HashMap myElementMap;
141: protected Constructor[] myElements;
142: protected String[] myTagStarts;
143: protected Constructor myDefaultElementConstructor;
144: protected int myLongestTagStartLength;
145: protected URL myURL;
146: protected int myMagicChar;
147:
148: /**
149: * Create a new Parser.
150: *
151: * @param aReader a Reader that supports <code>mark</code>
152: */
153: public Parser(Reader aReader) {
154: // make sure we are starting with line 1
155: if (aReader instanceof LineNumberReader)
156: ((LineNumberReader) aReader).setLineNumber(1);
157: myReader = null;
158: myRawReader = aReader;
159: myElementMap = new HashMap();
160:
161: try {
162: myDefaultElementConstructor = RawDataElement.class
163: .getConstructor(new Class[] { String.class });
164: } catch (NoSuchMethodException nsme) {
165: nsme.printStackTrace();
166: }
167:
168: myMagicChar = (int) '<';
169: }
170:
171: /**
172: * Creates a new Parser.
173: *
174: * @param aURL the URL is opened and the resulting InputStream is
175: * wrapped with an InputputStreamReader using the default encoding
176: */
177: public Parser(URL aURL) throws IOException {
178: this (new LineNumberReader(new InputStreamReader(aURL
179: .openStream())));
180:
181: setURL(aURL);
182: }
183:
184: /**
185: * Creates a new Parser.
186: *
187: * @param aURL the URL is opened and the resulting InputStream is
188: * wrapped with an InputputStreamReader using the specified encoding
189: * @param anEncoding the way data is encoded
190: */
191: public Parser(URL aURL, String anEncoding) throws IOException {
192: this (new LineNumberReader(new InputStreamReader(aURL
193: .openStream(), anEncoding)));
194:
195: setURL(aURL);
196: }
197:
198: /**
199: * Sets the default element Constructor. The default value for this is
200: * {@link RawDataElement}.
201: *
202: * @param aDefaultElementConstructor the default element Constructor
203: * @see #getDefaultElementConstructor()
204: */
205: public void setDefaultElementClass(
206: Constructor aDefaultElementConstructor) {
207: myDefaultElementConstructor = aDefaultElementConstructor;
208: }
209:
210: /**
211: * Returns the default element Constructor. The default value for this is
212: * {@link RawDataElement}.
213: *
214: * @return the default element Constructor
215: * @see #setDefaultElementClass
216: */
217: public Constructor getDefaultElementConstructor() {
218: return myDefaultElementConstructor;
219: }
220:
221: /**
222: * Sets the 'magic' character.
223: *
224: * @param aMagicChar the stop character
225: */
226: public void setMagicChar(char aMagicChar) {
227: myMagicChar = (int) aMagicChar;
228: }
229:
230: /**
231: * Returns the 'magic' character.
232: *
233: * @return the 'magic' character
234: */
235: public char getMagicChar() {
236: return (char) myMagicChar;
237: }
238:
239: /**
240: * Associates this parser with an URL.
241: *
242: * @param aURL the URL this Parser is parsing.
243: */
244: public void setURL(URL aURL) {
245: myURL = aURL;
246: }
247:
248: /**
249: * Returns the URL this parser is parsing. Note that this must
250: * have been set explicitly using {@link #setURL} unless you
251: * used the appropriate constructor.
252: *
253: * @return the URL this Parser is parsing.
254: */
255: public URL getURL() {
256: return myURL;
257: }
258:
259: /**
260: * Returns the next {@link I_Element}.
261: *
262: * return the next element or <code>null</code> if EOF is reached
263: */
264: public I_Element nextElement() throws IOException {
265: if (myReader == null) {
266: synchronized (this ) {
267: if (myReader == null) {
268: myReader = new PushbackReader(myRawReader, 256);
269: }
270: }
271: }
272:
273: int c = myReader.read();
274: I_Element theElement = null;
275:
276: if (c != -1 && c != myMagicChar) {
277: myReader.unread(c);
278: theElement = getDefaultElement();
279: if (myRawReader instanceof StackedReader) {
280: final StackedReader stackedReader = (StackedReader) myRawReader;
281: int lineNumber = stackedReader.getLineNumber();
282: // fix linenumber
283: if (c == '\n')
284: lineNumber--;
285: setFileLocation(lineNumber, theElement);
286: }
287: theElement.parse(myReader);
288: } else {
289: if (c == -1) {
290: return null; // EOF
291: }
292:
293: try {
294: final int lineNumber = getLineNumber();
295: theElement = getElement();
296: if (theElement != null) {
297: setFileLocation(lineNumber, theElement);
298: theElement.parse(myReader);
299: } else {
300: myReader.unread(c);
301: theElement = getDefaultElement();
302: setFileLocation(theElement);
303: theElement.parse(myReader);
304: }
305: } catch (EOFException oefe) {
306: }
307: }
308: return theElement;
309: }
310:
311: private int getLineNumber() {
312: if (myRawReader instanceof StackedReader) {
313: final StackedReader stackedReader = (StackedReader) myRawReader;
314: return stackedReader.getLineNumber();
315: }
316: return -1;
317: }
318:
319: private void setFileLocation(I_Element theElement) {
320: if (myRawReader instanceof StackedReader) {
321: final StackedReader stackedReader = (StackedReader) myRawReader;
322: final int lineNumber = stackedReader.getLineNumber();
323: setFileLocation(lineNumber, theElement);
324: }
325: }
326:
327: private void setFileLocation(final int lineNumber,
328: I_Element theElement) {
329: if (myRawReader instanceof StackedReader) {
330: final StackedReader stackedReader = (StackedReader) myRawReader;
331: theElement.setLine(lineNumber);
332: try {
333: FileLocation location = (FileLocation) stackedReader
334: .getCurrentContextObject().clone();
335: location.setLineNumber(lineNumber);
336: theElement.setFileLocation(location);
337: } catch (CloneNotSupportedException e) {
338: e.printStackTrace(); //should never happen
339: }
340: }
341: }
342:
343: /**
344: * Returns a new instance of the default element.
345: * It usually repersents raw data.
346: *
347: * @return an {@link I_Element}
348: * @see RawDataElement
349: */
350: protected I_Element getDefaultElement() throws IOException {
351: try {
352: return (I_Element) myDefaultElementConstructor
353: .newInstance(new Object[] { null });
354: } catch (Exception e) {
355: throw new ParserException(
356: "Failed to instantiate default element of type "
357: + myDefaultElementConstructor);
358: }
359: }
360:
361: /**
362: * Read first characters and return the appropiate {@link I_Element}.
363: *
364: * @return an Element or null if EOF
365: */
366: protected I_Element getElement() throws IOException {
367: char[] buf = new char[myLongestTagStartLength];
368: int readChars = 0;
369: for (int justRead = 0; justRead >= 0
370: && readChars < myLongestTagStartLength;) {
371: justRead = myReader.read(buf, readChars,
372: myLongestTagStartLength - readChars);
373: if (justRead > 0)
374: readChars += justRead;
375: }
376: if (readChars == 0) {
377: // end of file
378: throw new EOFException();
379: }
380:
381: String theTagStart = new String(buf, 0, readChars);
382:
383: for (int i = myTagStarts.length - 1; i > -1; i--) {
384: // this is a linear search and could be optimized with a tree
385: if (theTagStart.startsWith(myTagStarts[i])) {
386: myReader.unread(buf, 0, readChars);
387: try {
388: return (I_Element) myElements[i]
389: .newInstance(new Object[] { (Object) myTagStarts[i] });
390: } catch (Exception e) {
391: throw new ParserException(
392: "Failed to instantiate tag of type "
393: + myElements[i]);
394: }
395: }
396: }
397: myReader.unread(buf);
398: return null;
399: }
400:
401: /**
402: * Registers an Element class.
403: *
404: * @param anElementClass an Element class
405: * @throws ParserException if the class does not have a method called "getTagStart"
406: */
407: public void addElementClass(Class anElementClass, String aTagStart)
408: throws ParserException, NoSuchMethodException {
409: myElementMap.put(aTagStart, anElementClass
410: .getConstructor(new Class[] { String.class }));
411: reorderTagStarts();
412: }
413:
414: /**
415: * Un-Registers an Element class.
416: *
417: * @throws ParserException if the class does not have a method called "getTagStart"
418: */
419: public void removeElementClass(String aTagStart)
420: throws ParserException {
421: if (myElementMap.remove(aTagStart) != null) {
422: reorderTagStarts();
423: }
424: }
425:
426: /**
427: * Returns an Element class.
428: *
429: * @param aTagStart String the class is registered under
430: * @see Tag#getTagStart()
431: */
432: public Constructor getElementConstructor(String aTagStart) {
433: return (Constructor) myElementMap.get(aTagStart);
434: }
435:
436: /**
437: * Closes the underlying reader.
438: */
439: public void close() throws IOException {
440: if (myReader != null) {
441: myReader.close();
442: myReader = null;
443: }
444: }
445:
446: /**
447: * Order TagStarts and ElementClasses to get the longest match first.
448: */
449: protected void reorderTagStarts() {
450: myLongestTagStartLength = 0;
451: myTagStarts = new String[myElementMap.size()];
452: myTagStarts = (String[]) myElementMap.keySet().toArray(
453: myTagStarts);
454:
455: Arrays.sort(myTagStarts); // myTagStarts must be ordered by length, longest first
456:
457: myElements = new Constructor[myTagStarts.length];
458:
459: for (int i = 0; i < myElements.length; i++) {
460: myElements[i] = (Constructor) myElementMap
461: .get(myTagStarts[i]);
462:
463: if (myTagStarts[i].length() > myLongestTagStartLength) {
464: myLongestTagStartLength = myTagStarts[i].length();
465: }
466: }
467: }
468:
469: /**
470: * Returns the value of the method <code>getTagStart()</code> of an
471: * element class.
472: */
473: protected String getTagStart(Class aClass) throws ParserException {
474: try {
475: Object o = aClass.newInstance();
476: Class[] paramType = new Class[0];
477: Object[] args = new Object[0];
478: Method theMethod = aClass.getMethod("getTagStart",
479: paramType);
480:
481: return (String) theMethod.invoke(o, args);
482: } catch (Exception e) {
483: throw new IllegalArgumentException(
484: "Failed to invoke getTagStart() of " + aClass);
485: }
486: }
487:
488: /**
489: * Tests.
490: */
491: public static void main(String[] args) throws Exception {
492: Parser theParser = new Parser(new BufferedReader(
493: new FileReader(args[0])));
494:
495: theParser.addElementClass(
496: com.tagtraum.framework.markup.Tag.class, "");
497: theParser.addElementClass(
498: com.tagtraum.framework.markup.Comment.class, "!--");
499: theParser.addElementClass(
500: com.tagtraum.framework.markup.SSITag.class, "!--#");
501:
502: I_Element theElement = null;
503: int length = 0;
504:
505: while ((theElement = theParser.nextElement()) != null) {
506: System.out.println(theElement);
507:
508: length += theElement.getLength();
509: }
510:
511: System.out.println();
512: System.out.println("Parsed Length: " + length);
513:
514: File theFile = new File(args[0]);
515:
516: System.out.println("Real Length : " + theFile.length());
517: }
518:
519: }
|