001: /*
002: * Project: BeautyJ - Customizable Java Source Code Transformer
003: * Class: de.gulden.util.javasource.SourceParser
004: * Version: 1.1
005: *
006: * Date: 2004-09-29
007: *
008: * Note: Contains auto-generated Javadoc comments created by BeautyJ.
009: *
010: * This is licensed under the GNU General Public License (GPL)
011: * and comes with NO WARRANTY. See file license.txt for details.
012: *
013: * Author: Jens Gulden
014: * Email: beautyj@jensgulden.de
015: */
016:
017: package de.gulden.util.javasource;
018:
019: import de.gulden.util.javasource.sourclet.Sourclet;
020: import de.gulden.util.javasource.jjt.Node;
021: import de.gulden.util.javasource.jjt.*;
022: import javax.xml.parsers.*;
023: import org.w3c.dom.*;
024: import org.xml.sax.SAXException;
025: import java.io.*;
026: import java.util.*;
027:
028: /**
029: * SourceParser main utility class.
030: * This class provides functionality to parse Java source codes and build
031: * a tree of objects representing the source elements.
032: * This tree of objects can be output as XML or, instead of parsing Java
033: * sources, be parsed from previously generated XML.
034: *
035: * @author Jens Gulden
036: * @version 1.1
037: * @see de.gulden.util.javasource.sourclet.Sourclet
038: * @see de.gulden.util.javasource.sourclet.standard.StandardSourclet
039: */
040: public class SourceParser implements ParserTreeConstants {
041:
042: // ------------------------------------------------------------------------
043: // --- final static fields ---
044: // ------------------------------------------------------------------------
045:
046: /**
047: * Version
048: */
049: public static final String VERSION = "1.1";
050:
051: /**
052: * Constant workaroundUnicodeSingleCharMarker.
053: */
054: protected static final String workaroundUnicodeSingleCharMarker = "-"
055: + "unicodechar" + "-";
056:
057: // ------------------------------------------------------------------------
058: // --- static fields ---
059: // ------------------------------------------------------------------------
060:
061: /**
062: * Linefeed.
063: */
064: public static String nl = System.getProperty("line.separator");
065:
066: /**
067: * Flag specifying whether to include a DTD reference (<!DOCTYPE..>) into generated XML.
068: * Externally set.
069: */
070: public static boolean includeXMLDoctype = false;
071:
072: /**
073: * Flag specifying whether to validate an XML file against its DTD before it is parsed.
074: * Externally set.
075: */
076: public static boolean validateXML = false;
077:
078: /**
079: * Global verbose flag.
080: */
081: public static boolean verbose = false;
082:
083: /**
084: * Log performer, may be set externally.
085: */
086: public static LogPerformer logPerformer = LogPerformer.DEFAULT;
087:
088: /**
089: * Document builder for parsing XML.
090: * Will be initialized when first used.
091: */
092: protected static DocumentBuilder documentBuilder;
093:
094: // ------------------------------------------------------------------------
095: // --- static methods ---
096: // ------------------------------------------------------------------------
097:
098: /**
099: * Create object tree from Java source inputs.
100: *
101: * @throws IOException if an i/o error occurs
102: * @return Root package (named "") containing all other packages with classes.
103: */
104: public static Package parse(File file, ProgressTracker pt)
105: throws IOException, ParseException {
106: return parse(new File[] { file }, pt);
107: }
108:
109: /**
110: * Create object tree from Java source inputs.
111: *
112: * @param files A list of files and/or directories. Any .java-file will be parsed, any other ignored.
113: * @throws IOException if an i/o error occurs
114: * @return Root package (named "") containing all other packages with classes.
115: */
116: public static Package parse(File[] files, ProgressTracker pt)
117: throws IOException, ParseException {
118: Package base = new Package();
119: parse(files, base, pt);
120: return base;
121: }
122:
123: /**
124: * Parses files and adds the parsed objects to the specified base package.
125: *
126: * @throws IOException if an i/o error occurs
127: */
128: public static void parse(File[] files, Package basePackage,
129: ProgressTracker pt) throws IOException, ParseException {
130: parsePass1(basePackage, files, pt);
131: analysePass2(basePackage, pt);
132: }
133:
134: /**
135: * Parses a file and adds the parsed objects to the specified bas package.
136: *
137: * @throws IOException if an i/o error occurs
138: */
139: public static void parse(File file, Package basePackage,
140: ProgressTracker pt) throws IOException, ParseException {
141: parse(new File[] { file }, basePackage, pt);
142: }
143:
144: /**
145: * Create object tree from Java source inputs.
146: *
147: * @throws IOException if an i/o error occurs
148: * @return Base package (named "") containing all other packages with classes.
149: */
150: public static Package parse(String[] filenames, ProgressTracker pt)
151: throws IOException, ParseException {
152: File[] f = new File[filenames.length];
153: for (int i = 0; i < filenames.length; i++) {
154: f[i] = new File(filenames[i]);
155: }
156: return parse(f, pt);
157: }
158:
159: /**
160: * Create object tree from XML input, previously created from parsed .java-files.
161: *
162: * @throws IOException if an i/o error occurs
163: * @throws SAXException if an XML parser error occurs
164: * @return Base package (named "") containing all other packages with classes.
165: * @see #buildXML
166: */
167: public static Package parseXML(InputStream in) throws IOException,
168: SAXException {
169: Document doc = getDocumentBuilder().parse(in);
170: Package p = new Package();
171: Element e = doc.getDocumentElement();
172: p.initFromXML(e); // <xjava> may be treated as base package
173: return p;
174: }
175:
176: /**
177: * Output an object tree of source code elements to XML.
178: *
179: * @param p Package containing all other packages with classes that are to be converted to XML.
180: * @return The XML DOM-document.
181: * @see #parseXML
182: */
183: public static Document buildXML(Package p) {
184: DOMImplementation domImplementation = getDocumentBuilder()
185: .getDOMImplementation();
186: DocumentType doctype;
187: if (includeXMLDoctype) {
188: doctype = domImplementation.createDocumentType("xjava",
189: null, "xjava.dtd");
190: } else {
191: doctype = null;
192: }
193: Document doc = domImplementation.createDocument(null, "xjava",
194: doctype);
195: Element root = doc.getDocumentElement();
196: Element xml = p.buildXML(doc);
197: if (root != xml) { // xml may be tag 'xjava', created by base package
198: root.appendChild(xml);
199: }
200: root.setAttribute("version", VERSION);
201: return doc;
202: }
203:
204: /**
205: * Output object tree of source code elements as Java source files,
206: * applying a Sourclet for formatting the code.
207: *
208: * @param p Package containing all other packages with classes that are to be output as formatted source code.
209: * @param dir Base directory where to output .java-files. A directory structure matching the classes' packages structure will be created.
210: * @param sourclet The Sourclet to use for formatting the output.
211: * @throws IOException if an i/o error occurs
212: */
213: public static void buildSource(Package p, File dir, File[] sources,
214: Sourclet sourclet) throws IOException {
215: // classes
216: NamedIterator it = p.getClasses();
217: while (it.hasMore()) {
218: Class c = (Class) it.next();
219: if ((sources == null) || (c.getSource() == null)
220: || isInSources(new File(c.getSource()), sources)) { // if originating from files, suppress building those sources that have only been loaded for referencing classes, but not been specified as inputs
221: String classname = c.getUnqualifiedName();
222: File file = new File(dir, classname + ".java");
223: log("writing " + file.getPath());
224: FileOutputStream f = new FileOutputStream(file);
225: BufferedOutputStream buf = new BufferedOutputStream(f); // this might cause little optimization, as we are writing many small bits in sequence to the stream
226: sourclet.buildSource(buf, c);
227: buf.close();
228: }
229: }
230:
231: // inner packages
232: it = p.getInnerPackages();
233: while (it.hasMore()) {
234: Package pp = (Package) it.next();
235: String pname = pp.getUnqualifiedName();
236: File indir = new File(dir, pname);
237: boolean created = indir.mkdir();
238: if (created) {
239: log("directory " + indir.getPath() + " created");
240: }
241: buildSource(pp, indir, sources, sourclet);
242: }
243: }
244:
245: /**
246: * Tool function: indent a multi-line string by <i>depth</i> blank characters in front of each line.
247: */
248: public static String indent(String s, int depth) {
249: StringBuffer sb = new StringBuffer();
250: StringTokenizer st = new StringTokenizer(s, "\n");
251: while (st.hasMoreTokens()) {
252: sb.append(repeat(" ", depth) + st.nextToken()
253: + (st.hasMoreTokens() ? "\n" : ""));
254: }
255: return sb.toString();
256: }
257:
258: /**
259: * Tool function: create a new String which contains <i>s</i> repeated <i>c</i> times.
260: */
261: public static String repeat(String s, int c) {
262: if (c > 0) {
263: StringBuffer sb = new StringBuffer(s);
264: for (int i = 1; i < c; i++) {
265: sb.append(s);
266: }
267: return sb.toString();
268: } else {
269: return "";
270: }
271: }
272:
273: /**
274: * Tool function: replace any occurrence of <i>old</i> in <i>s</i> with <i>neu</i>.
275: */
276: public static String replace(String s, String old, String neu) {
277: int pos = s.indexOf(old);
278: if (pos != -1) {
279: return s.substring(0, pos)
280: + neu
281: + replace(s.substring(pos + old.length()), old, neu);
282: } else {
283: return s;
284: }
285: }
286:
287: /**
288: * Restores manipualted Java source code which avoided single-char unicode characters
289: * back to the original code.
290: * Called only from Code.java.
291: *
292: * @param s manipulated Java source string, as returned from workaroundAvoidUnicodeSingleChar()
293: * @return the original Java source code, as it had been passed as input to workaroundAvoidUnicodeSingleChar()
294: * @see #workaroundAvoidUnicodeSingleChar(String)
295: */
296: public static String workaroundRestoreUnicodeSingleChar(String s) {
297: int pos = s.indexOf("\"" + workaroundUnicodeSingleCharMarker);
298: int l = workaroundUnicodeSingleCharMarker.length();
299: if (pos != -1) {
300: return s.substring(0, pos)
301: + "'\\u"
302: + s.substring(pos + (l + 1), pos + (l + 5))
303: + "'"
304: + workaroundRestoreUnicodeSingleChar(s
305: .substring(pos + (l + 6)));
306: } else {
307: return s;
308: }
309: }
310:
311: /**
312: * Creates XML document builder.
313: */
314: protected static DocumentBuilder getDocumentBuilder() {
315: if (documentBuilder == null) {
316: // init on demand
317: DocumentBuilderFactory dbf = DocumentBuilderFactory
318: .newInstance();
319: dbf.setIgnoringComments(true);
320: dbf.setExpandEntityReferences(validateXML);
321: dbf.setValidating(validateXML); // seems to have no effect, always true (?)
322: try {
323: documentBuilder = dbf.newDocumentBuilder();
324: } catch (ParserConfigurationException pce) {
325: System.out
326: .println("ERROR: cannot initialize XML parser - "
327: + pce.getMessage());
328: // program will exit with NullPointerException after return from this method
329: }
330: }
331: return documentBuilder;
332: }
333:
334: /**
335: *
336: * @throws IOException if an i/o error occurs
337: */
338: protected static void parsePass1(Package basePackage, File[] files,
339: ProgressTracker pt) throws IOException, ParseException {
340: Vector todoFiles = new Vector();
341: for (int i = 0; i < files.length; i++) {
342: if (files[i].exists()) {
343: if (files[i].isFile()) {
344: String filename = files[i].getName();
345: if (filename.endsWith(".java")) {
346: if (pt != null) {
347: pt.todo(4);
348: }
349: todoFiles.addElement(files[i]);
350: } else {
351: //nop, ignore other file types
352: }
353: } else if (files[i].isDirectory()) {
354: String[] list = files[i].list();
355: File[] ff = new File[list.length];
356: for (int j = 0; j < list.length; j++) {
357: File ffile = new File(files[i], list[j]);
358: ff[j] = ffile;
359: }
360: parsePass1(basePackage, ff, pt); //recursion
361: }
362: } else {
363: warning("warning: can't find input file/directory "
364: + files[i].getPath() + ", ignoring");
365: }
366: }
367:
368: for (Enumeration e = todoFiles.elements(); e.hasMoreElements();) {
369: File f = (File) e.nextElement();
370: log("parsing pass 1: " + f.getPath());
371: String code = readFile(f);
372: // workaround 1: avoid \r
373: code = code.replace('\r', ' ');
374: // workaround 2: parser would resolve unicode character declarations ' \ u xxxx ', so change them to a pseudo-string (this is quite an ugly workaround...)
375: // need to call workarondUnicodeRestore() after parsing code blocks
376: // Another workaround for a parser bug: if last line of a source file is a single-line-comment without ending line-break, the parser will crash
377: // so append a 'safety-linefeed' after each input file, it can't do any harm (see JavaCC FAQ 3.15)
378: code = workaroundAvoidUnicodeSingleChar(code) + nl;
379: InputStream in = new StringBufferInputStream(code);
380: analysePass1(basePackage, in, f.getAbsolutePath(), pt);
381: }
382: }
383:
384: /**
385: * Parsing pass 1.<br>
386: * This calls the parser generated by JavaCC and converts input source
387: * code into an object-structure.<br>
388: *
389: * @throws IOException if an i/o error occurs
390: * @see #analysePass2
391: */
392: protected static void analysePass1(Package basePackage,
393: InputStream in, String source, ProgressTracker pt)
394: throws IOException, ParseException {
395: Node rootnode;
396: Node[] nodes;
397:
398: // syntax parse
399: rootnode = Parser.parse(in, source);
400:
401: if (pt != null) {
402: pt.done(3); // pass1 weighted as 3, pass2 as 1
403: }
404:
405: // semantic analysis: create class structure
406:
407: // package
408: Package pakkage;
409: Node pak = rootnode.getChild(JJT_PACKAGE);
410: if (pak != null) {
411: pakkage = new Package();
412: pakkage.initFromAST(pak);
413: } else {
414: pakkage = basePackage;
415: }
416:
417: // imports
418: nodes = rootnode.getChildren(JJT_IMPORT);
419: Vector imports = new Vector();
420: for (int i = 0; i < nodes.length; i++) {
421: imports.addElement(Import.createFromAST(basePackage,
422: nodes[i]));
423: }
424:
425: // interfaces
426: nodes = rootnode.getChildren(JJT_INTERFACE);
427: for (int i = 0; i < nodes.length; i++) {
428: Class c = new Class();
429: c.setPackage(pakkage);
430: NamedIterator it = c.getImports();
431: for (Enumeration e = imports.elements(); e
432: .hasMoreElements();) {
433: Import im = (Import) e.nextElement();
434: it.add(im);
435: }
436: c.setInterface(true);
437: c.initFromAST(nodes[i]); // pass 1 only
438: }
439:
440: // classes
441: nodes = rootnode.getChildren(JJT_CLASS);
442: for (int i = 0; i < nodes.length; i++) {
443: Class c = new Class();
444: c.setPackage(pakkage);
445: NamedIterator it = c.getImports();
446: for (Enumeration e = imports.elements(); e
447: .hasMoreElements();) {
448: Import im = (Import) e.nextElement();
449: it.add(im);
450: }
451: c.initFromAST(nodes[i]); // pass 1 only
452: }
453:
454: basePackage.add(pakkage);
455: }
456:
457: /**
458: * Parsing pass 2.<br>
459: * Now that all classes in packages are already known,
460: * unqualified references can be qualified clearly.
461: * So let classes/ interfaces perform their 'real' initialization now.
462: *
463: * @throws IOException if an i/o error occurs
464: * @see #analysePass1
465: */
466: protected static void analysePass2(Package pack, ProgressTracker pt)
467: throws IOException, ParseException {
468: NamedIterator it;
469: it = pack.getClasses();
470: while (it.hasMore()) {
471: Class c = (Class) it.next();
472: if (!c.pass2) { // (ask here to suppress message)
473: log("parsing pass 2: " + c.getName());
474: c.initFromASTPass2();
475: }
476: if (pt != null) {
477: pt.done(1); // pass1 weighted as 3, pass2 as 1
478: }
479: }
480: // perform this recursively on all inner packages
481: it = pack.getInnerPackages();
482: while (it.hasMore()) {
483: Package p = (Package) it.next();
484: analysePass2(p, pt);
485: }
486: }
487:
488: /**
489: * Replaces all occurrences of single-character-constants using unicode
490: * with a pseudo-string. This way, the parser does not resolve the unicode char.
491: * This is quite an ugly workaround, but usually not too costy, as single unicode chars
492: * are rarely used.
493: *
494: * @param s Java source code string, maybe containg single-char unicode constants.
495: * @return manipulated Java source string
496: * @see #workaroundRestoreUnicodeSingleChar(String)
497: */
498: protected static String workaroundAvoidUnicodeSingleChar(String s) {
499: int pos = s.indexOf("'\\u");
500: if (pos != -1) {
501: // make sure this is not inside a string constant
502: int linestart = s.lastIndexOf(nl, pos) + 1; // will result in 0 for 'not found' which is wanted
503: char q = endsQuoted(s.substring(linestart, pos));
504: if (q == (char) 0) {
505: return s.substring(0, pos)
506: + "\""
507: + workaroundUnicodeSingleCharMarker
508: + s.substring(pos + 3, pos + 7)
509: + "\""
510: + workaroundAvoidUnicodeSingleChar(s
511: .substring(pos + 8));
512: } else {
513: int qe = quoteEnd(s, pos, q);
514: return s.substring(0, qe + 1)
515: + workaroundAvoidUnicodeSingleChar(s
516: .substring(qe + 1));
517: }
518: } else {
519: return s;
520: }
521: }
522:
523: /**
524: *
525: * @throws IOException if an i/o error occurs
526: */
527: protected static String readFile(File f) throws IOException {
528: FileReader r = new FileReader(f);
529: char[] c = new char[(int) f.length()];
530: r.read(c);
531: r.close();
532: return new String(c);
533: }
534:
535: protected static char endsQuoted(String s) {
536: char[] cc = new char[s.length()];
537: s.getChars(0, cc.length, cc, 0);
538: boolean escaped = false;
539: char quoted = (char) 0;
540:
541: for (int i = 0; i < cc.length; i++) {
542: char c = cc[i];
543: if (escaped) {
544: escaped = false;
545: } else {
546: switch (c) {
547: case '\\':
548: escaped = true;
549: break;
550: case '\"':
551: switch (quoted) {
552: case '\'':
553: break;
554: case '\"':
555: quoted = (char) 0; // unquote again
556: break;
557: default:
558: quoted = '\"';
559: break;
560: }
561: break;
562: case '\'':
563: switch (quoted) {
564: case '\"':
565: break;
566: case '\'':
567: quoted = (char) 0; // unquote again
568: break;
569: default:
570: quoted = '\'';
571: break;
572: }
573: break;
574: }
575: }
576: }
577: return quoted;
578: }
579:
580: protected static int quoteEnd(String s, int pos, char quoteChar) {
581: boolean escaped = false;
582: while (pos < s.length()) {
583: if (escaped) {
584: escaped = false;
585: } else {
586: char c = s.charAt(pos);
587: if (c == quoteChar) {
588: return pos;
589: } else if (c == '\\') {
590: escaped = true;
591: }
592: }
593: pos++;
594: }
595: return -1;
596: }
597:
598: /**
599: * Outputs a log message if the verbose-flag is set..
600: *
601: * @param msg The log message string.
602: */
603: protected static void log(String msg) {
604: if (verbose) {
605: logPerformer.log(msg);
606: }
607: }
608:
609: /**
610: * Outputs a warning message, which is the same as outputting a log message, but is performed even is verbose==false.
611: *
612: * @param msg The warning message string.
613: */
614: protected static void warning(String msg) {
615: logPerformer.log(msg);
616: }
617:
618: private static boolean isInSources(File f, File[] sources) {
619: try {
620: String fc = f.getCanonicalPath();
621: for (int i = 0; i < sources.length; i++) {
622: String ffc = sources[i].getCanonicalPath();
623: if (fc.startsWith(ffc)) { // or equal
624: return true;
625: }
626: }
627: return false;
628: } catch (IOException ioe) {
629: return false;
630: }
631: }
632:
633: } // end SourceParser
|