001: /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
002: *
003: * ***** BEGIN LICENSE BLOCK *****
004: * Version: MPL 1.1/GPL 2.0
005: *
006: * The contents of this file are subject to the Mozilla Public License Version
007: * 1.1 (the "License"); you may not use this file except in compliance with
008: * the License. You may obtain a copy of the License at
009: * http://www.mozilla.org/MPL/
010: *
011: * Software distributed under the License is distributed on an "AS IS" basis,
012: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
013: * for the specific language governing rights and limitations under the
014: * License.
015: *
016: * The Original Code is Rhino code, released
017: * May 6, 1999.
018: *
019: * The Initial Developer of the Original Code is
020: * Netscape Communications Corporation.
021: * Portions created by the Initial Developer are Copyright (C) 1997-1999
022: * the Initial Developer. All Rights Reserved.
023: *
024: * Contributor(s):
025: * Mike Ang
026: * Igor Bukanov
027: * Bob Jervis
028: * Mike McCabe
029: *
030: * Alternatively, the contents of this file may be used under the terms of
031: * the GNU General Public License Version 2 or later (the "GPL"), in which
032: * case the provisions of the GPL are applicable instead of those above. If
033: * you wish to allow use of your version of this file only under the terms of
034: * the GPL and not to allow others to use your version of this file under the
035: * MPL, indicate your decision by deleting the provisions above and replacing
036: * them with the notice and other provisions required by the GPL. If you do
037: * not delete the provisions above, a recipient may use your version of this
038: * file under either the MPL or the GPL.
039: *
040: * ***** END LICENSE BLOCK ***** */
041:
042: package org.mozilla.javascript;
043:
044: /**
045: * The following class save decompilation information about the source.
046: * Source information is returned from the parser as a String
047: * associated with function nodes and with the toplevel script. When
048: * saved in the constant pool of a class, this string will be UTF-8
049: * encoded, and token values will occupy a single byte.
050:
051: * Source is saved (mostly) as token numbers. The tokens saved pretty
052: * much correspond to the token stream of a 'canonical' representation
053: * of the input program, as directed by the parser. (There were a few
054: * cases where tokens could have been left out where decompiler could
055: * easily reconstruct them, but I left them in for clarity). (I also
056: * looked adding source collection to TokenStream instead, where I
057: * could have limited the changes to a few lines in getToken... but
058: * this wouldn't have saved any space in the resulting source
059: * representation, and would have meant that I'd have to duplicate
060: * parser logic in the decompiler to disambiguate situations where
061: * newlines are important.) The function decompile expands the
062: * tokens back into their string representations, using simple
063: * lookahead to correct spacing and indentation.
064: *
065: * Assignments are saved as two-token pairs (Token.ASSIGN, op). Number tokens
066: * are stored inline, as a NUMBER token, a character representing the type, and
067: * either 1 or 4 characters representing the bit-encoding of the number. String
068: * types NAME, STRING and OBJECT are currently stored as a token type,
069: * followed by a character giving the length of the string (assumed to
070: * be less than 2^16), followed by the characters of the string
071: * inlined into the source string. Changing this to some reference to
072: * to the string in the compiled class' constant pool would probably
073: * save a lot of space... but would require some method of deriving
074: * the final constant pool entry from information available at parse
075: * time.
076: */
077: public class Decompiler {
078: /**
079: * Flag to indicate that the decompilation should omit the
080: * function header and trailing brace.
081: */
082: public static final int ONLY_BODY_FLAG = 1 << 0;
083:
084: /**
085: * Flag to indicate that the decompilation generates toSource result.
086: */
087: public static final int TO_SOURCE_FLAG = 1 << 1;
088:
089: /**
090: * Decompilation property to specify initial ident value.
091: */
092: public static final int INITIAL_INDENT_PROP = 1;
093:
094: /**
095: * Decompilation property to specify default identation offset.
096: */
097: public static final int INDENT_GAP_PROP = 2;
098:
099: /**
100: * Decompilation property to specify identation offset for case labels.
101: */
102: public static final int CASE_GAP_PROP = 3;
103:
104: // Marker to denote the last RC of function so it can be distinguished from
105: // the last RC of object literals in case of function expressions
106: private static final int FUNCTION_END = Token.LAST_TOKEN + 1;
107:
108: String getEncodedSource() {
109: return sourceToString(0);
110: }
111:
112: int getCurrentOffset() {
113: return sourceTop;
114: }
115:
116: int markFunctionStart(int functionType) {
117: int savedOffset = getCurrentOffset();
118: addToken(Token.FUNCTION);
119: append((char) functionType);
120: return savedOffset;
121: }
122:
123: int markFunctionEnd(int functionStart) {
124: int offset = getCurrentOffset();
125: append((char) FUNCTION_END);
126: return offset;
127: }
128:
129: void addToken(int token) {
130: if (!(0 <= token && token <= Token.LAST_TOKEN))
131: throw new IllegalArgumentException();
132:
133: append((char) token);
134: }
135:
136: void addEOL(int token) {
137: if (!(0 <= token && token <= Token.LAST_TOKEN))
138: throw new IllegalArgumentException();
139:
140: append((char) token);
141: append((char) Token.EOL);
142: }
143:
144: void addName(String str) {
145: addToken(Token.NAME);
146: appendString(str);
147: }
148:
149: void addString(String str) {
150: addToken(Token.STRING);
151: appendString(str);
152: }
153:
154: void addRegexp(String regexp, String flags) {
155: addToken(Token.REGEXP);
156: appendString('/' + regexp + '/' + flags);
157: }
158:
159: void addNumber(double n) {
160: addToken(Token.NUMBER);
161:
162: /* encode the number in the source stream.
163: * Save as NUMBER type (char | char char char char)
164: * where type is
165: * 'D' - double, 'S' - short, 'J' - long.
166:
167: * We need to retain float vs. integer type info to keep the
168: * behavior of liveconnect type-guessing the same after
169: * decompilation. (Liveconnect tries to present 1.0 to Java
170: * as a float/double)
171: * OPT: This is no longer true. We could compress the format.
172:
173: * This may not be the most space-efficient encoding;
174: * the chars created below may take up to 3 bytes in
175: * constant pool UTF-8 encoding, so a Double could take
176: * up to 12 bytes.
177: */
178:
179: long lbits = (long) n;
180: if (lbits != n) {
181: // if it's floating point, save as a Double bit pattern.
182: // (12/15/97 our scanner only returns Double for f.p.)
183: lbits = Double.doubleToLongBits(n);
184: append('D');
185: append((char) (lbits >> 48));
186: append((char) (lbits >> 32));
187: append((char) (lbits >> 16));
188: append((char) lbits);
189: } else {
190: // we can ignore negative values, bc they're already prefixed
191: // by NEG
192: if (lbits < 0)
193: Kit.codeBug();
194:
195: // will it fit in a char?
196: // this gives a short encoding for integer values up to 2^16.
197: if (lbits <= Character.MAX_VALUE) {
198: append('S');
199: append((char) lbits);
200: } else { // Integral, but won't fit in a char. Store as a long.
201: append('J');
202: append((char) (lbits >> 48));
203: append((char) (lbits >> 32));
204: append((char) (lbits >> 16));
205: append((char) lbits);
206: }
207: }
208: }
209:
210: private void appendString(String str) {
211: int L = str.length();
212: int lengthEncodingSize = 1;
213: if (L >= 0x8000) {
214: lengthEncodingSize = 2;
215: }
216: int nextTop = sourceTop + lengthEncodingSize + L;
217: if (nextTop > sourceBuffer.length) {
218: increaseSourceCapacity(nextTop);
219: }
220: if (L >= 0x8000) {
221: // Use 2 chars to encode strings exceeding 32K, were the highest
222: // bit in the first char indicates presence of the next byte
223: sourceBuffer[sourceTop] = (char) (0x8000 | (L >>> 16));
224: ++sourceTop;
225: }
226: sourceBuffer[sourceTop] = (char) L;
227: ++sourceTop;
228: str.getChars(0, L, sourceBuffer, sourceTop);
229: sourceTop = nextTop;
230: }
231:
232: private void append(char c) {
233: if (sourceTop == sourceBuffer.length) {
234: increaseSourceCapacity(sourceTop + 1);
235: }
236: sourceBuffer[sourceTop] = c;
237: ++sourceTop;
238: }
239:
240: private void increaseSourceCapacity(int minimalCapacity) {
241: // Call this only when capacity increase is must
242: if (minimalCapacity <= sourceBuffer.length)
243: Kit.codeBug();
244: int newCapacity = sourceBuffer.length * 2;
245: if (newCapacity < minimalCapacity) {
246: newCapacity = minimalCapacity;
247: }
248: char[] tmp = new char[newCapacity];
249: System.arraycopy(sourceBuffer, 0, tmp, 0, sourceTop);
250: sourceBuffer = tmp;
251: }
252:
253: private String sourceToString(int offset) {
254: if (offset < 0 || sourceTop < offset)
255: Kit.codeBug();
256: return new String(sourceBuffer, offset, sourceTop - offset);
257: }
258:
259: /**
260: * Decompile the source information associated with this js
261: * function/script back into a string. For the most part, this
262: * just means translating tokens back to their string
263: * representations; there's a little bit of lookahead logic to
264: * decide the proper spacing/indentation. Most of the work in
265: * mapping the original source to the prettyprinted decompiled
266: * version is done by the parser.
267: *
268: * @param source encoded source tree presentation
269: *
270: * @param flags flags to select output format
271: *
272: * @param properties indentation properties
273: *
274: */
275: public static String decompile(String source, int flags,
276: UintMap properties) {
277: int length = source.length();
278: if (length == 0) {
279: return "";
280: }
281:
282: int indent = properties.getInt(INITIAL_INDENT_PROP, 0);
283: if (indent < 0)
284: throw new IllegalArgumentException();
285: int indentGap = properties.getInt(INDENT_GAP_PROP, 4);
286: if (indentGap < 0)
287: throw new IllegalArgumentException();
288: int caseGap = properties.getInt(CASE_GAP_PROP, 2);
289: if (caseGap < 0)
290: throw new IllegalArgumentException();
291:
292: StringBuffer result = new StringBuffer();
293: boolean justFunctionBody = (0 != (flags & Decompiler.ONLY_BODY_FLAG));
294: boolean toSource = (0 != (flags & Decompiler.TO_SOURCE_FLAG));
295:
296: // Spew tokens in source, for debugging.
297: // as TYPE number char
298: if (printSource) {
299: System.err.println("length:" + length);
300: for (int i = 0; i < length; ++i) {
301: // Note that tokenToName will fail unless Context.printTrees
302: // is true.
303: String tokenname = null;
304: if (Token.printNames) {
305: tokenname = Token.name(source.charAt(i));
306: }
307: if (tokenname == null) {
308: tokenname = "---";
309: }
310: String pad = tokenname.length() > 7 ? "\t" : "\t\t";
311: System.err.println(tokenname
312: + pad
313: + (int) source.charAt(i)
314: + "\t'"
315: + ScriptRuntime.escapeString(source.substring(
316: i, i + 1)) + "'");
317: }
318: System.err.println();
319: }
320:
321: int braceNesting = 0;
322: boolean afterFirstEOL = false;
323: int i = 0;
324: int topFunctionType;
325: if (source.charAt(i) == Token.SCRIPT) {
326: ++i;
327: topFunctionType = -1;
328: } else {
329: topFunctionType = source.charAt(i + 1);
330: }
331:
332: if (!toSource) {
333: // add an initial newline to exactly match js.
334: result.append('\n');
335: for (int j = 0; j < indent; j++)
336: result.append(' ');
337: } else {
338: if (topFunctionType == FunctionNode.FUNCTION_EXPRESSION) {
339: result.append('(');
340: }
341: }
342:
343: while (i < length) {
344: switch (source.charAt(i)) {
345: case Token.GET:
346: case Token.SET:
347: result.append(source.charAt(i) == Token.GET ? "get "
348: : "set ");
349: ++i;
350: i = printSourceString(source, i + 1, false, result);
351: // Now increment one more to get past the FUNCTION token
352: ++i;
353: break;
354:
355: case Token.NAME:
356: case Token.REGEXP: // re-wrapped in '/'s in parser...
357: i = printSourceString(source, i + 1, false, result);
358: continue;
359:
360: case Token.STRING:
361: i = printSourceString(source, i + 1, true, result);
362: continue;
363:
364: case Token.NUMBER:
365: i = printSourceNumber(source, i + 1, result);
366: continue;
367:
368: case Token.TRUE:
369: result.append("true");
370: break;
371:
372: case Token.FALSE:
373: result.append("false");
374: break;
375:
376: case Token.NULL:
377: result.append("null");
378: break;
379:
380: case Token.THIS:
381: result.append("this");
382: break;
383:
384: case Token.FUNCTION:
385: ++i; // skip function type
386: result.append("function ");
387: break;
388:
389: case FUNCTION_END:
390: // Do nothing
391: break;
392:
393: case Token.COMMA:
394: result.append(", ");
395: break;
396:
397: case Token.LC:
398: ++braceNesting;
399: if (Token.EOL == getNext(source, length, i))
400: indent += indentGap;
401: result.append('{');
402: break;
403:
404: case Token.RC: {
405: --braceNesting;
406: /* don't print the closing RC if it closes the
407: * toplevel function and we're called from
408: * decompileFunctionBody.
409: */
410: if (justFunctionBody && braceNesting == 0)
411: break;
412:
413: result.append('}');
414: switch (getNext(source, length, i)) {
415: case Token.EOL:
416: case FUNCTION_END:
417: indent -= indentGap;
418: break;
419: case Token.WHILE:
420: case Token.ELSE:
421: indent -= indentGap;
422: result.append(' ');
423: break;
424: }
425: break;
426: }
427: case Token.LP:
428: result.append('(');
429: break;
430:
431: case Token.RP:
432: result.append(')');
433: if (Token.LC == getNext(source, length, i))
434: result.append(' ');
435: break;
436:
437: case Token.LB:
438: result.append('[');
439: break;
440:
441: case Token.RB:
442: result.append(']');
443: break;
444:
445: case Token.EOL: {
446: if (toSource)
447: break;
448: boolean newLine = true;
449: if (!afterFirstEOL) {
450: afterFirstEOL = true;
451: if (justFunctionBody) {
452: /* throw away just added 'function name(...) {'
453: * and restore the original indent
454: */
455: result.setLength(0);
456: indent -= indentGap;
457: newLine = false;
458: }
459: }
460: if (newLine) {
461: result.append('\n');
462: }
463:
464: /* add indent if any tokens remain,
465: * less setback if next token is
466: * a label, case or default.
467: */
468: if (i + 1 < length) {
469: int less = 0;
470: int nextToken = source.charAt(i + 1);
471: if (nextToken == Token.CASE
472: || nextToken == Token.DEFAULT) {
473: less = indentGap - caseGap;
474: } else if (nextToken == Token.RC) {
475: less = indentGap;
476: }
477:
478: /* elaborate check against label... skip past a
479: * following inlined NAME and look for a COLON.
480: */
481: else if (nextToken == Token.NAME) {
482: int afterName = getSourceStringEnd(source,
483: i + 2);
484: if (source.charAt(afterName) == Token.COLON)
485: less = indentGap;
486: }
487:
488: for (; less < indent; less++)
489: result.append(' ');
490: }
491: break;
492: }
493: case Token.DOT:
494: result.append('.');
495: break;
496:
497: case Token.NEW:
498: result.append("new ");
499: break;
500:
501: case Token.DELPROP:
502: result.append("delete ");
503: break;
504:
505: case Token.IF:
506: result.append("if ");
507: break;
508:
509: case Token.ELSE:
510: result.append("else ");
511: break;
512:
513: case Token.FOR:
514: result.append("for ");
515: break;
516:
517: case Token.IN:
518: result.append(" in ");
519: break;
520:
521: case Token.WITH:
522: result.append("with ");
523: break;
524:
525: case Token.WHILE:
526: result.append("while ");
527: break;
528:
529: case Token.DO:
530: result.append("do ");
531: break;
532:
533: case Token.TRY:
534: result.append("try ");
535: break;
536:
537: case Token.CATCH:
538: result.append("catch ");
539: break;
540:
541: case Token.FINALLY:
542: result.append("finally ");
543: break;
544:
545: case Token.THROW:
546: result.append("throw ");
547: break;
548:
549: case Token.SWITCH:
550: result.append("switch ");
551: break;
552:
553: case Token.BREAK:
554: result.append("break");
555: if (Token.NAME == getNext(source, length, i))
556: result.append(' ');
557: break;
558:
559: case Token.CONTINUE:
560: result.append("continue");
561: if (Token.NAME == getNext(source, length, i))
562: result.append(' ');
563: break;
564:
565: case Token.CASE:
566: result.append("case ");
567: break;
568:
569: case Token.DEFAULT:
570: result.append("default");
571: break;
572:
573: case Token.RETURN:
574: result.append("return");
575: if (Token.SEMI != getNext(source, length, i))
576: result.append(' ');
577: break;
578:
579: case Token.VAR:
580: result.append("var ");
581: break;
582:
583: case Token.LET:
584: result.append("let ");
585: break;
586:
587: case Token.SEMI:
588: result.append(';');
589: if (Token.EOL != getNext(source, length, i)) {
590: // separators in FOR
591: result.append(' ');
592: }
593: break;
594:
595: case Token.ASSIGN:
596: result.append(" = ");
597: break;
598:
599: case Token.ASSIGN_ADD:
600: result.append(" += ");
601: break;
602:
603: case Token.ASSIGN_SUB:
604: result.append(" -= ");
605: break;
606:
607: case Token.ASSIGN_MUL:
608: result.append(" *= ");
609: break;
610:
611: case Token.ASSIGN_DIV:
612: result.append(" /= ");
613: break;
614:
615: case Token.ASSIGN_MOD:
616: result.append(" %= ");
617: break;
618:
619: case Token.ASSIGN_BITOR:
620: result.append(" |= ");
621: break;
622:
623: case Token.ASSIGN_BITXOR:
624: result.append(" ^= ");
625: break;
626:
627: case Token.ASSIGN_BITAND:
628: result.append(" &= ");
629: break;
630:
631: case Token.ASSIGN_LSH:
632: result.append(" <<= ");
633: break;
634:
635: case Token.ASSIGN_RSH:
636: result.append(" >>= ");
637: break;
638:
639: case Token.ASSIGN_URSH:
640: result.append(" >>>= ");
641: break;
642:
643: case Token.HOOK:
644: result.append(" ? ");
645: break;
646:
647: case Token.OBJECTLIT:
648: // pun OBJECTLIT to mean colon in objlit property
649: // initialization.
650: // This needs to be distinct from COLON in the general case
651: // to distinguish from the colon in a ternary... which needs
652: // different spacing.
653: result.append(':');
654: break;
655:
656: case Token.COLON:
657: if (Token.EOL == getNext(source, length, i))
658: // it's the end of a label
659: result.append(':');
660: else
661: // it's the middle part of a ternary
662: result.append(" : ");
663: break;
664:
665: case Token.OR:
666: result.append(" || ");
667: break;
668:
669: case Token.AND:
670: result.append(" && ");
671: break;
672:
673: case Token.BITOR:
674: result.append(" | ");
675: break;
676:
677: case Token.BITXOR:
678: result.append(" ^ ");
679: break;
680:
681: case Token.BITAND:
682: result.append(" & ");
683: break;
684:
685: case Token.SHEQ:
686: result.append(" === ");
687: break;
688:
689: case Token.SHNE:
690: result.append(" !== ");
691: break;
692:
693: case Token.EQ:
694: result.append(" == ");
695: break;
696:
697: case Token.NE:
698: result.append(" != ");
699: break;
700:
701: case Token.LE:
702: result.append(" <= ");
703: break;
704:
705: case Token.LT:
706: result.append(" < ");
707: break;
708:
709: case Token.GE:
710: result.append(" >= ");
711: break;
712:
713: case Token.GT:
714: result.append(" > ");
715: break;
716:
717: case Token.INSTANCEOF:
718: result.append(" instanceof ");
719: break;
720:
721: case Token.LSH:
722: result.append(" << ");
723: break;
724:
725: case Token.RSH:
726: result.append(" >> ");
727: break;
728:
729: case Token.URSH:
730: result.append(" >>> ");
731: break;
732:
733: case Token.TYPEOF:
734: result.append("typeof ");
735: break;
736:
737: case Token.VOID:
738: result.append("void ");
739: break;
740:
741: case Token.CONST:
742: result.append("const ");
743: break;
744:
745: case Token.YIELD:
746: result.append("yield ");
747: break;
748:
749: case Token.NOT:
750: result.append('!');
751: break;
752:
753: case Token.BITNOT:
754: result.append('~');
755: break;
756:
757: case Token.POS:
758: result.append('+');
759: break;
760:
761: case Token.NEG:
762: result.append('-');
763: break;
764:
765: case Token.INC:
766: result.append("++");
767: break;
768:
769: case Token.DEC:
770: result.append("--");
771: break;
772:
773: case Token.ADD:
774: result.append(" + ");
775: break;
776:
777: case Token.SUB:
778: result.append(" - ");
779: break;
780:
781: case Token.MUL:
782: result.append(" * ");
783: break;
784:
785: case Token.DIV:
786: result.append(" / ");
787: break;
788:
789: case Token.MOD:
790: result.append(" % ");
791: break;
792:
793: case Token.COLONCOLON:
794: result.append("::");
795: break;
796:
797: case Token.DOTDOT:
798: result.append("..");
799: break;
800:
801: case Token.DOTQUERY:
802: result.append(".(");
803: break;
804:
805: case Token.XMLATTR:
806: result.append('@');
807: break;
808:
809: default:
810: // If we don't know how to decompile it, raise an exception.
811: throw new RuntimeException("Token: "
812: + Token.name(source.charAt(i)));
813: }
814: ++i;
815: }
816:
817: if (!toSource) {
818: // add that trailing newline if it's an outermost function.
819: if (!justFunctionBody)
820: result.append('\n');
821: } else {
822: if (topFunctionType == FunctionNode.FUNCTION_EXPRESSION) {
823: result.append(')');
824: }
825: }
826:
827: return result.toString();
828: }
829:
830: private static int getNext(String source, int length, int i) {
831: return (i + 1 < length) ? source.charAt(i + 1) : Token.EOF;
832: }
833:
834: private static int getSourceStringEnd(String source, int offset) {
835: return printSourceString(source, offset, false, null);
836: }
837:
838: private static int printSourceString(String source, int offset,
839: boolean asQuotedString, StringBuffer sb) {
840: int length = source.charAt(offset);
841: ++offset;
842: if ((0x8000 & length) != 0) {
843: length = ((0x7FFF & length) << 16) | source.charAt(offset);
844: ++offset;
845: }
846: if (sb != null) {
847: String str = source.substring(offset, offset + length);
848: if (!asQuotedString) {
849: sb.append(str);
850: } else {
851: sb.append('"');
852: sb.append(ScriptRuntime.escapeString(str));
853: sb.append('"');
854: }
855: }
856: return offset + length;
857: }
858:
859: private static int printSourceNumber(String source, int offset,
860: StringBuffer sb) {
861: double number = 0.0;
862: char type = source.charAt(offset);
863: ++offset;
864: if (type == 'S') {
865: if (sb != null) {
866: int ival = source.charAt(offset);
867: number = ival;
868: }
869: ++offset;
870: } else if (type == 'J' || type == 'D') {
871: if (sb != null) {
872: long lbits;
873: lbits = (long) source.charAt(offset) << 48;
874: lbits |= (long) source.charAt(offset + 1) << 32;
875: lbits |= (long) source.charAt(offset + 2) << 16;
876: lbits |= source.charAt(offset + 3);
877: if (type == 'J') {
878: number = lbits;
879: } else {
880: number = Double.longBitsToDouble(lbits);
881: }
882: }
883: offset += 4;
884: } else {
885: // Bad source
886: throw new RuntimeException();
887: }
888: if (sb != null) {
889: sb.append(ScriptRuntime.numberToString(number, 10));
890: }
891: return offset;
892: }
893:
894: private char[] sourceBuffer = new char[128];
895:
896: // Per script/function source buffer top: parent source does not include a
897: // nested functions source and uses function index as a reference instead.
898: private int sourceTop;
899:
900: // whether to do a debug print of the source information, when decompiling.
901: private static final boolean printSource = false;
902:
903: }
|