001: /*
002: * Copyright 2000-2004 The Apache Software Foundation
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: *
016: */
017: package org.apache.bcel.util;
018:
019: import java.util.ArrayList;
020: import java.util.HashMap;
021: import java.util.Iterator;
022: import java.util.List;
023: import java.util.Locale;
024: import java.util.Map;
025: import java.util.regex.Matcher;
026: import java.util.regex.Pattern;
027: import org.apache.bcel.Constants;
028: import org.apache.bcel.generic.ClassGenException;
029: import org.apache.bcel.generic.Instruction;
030: import org.apache.bcel.generic.InstructionHandle;
031: import org.apache.bcel.generic.InstructionList;
032:
033: /**
034: * InstructionFinder is a tool to search for given instructions patterns, i.e.,
035: * match sequences of instructions in an instruction list via regular
036: * expressions. This can be used, e.g., in order to implement a peep hole
037: * optimizer that looks for code patterns and replaces them with faster
038: * equivalents.
039: *
040: * <p>
041: * This class internally uses the <a href="http://jakarta.apache.org/regexp/">
042: * Regexp</a> package to search for regular expressions.
043: *
044: * A typical application would look like this:
045: *
046: * <pre>
047: *
048: *
049: * InstructionFinder f = new InstructionFinder(il);
050: * String pat = "IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)";
051: *
052: * for(Iterator i = f.search(pat, constraint); i.hasNext(); ) {
053: * InstructionHandle[] match = (InstructionHandle[])i.next();
054: * ...
055: * il.delete(match[1], match[5]);
056: * ...
057: * }
058: *
059: *
060: * </pre>
061: *
062: * @version $Id: InstructionFinder.java 386056 2006-03-15 11:31:56Z tcurdt $
063: * @author <A HREF="mailto:m.dahm@gmx.de">M. Dahm</A>
064: * @see Instruction
065: * @see InstructionList
066: */
067: public class InstructionFinder {
068:
069: private static final int OFFSET = 32767; // char + OFFSET is
070: // outside of
071: // LATIN-1
072: private static final int NO_OPCODES = 256; // Potential number,
073: // some are not used
074: private static final Map map = new HashMap(); // Map<String,Pattern>
075: private InstructionList il;
076: private String il_string; // instruction list
077: // as string
078: private InstructionHandle[] handles; // map instruction
079:
080: // list to array
081: /**
082: * @param il
083: * instruction list to search for given patterns
084: */
085: public InstructionFinder(InstructionList il) {
086: this .il = il;
087: reread();
088: }
089:
090: /**
091: * Reread the instruction list, e.g., after you've altered the list upon a
092: * match.
093: */
094: public final void reread() {
095: int size = il.getLength();
096: char[] buf = new char[size]; // Create a string with length equal to il
097: // length
098: handles = il.getInstructionHandles();
099: // Map opcodes to characters
100: for (int i = 0; i < size; i++) {
101: buf[i] = makeChar(handles[i].getInstruction().getOpcode());
102: }
103: il_string = new String(buf);
104: }
105:
106: /**
107: * Map symbolic instruction names like "getfield" to a single character.
108: *
109: * @param pattern
110: * instruction pattern in lower case
111: * @return encoded string for a pattern such as "BranchInstruction".
112: */
113: private static final String mapName(String pattern) {
114: String result = (String) map.get(pattern);
115: if (result != null) {
116: return result;
117: }
118: for (short i = 0; i < NO_OPCODES; i++) {
119: if (pattern.equals(Constants.OPCODE_NAMES[i])) {
120: return "" + makeChar(i);
121: }
122: }
123: throw new RuntimeException("Instruction unknown: " + pattern);
124: }
125:
126: /**
127: * Replace symbolic names of instructions with the appropiate character and
128: * remove all white space from string. Meta characters such as +, * are
129: * ignored.
130: *
131: * @param pattern
132: * The pattern to compile
133: * @return translated regular expression string
134: */
135: private static final String compilePattern(String pattern) {
136: //Bug: 38787 - Instructions are assumed to be english, to avoid odd Locale issues
137: String lower = pattern.toLowerCase(Locale.ENGLISH);
138: StringBuffer buf = new StringBuffer();
139: int size = pattern.length();
140: for (int i = 0; i < size; i++) {
141: char ch = lower.charAt(i);
142: if (Character.isLetterOrDigit(ch)) {
143: StringBuffer name = new StringBuffer();
144: while ((Character.isLetterOrDigit(ch) || ch == '_')
145: && i < size) {
146: name.append(ch);
147: if (++i < size) {
148: ch = lower.charAt(i);
149: } else {
150: break;
151: }
152: }
153: i--;
154: buf.append(mapName(name.toString()));
155: } else if (!Character.isWhitespace(ch)) {
156: buf.append(ch);
157: }
158: }
159: return buf.toString();
160: }
161:
162: /**
163: * @return the matched piece of code as an array of instruction (handles)
164: */
165: private InstructionHandle[] getMatch(int matched_from,
166: int match_length) {
167: InstructionHandle[] match = new InstructionHandle[match_length];
168: System.arraycopy(handles, matched_from, match, 0, match_length);
169: return match;
170: }
171:
172: /**
173: * Search for the given pattern in the instruction list. You can search for
174: * any valid opcode via its symbolic name, e.g. "istore". You can also use a
175: * super class or an interface name to match a whole set of instructions, e.g.
176: * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all
177: * "istore_x" instructions. Additional aliases are "if" for "ifxx", "if_icmp"
178: * for "if_icmpxx", "if_acmp" for "if_acmpxx".
179: *
180: * Consecutive instruction names must be separated by white space which will
181: * be removed during the compilation of the pattern.
182: *
183: * For the rest the usual pattern matching rules for regular expressions
184: * apply.
185: * <P>
186: * Example pattern:
187: *
188: * <pre>
189: * search("BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*");
190: * </pre>
191: *
192: * <p>
193: * If you alter the instruction list upon a match such that other matching
194: * areas are affected, you should call reread() to update the finder and call
195: * search() again, because the matches are cached.
196: *
197: * @param pattern
198: * the instruction pattern to search for, where case is ignored
199: * @param from
200: * where to start the search in the instruction list
201: * @param constraint
202: * optional CodeConstraint to check the found code pattern for
203: * user-defined constraints
204: * @return iterator of matches where e.nextElement() returns an array of
205: * instruction handles describing the matched area
206: */
207: public final Iterator search(String pattern,
208: InstructionHandle from, CodeConstraint constraint) {
209: String search = compilePattern(pattern);
210: int start = -1;
211: for (int i = 0; i < handles.length; i++) {
212: if (handles[i] == from) {
213: start = i; // Where to start search from (index)
214: break;
215: }
216: }
217: if (start == -1) {
218: throw new ClassGenException("Instruction handle " + from
219: + " not found in instruction list.");
220: }
221: Pattern regex = Pattern.compile(search);
222: List matches = new ArrayList();
223: Matcher matcher = regex.matcher(il_string);
224: while (start < il_string.length() && matcher.find(start)) {
225: int startExpr = matcher.start();
226: int endExpr = matcher.end();
227: int lenExpr = (endExpr - startExpr) + 1;
228: InstructionHandle[] match = getMatch(startExpr, lenExpr);
229: if ((constraint == null) || constraint.checkCode(match)) {
230: matches.add(match);
231: }
232: start = endExpr;
233: }
234: return matches.iterator();
235: }
236:
237: /**
238: * Start search beginning from the start of the given instruction list.
239: *
240: * @param pattern
241: * the instruction pattern to search for, where case is ignored
242: * @return iterator of matches where e.nextElement() returns an array of
243: * instruction handles describing the matched area
244: */
245: public final Iterator search(String pattern) {
246: return search(pattern, il.getStart(), null);
247: }
248:
249: /**
250: * Start search beginning from `from'.
251: *
252: * @param pattern
253: * the instruction pattern to search for, where case is ignored
254: * @param from
255: * where to start the search in the instruction list
256: * @return iterator of matches where e.nextElement() returns an array of
257: * instruction handles describing the matched area
258: */
259: public final Iterator search(String pattern, InstructionHandle from) {
260: return search(pattern, from, null);
261: }
262:
263: /**
264: * Start search beginning from the start of the given instruction list. Check
265: * found matches with the constraint object.
266: *
267: * @param pattern
268: * the instruction pattern to search for, case is ignored
269: * @param constraint
270: * constraints to be checked on matching code
271: * @return instruction handle or `null' if the match failed
272: */
273: public final Iterator search(String pattern,
274: CodeConstraint constraint) {
275: return search(pattern, il.getStart(), constraint);
276: }
277:
278: /**
279: * Convert opcode number to char.
280: */
281: private static final char makeChar(short opcode) {
282: return (char) (opcode + OFFSET);
283: }
284:
285: /**
286: * @return the inquired instruction list
287: */
288: public final InstructionList getInstructionList() {
289: return il;
290: }
291:
292: /**
293: * Code patterns found may be checked using an additional user-defined
294: * constraint object whether they really match the needed criterion. I.e.,
295: * check constraints that can not expressed with regular expressions.
296: *
297: */
298: public static interface CodeConstraint {
299:
300: /**
301: * @param match
302: * array of instructions matching the requested pattern
303: * @return true if the matched area is really useful
304: */
305: public boolean checkCode(InstructionHandle[] match);
306: }
307:
308: // Initialize pattern map
309: static {
310: map
311: .put(
312: "arithmeticinstruction",
313: "(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)");
314: map
315: .put("invokeinstruction",
316: "(invokevirtual|invokeinterface|invokestatic|invokespecial)");
317: map
318: .put(
319: "arrayinstruction",
320: "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)");
321: map.put("gotoinstruction", "(goto|goto_w)");
322: map
323: .put("conversioninstruction",
324: "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)");
325: map
326: .put("localvariableinstruction",
327: "(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)");
328: map.put("loadinstruction", "(fload|dload|lload|iload|aload)");
329: map.put("fieldinstruction",
330: "(getfield|putstatic|getstatic|putfield)");
331: map
332: .put(
333: "cpinstruction",
334: "(ldc2_w|invokeinterface|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)");
335: map
336: .put("stackinstruction",
337: "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)");
338: map
339: .put(
340: "branchinstruction",
341: "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
342: map.put("returninstruction",
343: "(lreturn|ireturn|freturn|dreturn|areturn|return)");
344: map.put("storeinstruction",
345: "(istore|fstore|dstore|astore|lstore)");
346: map.put("select", "(tableswitch|lookupswitch)");
347: map
348: .put(
349: "ifinstruction",
350: "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)");
351: map.put("jsrinstruction", "(jsr|jsr_w)");
352: map.put("variablelengthinstruction",
353: "(tableswitch|jsr|goto|lookupswitch)");
354: map
355: .put("unconditionalbranch",
356: "(goto|jsr|jsr_w|athrow|goto_w)");
357: map.put("constantpushinstruction",
358: "(dconst|bipush|sipush|fconst|iconst|lconst)");
359: map
360: .put(
361: "typedinstruction",
362: "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)");
363: map
364: .put("popinstruction",
365: "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)");
366: map.put("allocationinstruction",
367: "(multianewarray|new|anewarray|newarray)");
368: map
369: .put(
370: "indexedinstruction",
371: "(lload|lstore|fload|ldc2_w|invokeinterface|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)");
372: map
373: .put(
374: "pushinstruction",
375: "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)");
376: map
377: .put(
378: "stackproducer",
379: "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)");
380: map
381: .put(
382: "stackconsumer",
383: "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)");
384: map
385: .put(
386: "exceptionthrower",
387: "(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|ldc|invokestatic|daload)");
388: map
389: .put(
390: "loadclass",
391: "(multianewarray|invokeinterface|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)");
392: map
393: .put(
394: "instructiontargeter",
395: "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
396: // Some aliases
397: map
398: .put("if_icmp",
399: "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)");
400: map.put("if_acmp", "(if_acmpeq|if_acmpne)");
401: map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)");
402: // Precompile some aliases first
403: map.put("iconst", precompile(Constants.ICONST_0,
404: Constants.ICONST_5, Constants.ICONST_M1));
405: map.put("lconst", new String(new char[] { '(',
406: makeChar(Constants.LCONST_0), '|',
407: makeChar(Constants.LCONST_1), ')' }));
408: map.put("dconst", new String(new char[] { '(',
409: makeChar(Constants.DCONST_0), '|',
410: makeChar(Constants.DCONST_1), ')' }));
411: map.put("fconst", new String(new char[] { '(',
412: makeChar(Constants.FCONST_0), '|',
413: makeChar(Constants.FCONST_1), ')' }));
414: map.put("iload", precompile(Constants.ILOAD_0,
415: Constants.ILOAD_3, Constants.ILOAD));
416: map.put("dload", precompile(Constants.DLOAD_0,
417: Constants.DLOAD_3, Constants.DLOAD));
418: map.put("fload", precompile(Constants.FLOAD_0,
419: Constants.FLOAD_3, Constants.FLOAD));
420: map.put("aload", precompile(Constants.ALOAD_0,
421: Constants.ALOAD_3, Constants.ALOAD));
422: map.put("istore", precompile(Constants.ISTORE_0,
423: Constants.ISTORE_3, Constants.ISTORE));
424: map.put("dstore", precompile(Constants.DSTORE_0,
425: Constants.DSTORE_3, Constants.DSTORE));
426: map.put("fstore", precompile(Constants.FSTORE_0,
427: Constants.FSTORE_3, Constants.FSTORE));
428: map.put("astore", precompile(Constants.ASTORE_0,
429: Constants.ASTORE_3, Constants.ASTORE));
430: // Compile strings
431: for (Iterator i = map.keySet().iterator(); i.hasNext();) {
432: String key = (String) i.next();
433: String value = (String) map.get(key);
434: char ch = value.charAt(1); // Omit already precompiled patterns
435: if (ch < OFFSET) {
436: map.put(key, compilePattern(value)); // precompile all patterns
437: }
438: }
439: // Add instruction alias to match anything
440: StringBuffer buf = new StringBuffer("(");
441: for (short i = 0; i < NO_OPCODES; i++) {
442: if (Constants.NO_OF_OPERANDS[i] != Constants.UNDEFINED) { // Not an
443: // invalid
444: // opcode
445: buf.append(makeChar(i));
446: if (i < NO_OPCODES - 1) {
447: buf.append('|');
448: }
449: }
450: }
451: buf.append(')');
452: map.put("instruction", buf.toString());
453: }
454:
455: private static String precompile(short from, short to, short extra) {
456: StringBuffer buf = new StringBuffer("(");
457: for (short i = from; i <= to; i++) {
458: buf.append(makeChar(i));
459: buf.append('|');
460: }
461: buf.append(makeChar(extra));
462: buf.append(")");
463: return buf.toString();
464: }
465:
466: /*
467: * Internal debugging routines.
468: */
469: private static final String pattern2string(String pattern) {
470: return pattern2string(pattern, true);
471: }
472:
473: private static final String pattern2string(String pattern,
474: boolean make_string) {
475: StringBuffer buf = new StringBuffer();
476: for (int i = 0; i < pattern.length(); i++) {
477: char ch = pattern.charAt(i);
478: if (ch >= OFFSET) {
479: if (make_string) {
480: buf.append(Constants.OPCODE_NAMES[ch - OFFSET]);
481: } else {
482: buf.append((ch - OFFSET));
483: }
484: } else {
485: buf.append(ch);
486: }
487: }
488: return buf.toString();
489: }
490: }
|