01: /**
02: * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
03: */package net.sourceforge.pmd.cpd;
04:
05: import net.sourceforge.pmd.TargetJDK1_4;
06: import net.sourceforge.pmd.ast.JavaParserConstants;
07: import net.sourceforge.pmd.ast.JavaParserTokenManager;
08: import net.sourceforge.pmd.ast.Token;
09:
10: import java.io.StringReader;
11: import java.util.Properties;
12:
13: public class JavaTokenizer implements Tokenizer {
14:
15: public static final String IGNORE_LITERALS = "ignore_literals";
16: public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
17:
18: private boolean ignoreLiterals;
19: private boolean ignoreIdentifiers;
20:
21: public void setProperties(Properties properties) {
22: ignoreLiterals = Boolean.parseBoolean(properties.getProperty(
23: IGNORE_LITERALS, "false"));
24: ignoreIdentifiers = Boolean.parseBoolean(properties
25: .getProperty(IGNORE_IDENTIFIERS, "false"));
26: }
27:
28: public void tokenize(SourceCode tokens, Tokens tokenEntries) {
29: StringBuffer buffer = tokens.getCodeBuffer();
30:
31: /*
32: I'm doing a sort of State pattern thing here where
33: this goes into "discarding" mode when it hits an import or package
34: keyword and goes back into "accumulate mode" when it hits a semicolon.
35: This could probably be turned into some objects.
36: */
37: // Note that Java version is irrelevant for tokenizing
38: JavaParserTokenManager tokenMgr = new TargetJDK1_4()
39: .createJavaParserTokenManager(new StringReader(buffer
40: .toString()));
41: Token currentToken = tokenMgr.getNextToken();
42: boolean inDiscardingState = false;
43: while (currentToken.image.length() > 0) {
44: if (currentToken.kind == JavaParserConstants.IMPORT
45: || currentToken.kind == JavaParserConstants.PACKAGE) {
46: inDiscardingState = true;
47: currentToken = tokenMgr.getNextToken();
48: continue;
49: }
50:
51: if (inDiscardingState
52: && currentToken.kind == JavaParserConstants.SEMICOLON) {
53: inDiscardingState = false;
54: }
55:
56: if (inDiscardingState) {
57: currentToken = tokenMgr.getNextToken();
58: continue;
59: }
60:
61: if (currentToken.kind != JavaParserConstants.SEMICOLON) {
62: String image = currentToken.image;
63: if (ignoreLiterals
64: && (currentToken.kind == JavaParserConstants.STRING_LITERAL
65: || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
66: || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
67: image = String.valueOf(currentToken.kind);
68: }
69: if (ignoreIdentifiers
70: && currentToken.kind == JavaParserConstants.IDENTIFIER) {
71: image = String.valueOf(currentToken.kind);
72: }
73: tokenEntries.add(new TokenEntry(image, tokens
74: .getFileName(), currentToken.beginLine));
75: }
76:
77: currentToken = tokenMgr.getNextToken();
78: }
79: tokenEntries.add(TokenEntry.getEOF());
80: }
81:
82: public void setIgnoreLiterals(boolean ignore) {
83: this .ignoreLiterals = ignore;
84: }
85:
86: public void setIgnoreIdentifiers(boolean ignore) {
87: this.ignoreIdentifiers = ignore;
88: }
89: }
|