001: /**
002: * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003: */package net.sourceforge.pmd.cpd;
004:
005: import java.io.File;
006: import java.io.FileNotFoundException;
007: import java.io.IOException;
008: import java.util.HashMap;
009: import java.util.HashSet;
010: import java.util.Iterator;
011: import java.util.List;
012: import java.util.Map;
013: import java.util.Set;
014:
015: import net.sourceforge.pmd.util.FileFinder;
016:
017: public class CPD {
018:
019: private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
020: private CPDListener listener = new CPDNullListener();
021: private Tokens tokens = new Tokens();
022: private int minimumTileSize;
023: private MatchAlgorithm matchAlgorithm;
024: private Language language;
025: private boolean skipDuplicates;
026: public static boolean debugEnable = false;
027: private String encoding = System.getProperty("file.encoding");
028:
029: public CPD(int minimumTileSize, Language language) {
030: this .minimumTileSize = minimumTileSize;
031: this .language = language;
032: }
033:
034: public void skipDuplicates() {
035: this .skipDuplicates = true;
036: }
037:
038: public void setCpdListener(CPDListener cpdListener) {
039: this .listener = cpdListener;
040: }
041:
042: public void setEncoding(String encoding) {
043: this .encoding = encoding;
044: }
045:
046: public void go() {
047: TokenEntry.clearImages();
048: matchAlgorithm = new MatchAlgorithm(source, tokens,
049: minimumTileSize, listener);
050: matchAlgorithm.findMatches();
051: }
052:
053: public Iterator<Match> getMatches() {
054: return matchAlgorithm.matches();
055: }
056:
057: public void add(File file) throws IOException {
058: add(1, file);
059: }
060:
061: public void addAllInDirectory(String dir) throws IOException {
062: addDirectory(dir, false);
063: }
064:
065: public void addRecursively(String dir) throws IOException {
066: addDirectory(dir, true);
067: }
068:
069: public void add(List<File> files) throws IOException {
070: for (File f : files) {
071: add(files.size(), f);
072: }
073: }
074:
075: private void addDirectory(String dir, boolean recurse)
076: throws IOException {
077: if (!(new File(dir)).exists()) {
078: throw new FileNotFoundException("Couldn't find directory "
079: + dir);
080: }
081: FileFinder finder = new FileFinder();
082: // TODO - could use SourceFileSelector here
083: add(finder
084: .findFilesFrom(dir, language.getFileFilter(), recurse));
085: }
086:
087: private Set<String> current = new HashSet<String>();
088:
089: private void add(int fileCount, File file) throws IOException {
090:
091: if (skipDuplicates) {
092: // TODO refactor this thing into a separate class
093: String signature = file.getName() + '_' + file.length();
094: if (current.contains(signature)) {
095: System.err
096: .println("Skipping "
097: + file.getAbsolutePath()
098: + " since it appears to be a duplicate file and --skip-duplicate-files is set");
099: return;
100: }
101: current.add(signature);
102: }
103:
104: if (!file.getCanonicalPath().equals(
105: new File(file.getAbsolutePath()).getCanonicalPath())) {
106: System.err.println("Skipping " + file
107: + " since it appears to be a symlink");
108: return;
109: }
110:
111: listener.addedFile(fileCount, file);
112: SourceCode sourceCode = new SourceCode(
113: new SourceCode.FileCodeLoader(file, encoding));
114: language.getTokenizer().tokenize(sourceCode, tokens);
115: source.put(sourceCode.getFileName(), sourceCode);
116: }
117:
118: public static Renderer getRendererFromString(String name,
119: String encoding) {
120: if (name.equalsIgnoreCase("text") || name.equals("")) {
121: return new SimpleRenderer();
122: } else if ("xml".equals(name)) {
123: return new XMLRenderer(encoding);
124: } else if ("csv".equals(name)) {
125: return new CSVRenderer();
126: } else if ("vs".equals(name)) {
127: return new VSRenderer();
128: }
129: try {
130: return (Renderer) Class.forName(name).newInstance();
131: } catch (Exception e) {
132: System.out.println("Can't find class '" + name
133: + "', defaulting to SimpleRenderer.");
134: }
135: return new SimpleRenderer();
136: }
137:
138: private static boolean findBooleanSwitch(String[] args, String name) {
139: for (int i = 0; i < args.length; i++) {
140: if (args[i].equals(name)) {
141: return true;
142: }
143: }
144: return false;
145: }
146:
147: private static String findRequiredStringValue(String[] args,
148: String name) {
149: for (int i = 0; i < args.length; i++) {
150: if (args[i].equals(name)) {
151: return args[i + 1];
152: }
153: }
154: System.out.println("No " + name + " value passed in");
155: usage();
156: throw new RuntimeException();
157: }
158:
159: private static String findOptionalStringValue(String[] args,
160: String name, String defaultValue) {
161: for (int i = 0; i < args.length; i++) {
162: if (args[i].equals(name)) {
163: return args[i + 1];
164: }
165: }
166: return defaultValue;
167: }
168:
169: public static void main(String[] args) {
170: if (args.length == 0) {
171: usage();
172: }
173:
174: try {
175: boolean skipDuplicateFiles = findBooleanSwitch(args,
176: "--skip-duplicate-files");
177: String languageString = findOptionalStringValue(args,
178: "--language", "java");
179: String formatString = findOptionalStringValue(args,
180: "--format", "text");
181: String encodingString = findOptionalStringValue(args,
182: "--encoding", System.getProperty("file.encoding"));
183: int minimumTokens = Integer
184: .parseInt(findRequiredStringValue(args,
185: "--minimum-tokens"));
186: LanguageFactory f = new LanguageFactory();
187: Language language = f.createLanguage(languageString);
188: Renderer renderer = CPD.getRendererFromString(formatString,
189: encodingString);
190: CPD cpd = new CPD(minimumTokens, language);
191: cpd.setEncoding(encodingString);
192: if (skipDuplicateFiles) {
193: cpd.skipDuplicates();
194: }
195: /* FIXME: Improve this !!! */
196: boolean missingFiles = true;
197: for (int position = 0; position < args.length; position++) {
198: if (args[position].equals("--files")) {
199: cpd.addRecursively(args[position + 1]);
200: if (missingFiles) {
201: missingFiles = false;
202: }
203: }
204: }
205:
206: if (missingFiles) {
207: System.out.println("No " + "--files"
208: + " value passed in");
209: usage();
210: throw new RuntimeException();
211: }
212:
213: cpd.go();
214: System.out.println(renderer.render(cpd.getMatches()));
215: } catch (Exception e) {
216: e.printStackTrace();
217: }
218: }
219:
220: private static void usage() {
221: System.out.println("Usage:");
222: System.out
223: .println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
224: System.out.println("i.e: ");
225: System.out
226: .println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
227: System.out.println("or: ");
228: System.out
229: .println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
230: System.out.println("or: ");
231: System.out
232: .println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
233: }
234:
235: }
|