01: /**
02: * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
03: */package net.sourceforge.pmd.cpd;
04:
05: import java.io.BufferedReader;
06: import java.io.CharArrayReader;
07: import java.util.NoSuchElementException;
08: import java.util.StringTokenizer;
09:
10: /**
11: * This class does a best-guess try-anything tokenization.
12: *
13: * @author jheintz
14: */
15: public class AnyTokenizer implements Tokenizer {
16: public static final String TOKENS = " \t!#$%^&*(){}-=+<>/\\`~;:";
17:
18: public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
19: StringBuffer sb = sourceCode.getCodeBuffer();
20: BufferedReader reader = new BufferedReader(new CharArrayReader(
21: sb.toString().toCharArray()));
22: try {
23: int lineNumber = 1;
24: String line = reader.readLine();
25: while (line != null) {
26: StringTokenizer tokenizer = new StringTokenizer(line,
27: TOKENS, true);
28: try {
29: String token = tokenizer.nextToken();
30: while (token != null) {
31: if (!token.equals(" ") && !token.equals("\t")) {
32: tokenEntries.add(new TokenEntry(token,
33: sourceCode.getFileName(),
34: lineNumber));
35: }
36: token = tokenizer.nextToken();
37: }
38: } catch (NoSuchElementException ex) {
39: // done with tokens
40: }
41: // advance iteration variables
42: line = reader.readLine();
43: lineNumber++;
44: }
45: } catch (Exception ex) {
46: ex.printStackTrace();
47: } finally {
48: try {
49: reader.close();
50: } catch (Exception ex) {
51: }
52: tokenEntries.add(TokenEntry.getEOF());
53: }
54: }
55: }
|