001: /**
002: * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003: */package net.sourceforge.pmd.cpd;
004:
005: import java.util.ArrayList;
006: import java.util.Collections;
007: import java.util.HashMap;
008: import java.util.Iterator;
009: import java.util.List;
010: import java.util.Map;
011:
012: public class MatchAlgorithm {
013:
014: private final static int MOD = 37;
015: private int lastHash;
016: private int lastMod = 1;
017:
018: private List<Match> matches;
019: private Map<String, SourceCode> source;
020: private Tokens tokens;
021: private List<TokenEntry> code;
022: private CPDListener cpdListener;
023: private int min;
024:
025: public MatchAlgorithm(Map<String, SourceCode> sourceCode,
026: Tokens tokens, int min) {
027: this (sourceCode, tokens, min, new CPDNullListener());
028: }
029:
030: public MatchAlgorithm(Map<String, SourceCode> sourceCode,
031: Tokens tokens, int min, CPDListener listener) {
032: this .source = sourceCode;
033: this .tokens = tokens;
034: this .code = tokens.getTokens();
035: this .min = min;
036: this .cpdListener = listener;
037: for (int i = 0; i < min; i++) {
038: lastMod *= MOD;
039: }
040: }
041:
042: public void setListener(CPDListener listener) {
043: this .cpdListener = listener;
044: }
045:
046: public Iterator<Match> matches() {
047: return matches.iterator();
048: }
049:
050: public TokenEntry tokenAt(int offset, TokenEntry m) {
051: return code.get(offset + m.getIndex());
052: }
053:
054: public int getMinimumTileSize() {
055: return this .min;
056: }
057:
058: public void findMatches() {
059: cpdListener.phaseUpdate(CPDListener.HASH);
060: Map<TokenEntry, Object> markGroups = hash();
061:
062: cpdListener.phaseUpdate(CPDListener.MATCH);
063: MatchCollector matchCollector = new MatchCollector(this );
064: for (Iterator<Object> i = markGroups.values().iterator(); i
065: .hasNext();) {
066: Object o = i.next();
067: if (o instanceof List) {
068: List<TokenEntry> l = (List<TokenEntry>) o;
069:
070: Collections.reverse(l);
071: matchCollector.collect(l);
072: }
073: i.remove();
074: }
075: cpdListener.phaseUpdate(CPDListener.GROUPING);
076: matches = matchCollector.getMatches();
077: matchCollector = null;
078: for (Match match : matches) {
079: for (Iterator<TokenEntry> occurrences = match.iterator(); occurrences
080: .hasNext();) {
081: TokenEntry mark = occurrences.next();
082: match.setLineCount(tokens.getLineCount(mark, match));
083: if (!occurrences.hasNext()) {
084: int start = mark.getBeginLine();
085: int end = start + match.getLineCount() - 1;
086: SourceCode sourceCode = source.get(mark
087: .getTokenSrcID());
088: match.setSourceCodeSlice(sourceCode.getSlice(start,
089: end));
090: }
091: }
092: }
093: cpdListener.phaseUpdate(CPDListener.DONE);
094: }
095:
096: private Map<TokenEntry, Object> hash() {
097: Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(
098: tokens.size());
099: for (int i = code.size() - 1; i >= 0; i--) {
100: TokenEntry token = code.get(i);
101: if (token != TokenEntry.EOF) {
102: int last = tokenAt(min, token).getIdentifier();
103: lastHash = MOD * lastHash + token.getIdentifier()
104: - lastMod * last;
105: token.setHashCode(lastHash);
106: Object o = markGroups.get(token);
107:
108: // Note that this insertion method is worthwhile since the vast majority
109: // markGroup keys will have only one value.
110: if (o == null) {
111: markGroups.put(token, token);
112: } else if (o instanceof TokenEntry) {
113: List<TokenEntry> l = new ArrayList<TokenEntry>();
114: l.add((TokenEntry) o);
115: l.add(token);
116: markGroups.put(token, l);
117: } else {
118: List<TokenEntry> l = (List<TokenEntry>) o;
119: l.add(token);
120: }
121: } else {
122: lastHash = 0;
123: for (int end = Math.max(0, i - min + 1); i > end; i--) {
124: token = code.get(i - 1);
125: lastHash = MOD * lastHash + token.getIdentifier();
126: if (token == TokenEntry.EOF) {
127: break;
128: }
129: }
130: }
131: }
132: return markGroups;
133: }
134: }
|