001: /*
002:
003: This software is OSI Certified Open Source Software.
004: OSI Certified is a certification mark of the Open Source Initiative.
005:
006: The license (Mozilla version 1.0) can be read at the MMBase site.
007: See http://www.MMBase.org/license
008:
009: */
010: package org.mmbase.util.transformers;
011:
012: import java.util.*;
013: import java.io.*;
014: import java.util.regex.*;
015:
016: import org.mmbase.util.ResourceWatcher;
017: import org.mmbase.util.xml.UtilReader;
018: import org.mmbase.util.Entry;
019: import org.mmbase.util.Casting;
020:
021: import org.mmbase.util.logging.*;
022:
023: /**
024: * Finds regexps in the Character String, and replaces them. The replaced regexps can be found in a configuration file 'regexps.xml' (if it is present).
025: * It ignores existing XML markup, and also avoids trailing dots and comments and surrounding quotes and parentheses.
026: *
027: * @author Michiel Meeuwissen
028: * @since MMBase-1.8
029: */
030:
031: public class RegexpReplacer extends ChunkedTransformer {
032: private static final Logger log = Logging
033: .getLoggerInstance(RegexpReplacer.class);
034:
035: /**
036: * Every extension of regexp-replacer can make use of this.
037: */
038: private static final Map<String, UtilReader> utilReaders = new HashMap<String, UtilReader>(); // class -> utilreader
039:
040: /**
041: * The regexps for the unextended RegexpReplacer
042: */
043: protected static final Collection<Entry<Pattern, String>> regexps = new ArrayList<Entry<Pattern, String>>();
044:
045: protected static abstract class PatternWatcher extends
046: ResourceWatcher {
047: protected Collection<Entry<Pattern, String>> patterns;
048:
049: PatternWatcher(Collection<Entry<Pattern, String>> p) {
050: patterns = p;
051: }
052: }
053:
054: static {
055: new RegexpReplacer().readPatterns(regexps);
056: }
057:
058: public RegexpReplacer(int i) {
059: super (i);
060: }
061:
062: public RegexpReplacer() {
063: super (WORDS);
064: }
065:
066: /**
067: * This on default gives the regexps configured for the base-class (a static member). You can
068: * override this method to return another Collection.
069: */
070: protected Collection<Entry<Pattern, String>> getPatterns() {
071: return regexps;
072: }
073:
074: /**
075: * This can be overridden if the implementation must use its own configuration file.
076: */
077: protected String getConfigFile() {
078: return "regexps.xml";
079: }
080:
081: /**
082: * Reads defaults translation patterns into the given collection patterns. Override this for
083: * other default patterns.
084: */
085: protected void readDefaultPatterns(
086: Collection<Entry<Pattern, String>> patterns) {
087: }
088:
089: /**
090: * Reads patterns from config-file into given Collection
091: */
092: protected final void readPatterns(
093: Collection<Entry<Pattern, String>> patterns) {
094: UtilReader utilReader = utilReaders.get(this .getClass()
095: .getName());
096: if (utilReader == null) {
097: utilReader = new UtilReader(getConfigFile(),
098: new PatternWatcher(patterns) {
099: public void onChange(String file) {
100: readPatterns(patterns);
101: }
102: });
103: utilReaders.put(this .getClass().getName(), utilReader);
104: }
105:
106: patterns.clear();
107:
108: Collection<?> regs = utilReader.getMaps().get("regexps");
109: if (regs != null) {
110: addPatterns(regs, patterns);
111: } else {
112: readDefaultPatterns(patterns);
113: }
114: }
115:
116: /**
117: * Utility function to create a bunch of patterns.
118: * @param list A Collection of Map.Entry (like {@link java.util.Map#entrySet()}), containing
119: * pairs of Strings
120: * @param patterns This the Collection of Entries. The key of every entry is a compiled regular
121: * expression. The value is still a String. New entries will be added to this collection
122: * by this function.
123: */
124: protected static void addPatterns(Collection<?> list,
125: Collection<Entry<Pattern, String>> patterns) {
126: if (list != null) {
127: Iterator<?> i = list.iterator();
128: while (i.hasNext()) {
129: Object next = i.next();
130: Pattern p;
131: String result;
132: if (next == null) {
133: log.warn("Found null in " + list);
134: continue;
135: } else if (next instanceof Map.Entry) {
136: Map.Entry<?, ?> entry = (Map.Entry<?, ?>) next;
137: p = Pattern.compile(Casting
138: .toString(entry.getKey()));
139: Object value = entry.getValue();
140: if (value instanceof Collection) {
141: result = null;
142: Iterator<?> j = ((Collection<?>) value)
143: .iterator();
144: while (j.hasNext()) {
145: Object n = j.next();
146: if (!(n instanceof Map.Entry)) {
147: log
148: .warn("Could not understand "
149: + n.getClass()
150: + " '"
151: + n
152: + "' (in collection "
153: + value
154: + "). It should be a Map.Entry.");
155: continue;
156: }
157: Map.Entry<?, ?> subEntry = (Map.Entry<?, ?>) n;
158: Object key = subEntry.getKey();
159: if ("key".equals(key)) {
160: p = Pattern.compile(Casting
161: .toString(subEntry.getValue()));
162: continue;
163: }
164: if ("value".equals(key)) {
165: result = Casting.toString(subEntry
166: .getValue());
167: }
168: }
169: if (result == null)
170: result = "";
171: } else {
172: result = Casting.toString(value);
173: }
174: } else {
175: log.warn("Could not understand " + next.getClass()
176: + " '" + next
177: + "'. It should be a Map.Entry.");
178: continue;
179: }
180: patterns.add(new Entry<Pattern, String>(p, result));
181: }
182: }
183: }
184:
185: protected boolean replace(String string, Writer w, Status status)
186: throws IOException {
187: Iterator<Entry<Pattern, String>> i = getPatterns().iterator();
188:
189: boolean r = false;
190: while (i.hasNext()) {
191: Entry<Pattern, String> entry = i.next();
192: Pattern p = entry.getKey();
193: if (replaceFirstAll && status.used.contains(p))
194: continue;
195: Matcher m = p.matcher(string);
196: String replacement = entry.getValue();
197: boolean result = m.find();
198: if (result) {
199: r = true;
200: StringBuffer sb = new StringBuffer();
201: do {
202: status.replaced++;
203: m.appendReplacement(sb, replacement);
204: if (replaceFirst)
205: break;
206: result = m.find();
207: } while (result);
208: m.appendTail(sb);
209: if (replaceFirstAll)
210: status.used.add(p);
211: string = sb.toString();
212: }
213:
214: }
215:
216: w.write(string);
217: return r;
218:
219: }
220:
221: protected final String base() {
222: return "REGEXPS";
223: }
224:
225: public String toString() {
226: return getEncoding() + " " + getPatterns();
227: }
228:
229: }
|