001: /*
002: * $Id: MatchActionProcessor.java,v 1.10 2003/11/07 20:16:24 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.oro.text;
059:
060: import java.io.*;
061: import java.util.*;
062:
063: import org.apache.oro.text.regex.*;
064:
065: /**
066: * The MatchActionProcessor class provides AWK-like line by line filtering
067: * of a text stream, pattern action pair association, and field splitting
068: * based on a registered separator. However, the class can be used with
069: * any compatible PatternMatcher/PatternCompiler implementations and
070: * need not use the AWK matching classes in org.apache.oro.text.awk. In fact,
071: * the default matcher and compiler used by the class are Perl5Matcher and
072: * Perl5Compiler from org.apache.oro.text.regex.
073: * <p>
074: * To completely understand how to use MatchActionProcessor, you should first
075: * look at {@link MatchAction} and {@link MatchActionInfo}.
076: * A MatchActionProcessor is first initialized with
077: * the desired PatternCompiler and PatternMatcher instances to use to compile
078: * patterns and perform matches. Then, optionally, a field separator may
079: * be registered with {@link #setFieldSeparator setFieldSeparator()}
080: * Finally, as many pattern action pairs as desired are registerd with
081: * {@link #addAction addAction()} before processing the input
082: * with {@link #processMatches processMatches()}. Pattern action
083: * pairs are processed in the order they were registered.
084: * <p>
085: * The look of added actions can closely mirror that of AWK when anonymous
086: * classes are used. Here's an example of how you might use
087: * MatchActionProcessor to extract only the second column of a semicolon
088: * delimited file:
089: * <p>
090: * <pre>
091: * import java.io.*;
092: *
093: * import org.apache.oro.text.*;
094: * import org.apache.oro.text.regex.*;
095: *
096: * public final class semicolon {
097: *
098: * public static final void main(String[] args) {
099: * MatchActionProcessor processor = new MatchActionProcessor();
100: *
101: * try {
102: * processor.setFieldSeparator(";");
103: * // Using a null pattern means to perform the action for every line.
104: * processor.addAction(null, new MatchAction() {
105: * public void processMatch(MatchActionInfo info) {
106: * // We assume the second column exists
107: * info.output.println(info.fields.elementAt(1));
108: * }
109: * });
110: * } catch(MalformedPatternException e) {
111: * e.printStackTrace();
112: * System.exit(1);
113: * }
114: *
115: * try {
116: * processor.processMatches(System.in, System.out);
117: * } catch(IOException e) {
118: * e.printStackTrace();
119: * System.exit(1);
120: * }
121: * }
122: *}
123: * </pre>
124: * You can redirect the following sample input to stdin to test the code:
125: * <pre>
126: * 1;Trenton;New Jersey
127: * 2;Annapolis;Maryland
128: * 3;Austin;Texas
129: * 4;Richmond;Virginia
130: * 5;Harrisburg;Pennsylvania
131: * 6;Honolulu;Hawaii
132: * 7;Santa Fe;New Mexico
133: * </pre>
134: *
135: * @version @version@
136: * @since 1.0
137: * @see MatchAction
138: * @see MatchActionInfo
139: */
140: public final class MatchActionProcessor {
141: private Pattern __fieldSeparator = null;
142: private PatternCompiler __compiler;
143: private PatternMatcher __matcher;
144: // If a pattern is null, it means to do it for every line.
145: private Vector __patterns = new Vector();
146: private Vector __actions = new Vector();
147:
148: private MatchAction __defaultAction = new DefaultMatchAction();
149:
150: /**
151: * Creates a new MatchActionProcessor instance initialized with the specified
152: * pattern compiler and matcher. The field separator is set to null by
153: * default, which means that matched lines will not be split into separate
154: * fields unless the field separator is set with
155: * {@link #setFieldSeparator setFieldSeparator()}.
156: * <p>
157: * @param compiler The PatternCompiler to use to compile registered
158: * patterns.
159: * @param matcher The PatternMatcher to use when searching for matches.
160: */
161: public MatchActionProcessor(PatternCompiler compiler,
162: PatternMatcher matcher) {
163: __compiler = compiler;
164: __matcher = matcher;
165: }
166:
167: /**
168: * Default constructor for MatchActionProcessor. Same as calling
169: * <blockquote><code>
170: * MatchActionProcessor(new Perl5Compiler(), new Perl5Matcher());
171: * </code></blockquote>
172: */
173: public MatchActionProcessor() {
174: this (new Perl5Compiler(), new Perl5Matcher());
175: }
176:
177: /**
178: * Registers a pattern action pair, providing options to be used to
179: * compile the pattern. If a pattern is null, the action
180: * is performed for every line of input.
181: * <p>
182: * @param pattern The pattern to bind to an action.
183: * @param options The compilation options to use for the pattern.
184: * @param action The action to associate with the pattern.
185: * @exception MalformedPatternException If the pattern cannot be compiled.
186: */
187: public void addAction(String pattern, int options,
188: MatchAction action) throws MalformedPatternException {
189: if (pattern != null)
190: __patterns.addElement(__compiler.compile(pattern, options));
191: else
192: __patterns.addElement(null);
193: __actions.addElement(action);
194: }
195:
196: /**
197: * Binds a patten to the default action, providing options to be
198: * used to compile the pattern. The default action is to simply print
199: * the matched line to the output. If a pattern is null, the action
200: * is performed for every line of input.
201: * <p>
202: * @param pattern The pattern to bind to an action.
203: * @param options The compilation options to use for the pattern.
204: * @exception MalformedPatternException If the pattern cannot be compiled.
205: */
206: public void addAction(String pattern, int options)
207: throws MalformedPatternException {
208: addAction(pattern, options, __defaultAction);
209: }
210:
211: /**
212: * Binds a patten to the default action. The default action is to simply
213: * print the matched line to the output. If a pattern is null, the action
214: * is performed for every line of input.
215: * <p>
216: * @param pattern The pattern to bind to an action.
217: * @exception MalformedPatternException If the pattern cannot be compiled.
218: */
219: public void addAction(String pattern)
220: throws MalformedPatternException {
221: addAction(pattern, 0);
222: }
223:
224: /**
225: * Registers a pattern action pair. If a pattern is null, the action
226: * is performed for every line of input.
227: * <p>
228: * @param pattern The pattern to bind to an action.
229: * @param action The action to associate with the pattern.
230: * @exception MalformedPatternException If the pattern cannot be compiled.
231: */
232: public void addAction(String pattern, MatchAction action)
233: throws MalformedPatternException {
234: addAction(pattern, 0, action);
235: }
236:
237: /**
238: * Sets the field separator to use when splitting a line into fields.
239: * If the field separator is never set, or set to null, matched input
240: * lines are not split into fields.
241: * <p>
242: * @param separator A regular expression defining the field separator.
243: * @param options The options to use when compiling the separator.
244: * @exception MalformedPatternException If the separator cannot be compiled.
245: */
246: public void setFieldSeparator(String separator, int options)
247: throws MalformedPatternException {
248: if (separator == null) {
249: __fieldSeparator = null;
250: return;
251: }
252: __fieldSeparator = __compiler.compile(separator, options);
253: }
254:
255: /**
256: * Sets the field separator to use when splitting a line into fields.
257: * If the field separator is never set, or set to null, matched input
258: * lines are not split into fields.
259: * <p>
260: * @param separator A regular expression defining the field separator.
261: * @exception MalformedPatternException If the separator cannot be compiled.
262: */
263: public void setFieldSeparator(String separator)
264: throws MalformedPatternException {
265: setFieldSeparator(separator, 0);
266: }
267:
268: /**
269: * This method reads the provided input one line at a time and for
270: * every registered pattern that is contained in the line it executes
271: * the associated MatchAction's processMatch() method. If a field
272: * separator has been defined with
273: * {@link #setFieldSeparator setFieldSeparator()}, the
274: * fields member of the MatchActionInfo instance passed to the
275: * processMatch() method is set to a Vector of Strings containing
276: * the split fields of the line. Otherwise the fields member is set
277: * to null. If no match was performed to invoke the action (i.e.,
278: * a null pattern was registered), then the match member is set
279: * to null. Otherwise, the match member will contain the result of
280: * the match.
281: * <p>
282: * The input stream, having been exhausted, is closed right before the
283: * method terminates and the output stream is flushed.
284: * <p>
285: * @see MatchActionInfo
286: * @param input The input stream from which to read lines.
287: * @param output Where to send output.
288: * @param encoding The character encoding of the InputStream source.
289: * If you also want to define an output character encoding,
290: * you should use {@link #processMatches(Reader, Writer)}
291: * and specify the encodings when creating the Reader and
292: * Writer sources and sinks.
293: * @exception IOException If an error occurs while reading input
294: * or writing output.
295: */
296: public void processMatches(InputStream input, OutputStream output,
297: String encoding) throws IOException {
298: processMatches(new InputStreamReader(input, encoding),
299: new OutputStreamWriter(output));
300: }
301:
302: /**
303: * This method reads the provided input one line at a time using the
304: * platform standart character encoding and for every registered
305: * pattern that is contained in the line it executes the associated
306: * MatchAction's processMatch() method. If a field separator has been
307: * defined with {@link #setFieldSeparator setFieldSeparator()}, the
308: * fields member of the MatchActionInfo instance passed to the
309: * processMatch() method is set to a Vector of Strings containing
310: * the split fields of the line. Otherwise the fields member is set
311: * to null. If no match was performed to invoke the action (i.e.,
312: * a null pattern was registered), then the match member is set
313: * to null. Otherwise, the match member will contain the result of
314: * the match.
315: *
316: * <p>
317: * The input stream, having been exhausted, is closed right before the
318: * method terminates and the output stream is flushed.
319: * <p>
320: *
321: * @see MatchActionInfo
322: * @param input The input stream from which to read lines.
323: * @param output Where to send output.
324: * @exception IOException If an error occurs while reading input
325: * or writing output.
326: */
327: public void processMatches(InputStream input, OutputStream output)
328: throws IOException {
329: processMatches(new InputStreamReader(input),
330: new OutputStreamWriter(output));
331: }
332:
333: /**
334: * This method reads the provided input one line at a time and for
335: * every registered pattern that is contained in the line it executes
336: * the associated MatchAction's processMatch() method. If a field
337: * separator has been defined with
338: * {@link #setFieldSeparator setFieldSeparator()}, the
339: * fields member of the MatchActionInfo instance passed to the
340: * processMatch() method is set to a Vector of Strings containing
341: * the split fields of the line. Otherwise the fields member is set
342: * to null. If no match was performed to invoke the action (i.e.,
343: * a null pattern was registered), then the match member is set
344: * to null. Otherwise, the match member will contain the result of
345: * the match.
346: * <p>
347: * The input stream, having been exhausted, is closed right before the
348: * method terminates and the output stream is flushed.
349: * <p>
350: * @see MatchActionInfo
351: * @param input The input stream from which to read lines.
352: * @param output Where to send output.
353: * @exception IOException If an error occurs while reading input
354: * or writing output.
355: */
356: public void processMatches(Reader input, Writer output)
357: throws IOException {
358: int patternCount, current;
359: LineNumberReader reader = new LineNumberReader(input);
360: PrintWriter writer = new PrintWriter(output);
361: MatchActionInfo info = new MatchActionInfo();
362: Object obj;
363: Pattern pattern;
364: MatchAction action;
365: List fields = new ArrayList();
366:
367: // Set those things that will not change.
368: info.matcher = __matcher;
369: info.fieldSeparator = __fieldSeparator;
370: info.input = reader;
371: info.output = writer;
372: info.fields = null;
373: patternCount = __patterns.size();
374:
375: info.lineNumber = 0;
376:
377: while ((info.line = reader.readLine()) != null) {
378: info.charLine = info.line.toCharArray();
379: for (current = 0; current < patternCount; current++) {
380: obj = __patterns.elementAt(current);
381: // If a pattern is null, it means to do it for every line.
382: if (obj != null) {
383: pattern = (Pattern) __patterns.elementAt(current);
384: if (__matcher.contains(info.charLine, pattern)) {
385: info.match = __matcher.getMatch();
386: info.lineNumber = reader.getLineNumber();
387: info.pattern = pattern;
388: if (__fieldSeparator != null) {
389: fields.clear();
390: Util.split(fields, __matcher,
391: __fieldSeparator, info.line);
392: info.fields = fields;
393: } else
394: info.fields = null;
395: action = (MatchAction) __actions
396: .elementAt(current);
397: action.processMatch(info);
398: }
399: } else {
400: info.match = null;
401: info.lineNumber = reader.getLineNumber();
402: if (__fieldSeparator != null) {
403: fields.clear();
404: Util.split(fields, __matcher, __fieldSeparator,
405: info.line);
406: info.fields = fields;
407: } else
408: info.fields = null;
409: action = (MatchAction) __actions.elementAt(current);
410: action.processMatch(info);
411: }
412: }
413: }
414:
415: // Flush output but don't close, close input since we reached end.
416: writer.flush();
417: reader.close();
418: }
419:
420: }
|