001: /*
002: * $Id: streamInputExample.java,v 1.7 2003/11/07 20:16:23 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package examples.awk;
059:
060: import java.io.*;
061: import org.apache.oro.text.regex.*;
062: import org.apache.oro.text.awk.*;
063:
064: /**
065: * This is a test program demonstrating how to search an input stream
066: * with the AwkTools regular expression classes.
067: *
068: * @version @version@
069: */
070: public final class streamInputExample {
071:
072: /**
073: * This program extracts sentences containing the word C++ from
074: * the sample file streamInputExample.txt The regular expression
075: * used is not perfect, so focus on AwkStreamInput and not the
076: * ability of the regular expression to handle all normal sentences.
077: * For those not familiar with the OROMatcher Util class, a use of
078: * the Util.substitute method is included.
079: */
080: public static final void main(String args[]) {
081:
082: // A regular expression to extract sentences containing the word C++.
083: // We assume sentences can only end in . ! ? and start with a word
084: // character \w
085: String regex = "(\\w[^\\.?!]*C\\+\\+|C\\+\\+)[^\\.?!]*[\\.?!]";
086: String sentence;
087: AwkMatcher matcher;
088: AwkCompiler compiler;
089: Pattern pattern = null, newline = null;
090: AwkStreamInput input;
091: MatchResult result;
092: Reader file = null;
093:
094: // Create AwkCompiler and AwkMatcher instances.
095: compiler = new AwkCompiler();
096: matcher = new AwkMatcher();
097:
098: // Attempt to compile the pattern. If the pattern is not valid,
099: // report the error and exit.
100: try {
101: pattern = compiler.compile(regex,
102: AwkCompiler.CASE_INSENSITIVE_MASK);
103: // Compile a pattern representing a string of newlines with other
104: // whitespace stuck around the newlines
105: newline = compiler.compile("(\\s*[\n\r]\\s*)+");
106: } catch (MalformedPatternException e) {
107: System.err.println("Bad pattern.");
108: System.err.println(e.getMessage());
109: System.exit(1);
110: }
111:
112: // Open input file.
113: try {
114: file = new FileReader("streamInputExample.txt");
115: } catch (IOException e) {
116: System.err.println("Error opening streamInputExample.txt.");
117: System.err.println(e.getMessage());
118: System.exit(1);
119: }
120:
121: // Create an AwkStreamInput instance to search the input stream.
122: input = new AwkStreamInput(file);
123:
124: // We need to put the search loop in a try block because when searching
125: // an AwkStreamInput instance, an IOException may occur, and it must be
126: // caught.
127: try {
128: // Loop until there are no more matches left.
129: while (matcher.contains(input, pattern)) {
130: // Since we're still in the loop, fetch match that was found.
131: result = matcher.getMatch();
132:
133: // Substitute all newlines in the match with spaces.
134: sentence = Util.substitute(matcher, newline,
135: new StringSubstitution(" "), result.toString(),
136: Util.SUBSTITUTE_ALL);
137: System.out.println("\nMatch:\n" + sentence);
138: }
139: } catch (IOException e) {
140: System.err.println("Error occurred while reading file.");
141: System.err.println(e.getMessage());
142: System.exit(1);
143: }
144: }
145: }
|