001: /*
002: * $Id: prefixExample.java,v 1.7 2003/11/07 20:16:23 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package examples.awk;
059:
060: import org.apache.oro.text.regex.*;
061: import org.apache.oro.text.awk.*;
062:
063: /**
064: * This is a test program demonstrating an application of the matchesPrefix()
065: * methods. This example program shows how you might tokenize a stream of
066: * input using whitespace as a token separator. Don't forget to use quotes
067: * around the input on the command line, e.g.
068: * java prefixExample "Test to see if 1.0 is real and 2 is an integer"
069: *
070: * If you don't need the power of a full blown lexer generator, you can
071: * easily use regular expressions to create your own tokenization and
072: * simple parsing classes using similar approaches. This example is
073: * rather sloppy. If you look at the equivalent example in the OROMatcher
074: * distribution, you'll see how to Perl's zero-width look ahead assertion
075: * makes correctness easier to achieve.
076: *
077: * @version @version@
078: */
079: public final class prefixExample {
080: public static final int REAL = 0;
081: public static final int INTEGER = 1;
082: public static final int STRING = 2;
083:
084: public static final String[] types = { "Real", "Integer", "String" };
085: public static final String whitespace = "[ \t\n\r]+";
086: public static final String[] tokens = {
087: "-?[0-9]*\\.[0-9]+([eE]-?[0-9]+)?", "-?[0-9]+",
088: "[^ \t\n\r]+" };
089:
090: public static final void main(String args[]) {
091: int token;
092: PatternMatcherInput input;
093: PatternMatcher matcher;
094: PatternCompiler compiler;
095: Pattern[] patterns;
096: Pattern tokenSeparator = null;
097: MatchResult result;
098:
099: if (args.length < 1) {
100: System.err.println("Usage: prefixExample <sample input>");
101: System.exit(1);
102: }
103:
104: input = new PatternMatcherInput(args[0]);
105: compiler = new AwkCompiler();
106: patterns = new Pattern[tokens.length];
107:
108: try {
109: tokenSeparator = compiler.compile(whitespace);
110: for (token = 0; token < tokens.length; token++)
111: patterns[token] = compiler.compile(tokens[token]);
112: } catch (MalformedPatternException e) {
113: System.err.println("Bad pattern.");
114: e.printStackTrace();
115: System.exit(1);
116: }
117:
118: matcher = new AwkMatcher();
119:
120: _whileLoop: while (!input.endOfInput()) {
121: for (token = 0; token < tokens.length; token++)
122: if (matcher.matchesPrefix(input, patterns[token])) {
123: int offset;
124: result = matcher.getMatch();
125: offset = input.getCurrentOffset();
126: input.setCurrentOffset(result.endOffset(0));
127:
128: if (matcher.matchesPrefix(input, tokenSeparator)) {
129: input.setCurrentOffset(matcher.getMatch()
130: .endOffset(0));
131: System.out
132: .println(types[token] + ": " + result);
133: continue _whileLoop;
134: } else if (input.endOfInput()) {
135: System.out
136: .println(types[token] + ": " + result);
137: break _whileLoop;
138: }
139:
140: input.setCurrentOffset(offset);
141: }
142:
143: if (matcher.matchesPrefix(input, tokenSeparator))
144: input.setCurrentOffset(matcher.getMatch().endOffset(0));
145: else {
146: System.err
147: .println("Unrecognized token starting at offset: "
148: + input.getCurrentOffset());
149: break;
150: }
151: }
152:
153: }
154: }
|