001: /*
002: * $Id: prefixExample.java,v 1.7 2003/11/07 20:16:23 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package examples;
059:
060: import org.apache.oro.text.regex.*;
061:
062: /**
063: * This is a test program demonstrating an application of the matchesPrefix()
064: * methods introduced in OROMatcher v1.0.6. This example program shows how
065: * you might tokenize a stream of input using whitespace as a token
066: * separator. Don't forget to use quotes around the input on the command
067: * line, e.g.
068: * java prefixExample "Test to see if 1.0 is real and 2 is an integer"
069: *
070: * If you don't need the power of a full blown lexer generator, you can
071: * easily use regular expressions to create your own tokenization and
072: * simple parsing classes using similar approaches.
073: *
074: * @version @version@
075: */
076: public final class prefixExample {
077: public static final int REAL = 0;
078: public static final int INTEGER = 1;
079: public static final int STRING = 2;
080:
081: public static final String[] types = { "Real", "Integer", "String" };
082: public static final String whitespace = "\\s+";
083: public static final String[] tokens = {
084: "-?\\d*\\.\\d+(?:[eE][-+]-?\\d+)?(?=\\s|$)",
085: "-?\\d+(?=\\s|$)", "\\S+" };
086: public static final String tokens2 = "(-?\\d*\\.\\d+(?:[eE][-+]-?\\d+)?(?=\\s|$))|(-?\\d+(?=\\s|$))|(\\S+)";
087:
088: public static final void main(String args[]) {
089: int token;
090: PatternMatcherInput input;
091: PatternMatcher matcher;
092: PatternCompiler compiler;
093: Pattern[] patterns;
094: Pattern tokenSeparator = null, patterns2 = null;
095:
096: if (args.length < 1) {
097: System.err.println("Usage: prefixExample <sample input>");
098: System.exit(1);
099: }
100:
101: input = new PatternMatcherInput(args[0]);
102: compiler = new Perl5Compiler();
103: patterns = new Pattern[tokens.length];
104:
105: try {
106: tokenSeparator = compiler.compile(whitespace);
107: patterns2 = compiler.compile(tokens2);
108: for (token = 0; token < tokens.length; token++)
109: patterns[token] = compiler.compile(tokens[token]);
110: } catch (MalformedPatternException e) {
111: System.err.println("Bad pattern.");
112: e.printStackTrace();
113: System.exit(1);
114: }
115:
116: matcher = new Perl5Matcher();
117:
118: System.out.println("\nOne approach.\n");
119:
120: do {
121: for (token = 0; token < tokens.length; token++)
122: if (matcher.matchesPrefix(input, patterns[token])) {
123: System.out.println(types[token] + ": "
124: + matcher.getMatch());
125: break;
126: }
127: } while (matcher.contains(input, tokenSeparator));
128:
129: // An alternative approach using the tokens2 expression which
130: // packs all the token patterns into one regular expression.
131: // As in Perl, there's more than one way to do something in Java.
132: System.out.println("\nAn equivalent alternative.\n");
133:
134: input.setCurrentOffset(input.getBeginOffset());
135: do {
136: if (matcher.matchesPrefix(input, patterns2)) {
137: MatchResult result = matcher.getMatch();
138:
139: for (token = 1; token <= tokens.length; token++) {
140: if (result.group(token) != null) {
141: System.out.println(types[token - 1] + ": "
142: + result);
143: break;
144: }
145: }
146: }
147: } while (matcher.contains(input, tokenSeparator));
148:
149: }
150: }
|