001: /*
002: * 01/07/2003 - 15:19:32
003: *
004: * PScanner.java -
005: * Copyright (C) 2003 Buero fuer Softwarearchitektur GbR
006: * ralf.meyer@karneim.com
007: * http://jrexx.sf.net
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU Lesser General Public License
011: * as published by the Free Software Foundation; either version 2
012: * of the License, or (at your option) any later version.
013: *
014: * This program is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser General Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser General Public License
020: * along with this program; if not, write to the Free Software
021: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
022: */
023: package com.tc.jrexx.regex;
024:
025: import java.util.Vector;
026: import java.text.ParsePosition;
027:
028: class PScanner {
029: public static final int UNLIMITED_MAX_LENGTH = Integer.MAX_VALUE;
030:
031: private final Automaton_Pattern.TerminalFormat[] terminalFormats;
032: private final int[] terminalsMaxLength;
033: private final boolean terminalFormatsAreExclusive;
034:
035: public PScanner(Automaton_Pattern.TerminalFormat[] terminalFormats) {
036: this (terminalFormats, false);
037: }
038:
039: public PScanner(Automaton_Pattern.TerminalFormat[] terminalFormats,
040: boolean terminalFormatsAreExclusive) {
041: this .terminalFormats = terminalFormats;
042: this .terminalFormatsAreExclusive = terminalFormatsAreExclusive;
043:
044: final int n = this .terminalFormats.length;
045: if (!this .terminalFormatsAreExclusive) {
046: // reverse terminalFormats list
047: for (int i = (n - 1) >> 1; i >= 0; --i) {
048: Automaton_Pattern.TerminalFormat temp = this .terminalFormats[i];
049: this .terminalFormats[i] = this .terminalFormats[n - i];
050: this .terminalFormats[n - i] = temp;
051: }
052: }
053: this .terminalsMaxLength = new int[n];
054: for (int i = 0; i < n; i++) {
055: this .terminalsMaxLength[i] = this .terminalFormats[i]
056: .maxLength();
057: }
058: }
059:
060: public Vector scan(String source) {
061: return this .scan(source, 0);
062: }
063:
064: public Vector scan(String source, int startIndex) {
065: if (source == null) {
066: String message = "null source specified";
067: throw new IllegalArgumentException(message);
068: }
069:
070: final char[] input = source.toCharArray();
071:
072: int firstIndexOfTerminalFormats = -1;
073: int lastIndexOfTerminalFormats = -1;
074:
075: for (int i = this .terminalFormats.length - 1; i >= 0; i--) {
076: if (this .terminalFormats[i] != null) {
077: lastIndexOfTerminalFormats = i;
078: break;
079: }
080: }
081:
082: if (lastIndexOfTerminalFormats == -1) {
083: String message = "no terminal formats added";
084: throw new NullPointerException(message);
085: }
086:
087: for (int i = 0; i <= lastIndexOfTerminalFormats; i++) {
088: if (this .terminalFormats[i] != null) {
089: firstIndexOfTerminalFormats = i;
090: break;
091: }
092: }
093:
094: // System.out.println("Scanner start on: "+new String(input,startIndex,input.length-startIndex));
095: final Vector tokenList = new Vector();
096: final int inputLength = input.length;
097: final ParsePosition pos = new ParsePosition(startIndex);
098: int index = startIndex;
099: while (index < inputLength) {
100: int longestMatch = -1;
101: Object lastToken = null, token;
102: for (int i = lastIndexOfTerminalFormats; i >= firstIndexOfTerminalFormats; i--) {
103: if (this .terminalsMaxLength[i] >= longestMatch) {
104: pos.setIndex(index);
105:
106: //System.out.print(this.terminalFormats[i].getClass().getName().substring(this.terminalFormats[i].getClass().getName().indexOf(".")+1));
107: //System.out.print(".scan("+new String(input,pos.getIndex(),input.length-pos.getIndex())+") -> ");
108: token = this .terminalFormats[i].parseObject(input,
109: pos);
110: //System.out.println(token+" -> "+new String(input,pos.getIndex(),input.length-pos.getIndex()));
111: final int matchLength = pos.getIndex() - index;
112: if (token != null) {
113: if (this .terminalFormatsAreExclusive) {
114: longestMatch = matchLength;
115: lastToken = token;
116: break;
117: } else {
118: if (matchLength >= longestMatch) {
119: longestMatch = matchLength;
120: lastToken = token;
121: }
122: }
123: }
124: }
125: }
126: //if (lastToken!=null) System.out.println("Token recognized: "+lastToken);
127: if (lastToken != null)
128: tokenList.addElement(lastToken);
129: else {
130: String message = "can not scan input:"
131: + "\n"
132: + new String(input, startIndex, input.length
133: - startIndex)
134: + "\nerrorPosition: "
135: + index
136: + "\n"
137: + new String(input, index, input.length - index);
138: throw new ParseException(message);
139: }
140: index += longestMatch;
141: }
142:
143: return tokenList;
144: }
145:
146: public String toString() {
147: StringBuffer answer = new StringBuffer();
148: answer.append("Scanner(");
149: if (this .terminalFormatsAreExclusive)
150: answer.append("exclusive");
151: answer.append(")");
152: for (int i = 0; i < this .terminalFormats.length; i++)
153: if (this .terminalFormats[i] != null)
154: answer.append('\n').append(this.terminalFormats[i]);
155: return answer.toString();
156: }
157:
158: }
|