001: /*
002: * Regsub.java
003: *
004: * Brazil project web application Framework,
005: * export version: 1.1
006: * Copyright (c) 1999 Sun Microsystems, Inc.
007: *
008: * Sun Public License Notice
009: *
010: * The contents of this file are subject to the Sun Public License Version
011: * 1.0 (the "License"). You may not use this file except in compliance with
012: * the License. A copy of the License is included as the file "license.terms",
013: * and also available at http://www.sun.com/
014: *
015: * The Original Code is from:
016: * Brazil project web application Framework release 1.1.
017: * The Initial Developer of the Original Code is: cstevens.
018: * Portions created by cstevens are Copyright (C) Sun Microsystems, Inc.
019: * All Rights Reserved.
020: *
021: * Contributor(s): cstevens.
022: *
023: * Version: 1.4
024: * Created by cstevens on 99/08/10
025: * Last modified by cstevens on 99/10/14 13:11:16
026: */
027:
028: package sunlabs.brazil.util.regexp;
029:
030: /**
031: * The <code>Regsub</code> class provides an iterator-like object to
032: * extract the matched and unmatched portions of a string with respect to
033: * a given regular expression.
034: * <p>
035: * After each match is found, the portions of the string already
036: * checked are not searched again -- searching for the next match will
037: * begin at the character just after where the last match ended.
038: * <p>
039: * Here is an example of using Regsub to replace all "%XX" sequences in
040: * a string with the ASCII character represented by the hex digits "XX":
041: * <pre>
042: * public static void
043: * main(String[] args)
044: * throws Exception
045: * {
046: * Regexp re = new Regexp("%[a-fA-F0-9][a-fA-F0-9]");
047: * Regsub rs = new Regsub(re, args[0]);
048: *
049: * StringBuffer sb = new StringBuffer();
050: *
051: * while (rs.nextMatch()) {
052: * sb.append(rs.skipped());
053: *
054: * String match = rs.matched();
055: *
056: * int hi = Character.digit(match.charAt(1), 16);
057: * int lo = Character.digit(match.charAt(2), 16);
058: * sb.append((char) ((hi << 4) | lo));
059: * }
060: * sb.append(rs.rest());
061: *
062: * System.out.println(sb);
063: * }
064: * </pre>
065: *
066: * @author Colin Stevens (colin.stevens@sun.com)
067: * @version 1.4, 99/10/14
068: * @see Regexp
069: */
070: public class Regsub {
071: Regexp r;
072: String str;
073: int ustart;
074: int mstart;
075: int end;
076: Regexp.Match m;
077:
078: /**
079: * Construct a new <code>Regsub</code> that can be used to step
080: * through the given string, finding each substring that matches
081: * the given regular expression.
082: * <p>
083: * <code>Regexp</code> contains two substitution methods,
084: * <code>sub</code> and <code>subAll</code>, that can be used instead
085: * of <code>Regsub</code> if just simple substitutions are being done.
086: *
087: * @param r
088: * The compiled regular expression.
089: *
090: * @param str
091: * The string to search.
092: *
093: * @see Regexp#sub
094: * @see Regexp#subAll
095: */
096: public Regsub(Regexp r, String str) {
097: this .r = r;
098: this .str = str;
099: this .ustart = 0;
100: this .mstart = -1;
101: this .end = 0;
102: }
103:
104: /**
105: * Searches for the next substring that matches the regular expression.
106: * After calling this method, the caller would call methods like
107: * <code>skipped</code>, <code>matched</code>, etc. to query attributes
108: * of the matched region.
109: * <p>
110: * Calling this function again will search for the next match, beginning
111: * at the character just after where the last match ended.
112: *
113: * @return <code>true</code> if a match was found, <code>false</code>
114: * if there are no more matches.
115: */
116: public boolean nextMatch() {
117: ustart = end;
118:
119: /*
120: * Consume one character if the last match didn't consume any
121: * characters, to avoid an infinite loop.
122: */
123:
124: int off = ustart;
125: if (off == mstart) {
126: off++;
127: if (off >= str.length()) {
128: return false;
129: }
130: }
131:
132: m = r.exec(str, 0, off);
133: if (m == null) {
134: return false;
135: }
136:
137: mstart = m.indices[0];
138: end = m.indices[1];
139:
140: return true;
141: }
142:
143: /**
144: * Returns a substring consisting of all the characters skipped
145: * between the end of the last match (or the start of the original
146: * search string) and the start of this match.
147: * <p>
148: * This method can be used extract all the portions of string that
149: * <b>didn't</b> match the regular expression.
150: *
151: * @return The characters that didn't match.
152: */
153: public String skipped() {
154: return str.substring(ustart, mstart);
155: }
156:
157: /**
158: * Returns a substring consisting of the characters that matched
159: * the entire regular expression during the last call to
160: * <code>nextMatch</code>.
161: *
162: * @return The characters that did match.
163: *
164: * @see #submatch
165: */
166: public String matched() {
167: return str.substring(mstart, end);
168: }
169:
170: /**
171: * Returns a substring consisting of the characters that matched
172: * the given parenthesized subexpression during the last call to
173: * <code>nextMatch</code>.
174: *
175: * @param i
176: * The index of the parenthesized subexpression.
177: *
178: * @return The characters that matched the subexpression, or
179: * <code>null</code> if the given subexpression did not
180: * exist or did not match.
181: */
182: public String submatch(int i) {
183: if (i * 2 + 1 >= m.indices.length) {
184: return null;
185: }
186: int start = m.indices[i * 2];
187: int end = m.indices[i * 2 + 1];
188: if ((start < 0) || (end < 0)) {
189: return null;
190: }
191: return str.substring(start, end);
192: }
193:
194: /**
195: * Returns a substring consisting of all the characters that come
196: * after the last match. As the matches progress, the <code>rest</code>
197: * gets shorter. When <code>nextMatch</code> returns <code>false</code>,
198: * then this method will return the rest of the string that can't be
199: * matched.
200: *
201: * @return The rest of the characters after the last match.
202: */
203: public String rest() {
204: return str.substring(end);
205: }
206: }
|