001: package util;
002:
003: import org.apache.oro.text.regex.*;
004: import org.apache.oro.text.perl.*;
005: import java.util.*;
006:
007: /**
008: *provided as a wrapper so that we could substitute these with
009: * other calls in case we cant deploy the perl libraries currently used.
010: @author rahul kumar
011: @version $Id: PerlWrapper.java,v 1.1 2004/01/01 06:46:33 rahul_kumar Exp rahul $
012: */
013: public class PerlWrapper {
014:
015: public static Perl5Util perl = new Perl5Util();
016:
017: /**
018: * Takes a regular expression and string as input and reports all the
019: * pattern matches in the string.
020: * <p>
021: * @param args[] The array of arguments to the program. The first
022: * argument should be a Perl5 regular expression, and the second
023: * should be an input string.
024: */
025: /*
026: public static final void main(String args[]) {
027:
028: MultipleMatchModel mmm = new MultipleMatchModel(){
029: Hashtable ht = new Hashtable();
030: String value;
031: public void setValueAt(String s, int row, int group){
032: if (group==1) value = s;
033: else
034: if (group==2)
035: ht.put(s,value);
036: }
037: public String getValueAt (int row, int group){
038: return null;
039: }
040:
041: public Object getObject (){
042: return ht;
043: }
044:
045: };
046: try {
047: getMultipleMatches(
048: "((?:\\w+\\.)*[A-Z]\\w*)\\s+(\\w+)", "java.io.Exception exc; Date f1 = new Date(ccc); f1.get(ZZZ); Random ran = new Random(); Date rand ;", mmm);
049: Hashtable hta = (Hashtable) mmm.getObject();
050: System.out.println( "f1:"+ (String)hta.get("f1"));
051: System.out.println( "exc:"+ (String)hta.get("exc"));
052: System.out.println( "ran:"+ (String)hta.get("ran"));
053: System.out.println( "rand:"+ (String)hta.get("rand"));
054: } catch (Exception exc) { System.err.println( "pw EXC:"+ exc.toString()); }
055:
056: }
057: */
058: /** receives a regular expression and a string to search in,
059: * it updates the given model object.
060: * The purpose of the model object is that we do not know how many
061: * groups the regexp has, and what to do with those groups. Does
062: * the user want a hashtable with a particular group as key, and
063: * another as value or what.
064: */
065: public static void getMultipleMatches(String regexp,
066: String content, MultipleMatchModel mmm)
067: throws MalformedPatternException {
068: int groups;
069: PatternMatcher matcher;
070: PatternCompiler compiler;
071: Pattern pattern = null;
072: PatternMatcherInput input;
073: MatchResult result;
074:
075: // Create Perl5Compiler and Perl5Matcher instances.
076: compiler = new Perl5Compiler();
077: matcher = new Perl5Matcher();
078:
079: // Attempt to compile the pattern. If the pattern is not valid,
080: // report the error and exit.
081: pattern = compiler.compile(regexp);
082:
083: // Create a PatternMatcherInput instance to keep track of the position
084: // where the last match finished, so that the next match search will
085: // start from there. You always create a PatternMatcherInput instance
086: // when you want to search a string for all of the matches it contains,
087: // and not just the first one.
088: input = new PatternMatcherInput(content);
089:
090: // Loop until there are no more matches left.
091: int row = 0;
092: while (matcher.contains(input, pattern)) {
093: // Since we're still in the loop, fetch match that was found.
094: result = matcher.getMatch();
095:
096: // Retrieve the number of matched groups. A group corresponds to
097: // a parenthesized set in a pattern.
098: groups = result.groups();
099:
100: for (int group = 1; group < groups; group++) {
101: mmm.setValueAt(result.group(group), row, group);
102: }
103: row++;
104: }
105: }
106:
107: /** returns true if the regular expression and text match.
108: * Slashes will be placed around the regex sent.
109: */
110: public static boolean isMatching(String regex, String text) {
111: return (perl.match("/" + regex + "/", text));
112: }
113:
114: /** caller must supply / on both sides with whatever flags he wants
115: * such as i for ignore case.
116: * e.g. /CREATE/i
117: */
118: public static boolean isMatchingRE(String regex, String text) {
119: return (perl.match(regex, text));
120: }
121:
122: /** returns the results of a single match on a string, returning all
123: * matched groups.
124: * e.g.
125: * perlMatch("(public|protected)\\s+(class|interface)\\s+(\w+)",
126: * "public class Hey") would return an array {"public","class","Hey"}
127: */
128: public static String[] perlMatch(String regex, String text) {
129: if (perl.match("/" + regex + "/", text)) {
130: int gr = perl.groups();
131: String[] result = new String[gr - 1];
132: for (int i = 1; i < gr; i++) {
133: result[i - 1] = perl.group(i);
134: }
135:
136: return result;
137: }
138: return null;
139: }
140:
141: public static String[] perlMatchRE(String regex, String text) {
142: if (perl.match(regex, text)) {
143: int gr = perl.groups();
144: String[] result = new String[gr - 1];
145: for (int i = 1; i < gr; i++) {
146: result[i - 1] = perl.group(i);
147: }
148: return result;
149: }
150: return null;
151: }
152:
153: /** will substitute according to the pattern in the text and return
154: * the new text.
155: * e.g. line = perlSubstitute("s/$ID/+ var +"/g", line);
156: * for interpolations use $1 $2 etc
157: * /g means global replace
158: */
159: public static String perlSubstitute(String pattern, String text) {
160: return perl.substitute(pattern, text);
161: }
162:
163: /** it may be preferable to use perl's split for single matches */
164: public static MultipleMatchModel SimpleMultipleMatchModel() {
165: return new MultipleMatchModel() {
166: ArrayList al = new ArrayList();
167: String value;
168:
169: public void setValueAt(String s, int row, int group) {
170: if (group == 1)
171: al.add(s);
172: }
173:
174: public String getValueAt(int row, int group) {
175: return (String) al.get(row);
176: }
177:
178: public Object getObject() {
179: return al;
180: }
181:
182: };
183: }
184:
185: /** splits based on regexp sent in format '/regexp/'
186: */
187: public static void perlSplit(Collection coll, String pattern,
188: String input) throws MalformedPerl5PatternException {
189: perl.split(coll, pattern, input);
190: }
191:
192: /** splits based on regexp sent in format '/regexp/'
193: * The regexp is the split string.
194: */
195: public static String[] perlSplit(String pattern, String input)
196: throws MalformedPerl5PatternException {
197: ArrayList al = new ArrayList(16);
198: perl.split(al, pattern, input);
199: String[] sarr = new String[al.size()];
200: al.toArray(sarr);
201: return (sarr);
202: }
203:
204: public static void main(String args[]) {
205:
206: String input = "insert into tt ';' ;\ninsert into t1;\ninsert into mary;\n";
207:
208: String pattern = null;
209: if (args.length == 1)
210: pattern = args[0];
211: else
212: pattern = "/;\n/";
213: Vector v = new Vector();
214: perl.split(v, pattern, input);
215: System.out.println(v.size());
216: for (int i = 0; i < v.size(); i++) {
217: System.out.println(i + " " + v.get(i));
218: }
219: String ss = "abc<tab></tab>ad sda";
220: System.out.println("\n original 1:" + ss);
221: String news = PerlWrapper.perlSubstitute(
222: "s/<tab>.*?<\\/tab>/<tab>HAHA<\\/tab>/g", ss);
223: System.out.println(" put HAHA in 2:" + news);
224: news = PerlWrapper.perlSubstitute(
225: "s/<tab>.*?<\\/tab>/<tab>HAHA\n<\\/tab>/g", news);
226: System.out.println(" put HAHA with newline 3:" + news);
227: // XXX this fails since new line inside
228: news = PerlWrapper.perlSubstitute(
229: "s/<tab>.*?<\\/tab>/<tab>HANEWHA<\\/tab>/g", news);
230: System.out.println(" put HENEWHA 4:" + news);
231: String sql = "SELECT * from table where tsdate > #m-3#";
232: String m[] = perlMatch("#(\\S+)#", sql);
233: System.out.println("match:" + m[0]);
234:
235: }
236:
237: }
|