001: /*
002: * Copyright 2000 Finn Bock
003: *
004: * This program contains material copyrighted by:
005: * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
006: *
007: * This version of the SRE library can be redistributed under CNRI's
008: * Python 1.6 license. For any other use, please contact Secret Labs
009: * AB (info@pythonware.com).
010: *
011: * Portions of this engine have been developed in cooperation with
012: * CNRI. Hewlett-Packard provided funding for 1.6 integration and
013: * other compatibility work.
014: */
015:
016: package org.python.modules.sre;
017:
018: import java.util.*;
019: import org.python.core.*;
020:
021: public class PatternObject extends PyObject {
022: char[] code; /* link to the code string object */
023: public PyString pattern; /* link to the pattern source (or None) */
024: public int groups;
025: public org.python.core.PyObject groupindex;
026: public int flags;
027: org.python.core.PyObject indexgroup;
028: public int codesize;
029:
030: public PatternObject(PyString pattern, int flags, char[] code,
031: int groups, PyObject groupindex, PyObject indexgroup) {
032:
033: if (pattern != null)
034: this .pattern = pattern;
035: this .flags = flags;
036: this .code = code;
037: this .codesize = code.length;
038: this .groups = groups;
039: this .groupindex = groupindex;
040: this .indexgroup = indexgroup;
041: }
042:
043: public MatchObject match(PyObject[] args, String[] kws) {
044: ArgParser ap = new ArgParser("search", args, kws, "pattern",
045: "pos", "endpos");
046: PyString string = extractPyString(ap, 0);
047: int start = ap.getInt(1, 0);
048: int end = ap.getInt(2, string.__len__());
049: SRE_STATE state = new SRE_STATE(string.toString(), start, end,
050: flags);
051:
052: state.ptr = state.start;
053: int status = state.SRE_MATCH(code, 0, 1);
054:
055: return _pattern_new_match(state, string, status);
056: }
057:
058: public MatchObject search(PyObject[] args, String[] kws) {
059: ArgParser ap = new ArgParser("search", args, kws, "pattern",
060: "pos", "endpos");
061: PyString string = extractPyString(ap, 0);
062: int start = ap.getInt(1, 0);
063: int end = ap.getInt(2, string.__len__());
064:
065: SRE_STATE state = new SRE_STATE(string.toString(), start, end,
066: flags);
067:
068: int status = state.SRE_SEARCH(code, 0);
069:
070: return _pattern_new_match(state, string, status);
071: }
072:
073: public PyObject sub(PyObject[] args, String[] kws) {
074: ArgParser ap = new ArgParser("sub", args, kws, "repl",
075: "string", "count");
076: PyObject template = ap.getPyObject(0);
077: int count = ap.getInt(2, 0);
078:
079: return subx(template, extractPyString(ap, 1), count, false);
080: }
081:
082: public PyObject subn(PyObject[] args, String[] kws) {
083: ArgParser ap = new ArgParser("subn", args, kws, "repl",
084: "string", "count");
085: PyObject template = ap.getPyObject(0);
086: int count = ap.getInt(2, 0);
087:
088: return subx(template, extractPyString(ap, 1), count, true);
089: }
090:
091: private PyObject subx(PyObject template, PyString instring,
092: int count, boolean subn) {
093: String string = instring.toString();
094: PyObject filter = null;
095: boolean filter_is_callable = false;
096: if (template.isCallable()) {
097: filter = template;
098: filter_is_callable = true;
099: } else {
100: boolean literal = false;
101: if (template instanceof PyString) {
102: literal = template.toString().indexOf('\\') < 0;
103: }
104: if (literal) {
105: filter = template;
106: filter_is_callable = false;
107: } else {
108: filter = call("sre", "_subx", new PyObject[] { this ,
109: template });
110: filter_is_callable = filter.isCallable();
111: }
112: }
113:
114: SRE_STATE state = new SRE_STATE(string, 0, Integer.MAX_VALUE,
115: flags);
116:
117: StringBuffer buf = new StringBuffer();
118:
119: int n = 0;
120: int i = 0;
121:
122: while (count == 0 || n < count) {
123: state.state_reset();
124: state.ptr = state.start;
125: int status = state.SRE_SEARCH(code, 0);
126: if (status <= 0) {
127: if (status == 0)
128: break;
129: _error(status);
130: }
131: int b = state.start;
132: int e = state.ptr;
133:
134: if (i < b) {
135: /* get segment before this match */
136: buf.append(string.substring(i, b));
137: }
138: if (!(i == b && i == e && n > 0)) {
139: PyObject item;
140: if (filter_is_callable) {
141: /* pass match object through filter */
142: MatchObject match = _pattern_new_match(state,
143: instring, 1);
144: item = filter.__call__(match);
145: } else {
146: item = filter;
147: }
148:
149: if (item != Py.None) {
150: buf.append(item.toString());
151: }
152: i = e;
153: n++;
154: }
155:
156: /* move on */
157: if (state.ptr == state.start)
158: state.start = state.ptr + 1;
159: else
160: state.start = state.ptr;
161: }
162: if (i < state.endpos) {
163: buf.append(string.substring(i, state.endpos));
164: }
165:
166: if (subn)
167: return new PyTuple(new PyObject[] {
168: instring.createInstance(buf.toString()),
169: Py.newInteger(n) });
170: else
171: return instring.createInstance(buf.toString());
172: }
173:
174: public PyObject split(PyObject[] args, String[] kws) {
175: ArgParser ap = new ArgParser("split", args, kws, "source",
176: "maxsplit");
177: PyString string = extractPyString(ap, 0);
178: int maxsplit = ap.getInt(1, 0);
179:
180: SRE_STATE state = new SRE_STATE(string.toString(), 0,
181: Integer.MAX_VALUE, flags);
182:
183: PyList list = new PyList();
184:
185: int n = 0;
186: int last = state.start;
187: while (maxsplit == 0 || n < maxsplit) {
188: state.state_reset();
189: state.ptr = state.start;
190: int status = state.SRE_SEARCH(code, 0);
191: if (status <= 0) {
192: if (status == 0)
193: break;
194: _error(status);
195: }
196: if (state.start == state.ptr) {
197: if (last == state.end)
198: break;
199: /* skip one character */
200: state.start = state.ptr + 1;
201: continue;
202: }
203:
204: /* get segment before this match */
205: PyObject item = string.__getslice__(Py.newInteger(last), Py
206: .newInteger(state.start));
207: list.append(item);
208:
209: for (int i = 0; i < groups; i++) {
210: String s = state.getslice(i + 1, string.toString(),
211: false);
212: if (s != null)
213: list.append(string.createInstance(s));
214: else
215: list.append(Py.None);
216: }
217: n += 1;
218: last = state.start = state.ptr;
219: }
220:
221: list.append(string.__getslice__(Py.newInteger(last), Py
222: .newInteger(state.endpos)));
223:
224: return list;
225: }
226:
227: private PyObject call(String module, String function,
228: PyObject[] args) {
229: PyObject sre = imp.importName(module, true);
230: return sre.invoke(function, args);
231: }
232:
233: public PyObject findall(PyObject[] args, String[] kws) {
234: ArgParser ap = new ArgParser("findall", args, kws, "source",
235: "pos", "endpos");
236: PyString string = extractPyString(ap, 0);
237: int start = ap.getInt(1, 0);
238: int end = ap.getInt(2, Integer.MAX_VALUE);
239:
240: SRE_STATE state = new SRE_STATE(string.toString(), start, end,
241: flags);
242:
243: Vector list = new Vector();
244:
245: while (state.start <= state.end) {
246: state.state_reset();
247: state.ptr = state.start;
248: int status = state.SRE_SEARCH(code, 0);
249: if (status > 0) {
250: PyObject item;
251:
252: /* don't bother to build a match object */
253: switch (groups) {
254: case 0:
255: item = string.__getslice__(Py
256: .newInteger(state.start), Py
257: .newInteger(state.ptr));
258: break;
259: case 1:
260: item = string.createInstance(state.getslice(1,
261: string.toString(), true));
262: break;
263: default:
264: PyObject[] t = new PyObject[groups];
265: for (int i = 0; i < groups; i++)
266: t[i] = string.createInstance(state.getslice(
267: i + 1, string.toString(), true));
268: item = new PyTuple(t);
269: break;
270: }
271:
272: list.addElement(item);
273:
274: if (state.ptr == state.start)
275: state.start = state.ptr + 1;
276: else
277: state.start = state.ptr;
278: } else {
279:
280: if (status == 0)
281: break;
282:
283: _error(status);
284: }
285: }
286: return new PyList(list);
287: }
288:
289: public PyObject finditer(PyObject[] args, String[] kws) {
290: ScannerObject scanner = scanner(args, kws);
291: PyObject search = scanner.__findattr__("search");
292: return new PyCallIter(search, Py.None);
293: }
294:
295: public ScannerObject scanner(PyObject[] args, String[] kws) {
296: ArgParser ap = new ArgParser("scanner", args, kws, "pattern",
297: "pos", "endpos");
298: PyString string = extractPyString(ap, 0);
299:
300: ScannerObject self = new ScannerObject();
301: self.state = new SRE_STATE(string.toString(), ap.getInt(1, 0),
302: ap.getInt(2, Integer.MAX_VALUE), flags);
303: self.pattern = this ;
304: self.string = string;
305: return self;
306: }
307:
308: private void _error(int status) {
309: if (status == SRE_STATE.SRE_ERROR_RECURSION_LIMIT)
310: throw Py.RuntimeError("maximum recursion limit exceeded");
311:
312: throw Py
313: .RuntimeError("internal error in regular expression engine");
314: }
315:
316: MatchObject _pattern_new_match(SRE_STATE state, PyString string,
317: int status) {
318: /* create match object (from state object) */
319:
320: //System.out.println("status = " + status + " " + string);
321: if (status > 0) {
322: /* create match object (with room for extra group marks) */
323: MatchObject match = new MatchObject();
324: match.pattern = this ;
325: match.string = string;
326: match.regs = null;
327: match.groups = groups + 1;
328: /* group zero */
329: int base = state.beginning;
330:
331: match.mark = new int[match.groups * 2];
332: match.mark[0] = state.start - base;
333: match.mark[1] = state.ptr - base;
334:
335: /* fill in the rest of the groups */
336: int i, j;
337: for (i = j = 0; i < groups; i++, j += 2) {
338: if (j + 1 <= state.lastmark && state.mark[j] != -1
339: && state.mark[j + 1] != -1) {
340: match.mark[j + 2] = state.mark[j] - base;
341: match.mark[j + 3] = state.mark[j + 1] - base;
342: } else
343: match.mark[j + 2] = match.mark[j + 3] = -1;
344: }
345: match.pos = state.pos;
346: match.endpos = state.endpos;
347: match.lastindex = state.lastindex;
348:
349: return match;
350: } else if (status == 0) {
351: return null;
352: }
353:
354: _error(status);
355: return null;
356: }
357:
358: private static PyString extractPyString(ArgParser ap, int pos) {
359: PyObject obj = ap.getPyObject(pos);
360: if (!(obj instanceof PyString)) {
361: throw Py.TypeError("expected str or unicode but got "
362: + obj.getType());
363: }
364: return (PyString) ap.getPyObject(pos);
365: }
366: }
|