001: package it.unimi.dsi.mg4j.index.snowball;
002:
003: import java.lang.reflect.InvocationTargetException;
004: import it.unimi.dsi.lang.MutableString;
005: import it.unimi.dsi.mg4j.index.TermProcessor;
006:
007: public abstract class AbstractSnowballTermProcessor implements
008: TermProcessor, Cloneable {
009:
010: protected abstract boolean stem();
011:
012: public boolean processTerm(final MutableString term) {
013: current = term.toLowerCase();
014: copy.replace(current);
015: cursor = 0;
016: limit = current.length();
017: array = current.array();
018:
019: limit_backward = 0;
020: bra = cursor;
021: ket = limit;
022: boolean b = stem();
023: // If we stem to length 0, we restore the original term, downcased.
024: if (current.length() == 0)
025: current.replace(copy);
026: current = null;
027: array = null;
028: return b;
029: }
030:
031: public boolean processPrefix(final MutableString prefix) {
032: return prefix != null;
033: }
034:
035: public AbstractSnowballTermProcessor copy() {
036: try {
037: return this .getClass().newInstance();
038: } catch (Exception e) {
039: throw new RuntimeException(e);
040: }
041: }
042:
043: // current string
044: protected MutableString current;
045: // copy string (in case current gets deleted)
046: protected MutableString copy = new MutableString();
047:
048: protected char[] array;
049:
050: protected int cursor;
051:
052: protected int limit;
053:
054: protected int limit_backward;
055:
056: protected int bra;
057:
058: protected int ket;
059:
060: protected void copy_from(AbstractSnowballTermProcessor other) {
061: current = other.current;
062: cursor = other.cursor;
063: limit = other.limit;
064: limit_backward = other.limit_backward;
065: bra = other.bra;
066: ket = other.ket;
067: }
068:
069: protected boolean in_grouping(char[] s, int min, int max) {
070: if (cursor >= limit)
071: return false;
072: char ch = array[cursor];
073: if (ch > max || ch < min)
074: return false;
075: ch -= min;
076: if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
077: return false;
078: cursor++;
079: return true;
080: }
081:
082: protected boolean in_grouping_b(char[] s, int min, int max) {
083: if (cursor <= limit_backward)
084: return false;
085: char ch = array[cursor - 1];
086: if (ch > max || ch < min)
087: return false;
088: ch -= min;
089: if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
090: return false;
091: cursor--;
092: return true;
093: }
094:
095: protected boolean out_grouping(char[] s, int min, int max) {
096: if (cursor >= limit)
097: return false;
098: char ch = array[cursor];
099: if (ch > max || ch < min) {
100: cursor++;
101: return true;
102: }
103: ch -= min;
104: if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
105: cursor++;
106: return true;
107: }
108: return false;
109: }
110:
111: protected boolean out_grouping_b(char[] s, int min, int max) {
112: if (cursor <= limit_backward)
113: return false;
114: char ch = array[cursor - 1];
115: if (ch > max || ch < min) {
116: cursor--;
117: return true;
118: }
119: ch -= min;
120: if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
121: cursor--;
122: return true;
123: }
124: return false;
125: }
126:
127: protected boolean in_range(int min, int max) {
128: if (cursor >= limit)
129: return false;
130: char ch = array[cursor];
131: if (ch > max || ch < min)
132: return false;
133: cursor++;
134: return true;
135: }
136:
137: protected boolean in_range_b(int min, int max) {
138: if (cursor <= limit_backward)
139: return false;
140: char ch = array[cursor - 1];
141: if (ch > max || ch < min)
142: return false;
143: cursor--;
144: return true;
145: }
146:
147: protected boolean out_range(int min, int max) {
148: if (cursor >= limit)
149: return false;
150: char ch = array[cursor];
151: if (!(ch > max || ch < min))
152: return false;
153: cursor++;
154: return true;
155: }
156:
157: protected boolean out_range_b(int min, int max) {
158: if (cursor <= limit_backward)
159: return false;
160: char ch = array[cursor - 1];
161: if (!(ch > max || ch < min))
162: return false;
163: cursor--;
164: return true;
165: }
166:
167: protected boolean eq_s(int s_size, String s) {
168: if (limit - cursor < s_size)
169: return false;
170: int i;
171: for (i = 0; i != s_size; i++) {
172: if (array[cursor + i] != s.charAt(i))
173: return false;
174: }
175: cursor += s_size;
176: return true;
177: }
178:
179: protected boolean eq_s_b(int s_size, String s) {
180: if (cursor - limit_backward < s_size)
181: return false;
182: int i;
183: for (i = 0; i != s_size; i++) {
184: if (array[cursor - s_size + i] != s.charAt(i))
185: return false;
186: }
187: cursor -= s_size;
188: return true;
189: }
190:
191: protected boolean eq_v(MutableString s) {
192: return eq_s(s.length(), s.toString());
193: }
194:
195: protected boolean eq_v_b(MutableString s) {
196: return eq_s_b(s.length(), s.toString());
197: }
198:
199: protected int find_among(Among v[], int v_size) {
200: int i = 0;
201: int j = v_size;
202:
203: int c = cursor;
204: int l = limit;
205:
206: int common_i = 0;
207: int common_j = 0;
208:
209: boolean first_key_inspected = false;
210:
211: while (true) {
212: int k = i + ((j - i) >> 1);
213: int diff = 0;
214: int common = common_i < common_j ? common_i : common_j; // smaller
215: Among w = v[k];
216: int i2;
217: for (i2 = common; i2 < w.s_size; i2++) {
218: if (c + common == l) {
219: diff = -1;
220: break;
221: }
222: diff = array[c + common] - w.s[i2];
223: if (diff != 0)
224: break;
225: common++;
226: }
227: if (diff < 0) {
228: j = k;
229: common_j = common;
230: } else {
231: i = k;
232: common_i = common;
233: }
234: if (j - i <= 1) {
235: if (i > 0)
236: break; // v->s has been inspected
237: if (j == i)
238: break; // only one item in v
239:
240: // - but now we need to go round once more to get
241: // v->s inspected. This looks messy, but is actually
242: // the optimal approach.
243:
244: if (first_key_inspected)
245: break;
246: first_key_inspected = true;
247: }
248: }
249: while (true) {
250: Among w = v[i];
251: if (common_i >= w.s_size) {
252: cursor = c + w.s_size;
253: if (w.method == null)
254: return w.result;
255: boolean res;
256: try {
257: Object resobj = w.method.invoke(w.methodobject,
258: new Object[0]);
259: res = resobj.toString().equals("true");
260: } catch (InvocationTargetException e) {
261: res = false;
262: // FIXME - debug message
263: } catch (IllegalAccessException e) {
264: res = false;
265: // FIXME - debug message
266: }
267: cursor = c + w.s_size;
268: if (res)
269: return w.result;
270: }
271: i = w.substring_i;
272: if (i < 0)
273: return 0;
274: }
275: }
276:
277: // find_among_b is for backwards processing. Same comments apply
278: protected int find_among_b(Among v[], int v_size) {
279: int i = 0;
280: int j = v_size;
281:
282: int c = cursor;
283: int lb = limit_backward;
284:
285: int common_i = 0;
286: int common_j = 0;
287:
288: boolean first_key_inspected = false;
289:
290: while (true) {
291: int k = i + ((j - i) >> 1);
292: int diff = 0;
293: int common = common_i < common_j ? common_i : common_j;
294: Among w = v[k];
295: int i2;
296: for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
297: if (c - common == lb) {
298: diff = -1;
299: break;
300: }
301: diff = array[c - 1 - common] - w.s[i2];
302: if (diff != 0)
303: break;
304: common++;
305: }
306: if (diff < 0) {
307: j = k;
308: common_j = common;
309: } else {
310: i = k;
311: common_i = common;
312: }
313: if (j - i <= 1) {
314: if (i > 0)
315: break;
316: if (j == i)
317: break;
318: if (first_key_inspected)
319: break;
320: first_key_inspected = true;
321: }
322: }
323: while (true) {
324: Among w = v[i];
325: if (common_i >= w.s_size) {
326: cursor = c - w.s_size;
327: if (w.method == null)
328: return w.result;
329:
330: boolean res;
331: try {
332: Object resobj = w.method.invoke(w.methodobject,
333: new Object[0]);
334: res = resobj.toString().equals("true");
335: } catch (InvocationTargetException e) {
336: res = false;
337: // FIXME - debug message
338: } catch (IllegalAccessException e) {
339: res = false;
340: // FIXME - debug message
341: }
342: cursor = c - w.s_size;
343: if (res)
344: return w.result;
345: }
346: i = w.substring_i;
347: if (i < 0)
348: return 0;
349: }
350: }
351:
352: /*
353: * to replace chars between c_bra and c_ket in current by the chars in s.
354: */
355: protected int replace_s(int c_bra, int c_ket, String s) {
356: int adjustment = s.length() - (c_ket - c_bra);
357: current.replace(c_bra, c_ket, s);
358: array = current.array();
359: limit += adjustment;
360: if (cursor >= c_ket)
361: cursor += adjustment;
362: else if (cursor > c_bra)
363: cursor = c_bra;
364: return adjustment;
365: }
366:
367: protected void slice_check() {
368: if (bra < 0 || bra > ket || ket > limit
369: || limit > current.length())
370: throw new IllegalArgumentException("Faulty slice operation");
371: }
372:
373: protected void slice_from(String s) {
374: slice_check();
375: replace_s(bra, ket, s);
376: }
377:
378: protected void slice_del() {
379: slice_from("");
380: }
381:
382: protected void insert(int c_bra, int c_ket, String s) {
383: int adjustment = replace_s(c_bra, c_ket, s);
384: if (c_bra <= bra)
385: bra += adjustment;
386: if (c_bra <= ket)
387: ket += adjustment;
388: }
389:
390: protected void insert(int c_bra, int c_ket, MutableString s) {
391: insert(c_bra, c_ket, s.toString());
392: }
393:
394: /* Copy the slice into the supplied MutableString */
395: protected MutableString slice_to(MutableString s) {
396: slice_check();
397: s.replace(0, s.length(), current.substring(bra, ket));
398: return s;
399: }
400:
401: protected MutableString assign_to(MutableString s) {
402: s.replace(0, s.length(), current.substring(0, limit));
403: return s;
404: }
405: };
|