001: package net.sf.snowball;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.lang.reflect.InvocationTargetException;
021:
022: public class SnowballProgram {
023: protected SnowballProgram() {
024: current = new StringBuffer();
025: setCurrent("");
026: }
027:
028: /**
029: * Set the current string.
030: */
031: public void setCurrent(String value) {
032: current.replace(0, current.length(), value);
033: cursor = 0;
034: limit = current.length();
035: limit_backward = 0;
036: bra = cursor;
037: ket = limit;
038: }
039:
040: /**
041: * Get the current string.
042: */
043: public String getCurrent() {
044: return current.toString();
045: }
046:
047: // current string
048: protected StringBuffer current;
049:
050: protected int cursor;
051: protected int limit;
052: protected int limit_backward;
053: protected int bra;
054: protected int ket;
055:
056: protected void copy_from(SnowballProgram other) {
057: current = other.current;
058: cursor = other.cursor;
059: limit = other.limit;
060: limit_backward = other.limit_backward;
061: bra = other.bra;
062: ket = other.ket;
063: }
064:
065: protected boolean in_grouping(char[] s, int min, int max) {
066: if (cursor >= limit)
067: return false;
068: char ch = current.charAt(cursor);
069: if (ch > max || ch < min)
070: return false;
071: ch -= min;
072: if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
073: return false;
074: cursor++;
075: return true;
076: }
077:
078: protected boolean in_grouping_b(char[] s, int min, int max) {
079: if (cursor <= limit_backward)
080: return false;
081: char ch = current.charAt(cursor - 1);
082: if (ch > max || ch < min)
083: return false;
084: ch -= min;
085: if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
086: return false;
087: cursor--;
088: return true;
089: }
090:
091: protected boolean out_grouping(char[] s, int min, int max) {
092: if (cursor >= limit)
093: return false;
094: char ch = current.charAt(cursor);
095: if (ch > max || ch < min) {
096: cursor++;
097: return true;
098: }
099: ch -= min;
100: if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
101: cursor++;
102: return true;
103: }
104: return false;
105: }
106:
107: protected boolean out_grouping_b(char[] s, int min, int max) {
108: if (cursor <= limit_backward)
109: return false;
110: char ch = current.charAt(cursor - 1);
111: if (ch > max || ch < min) {
112: cursor--;
113: return true;
114: }
115: ch -= min;
116: if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
117: cursor--;
118: return true;
119: }
120: return false;
121: }
122:
123: protected boolean in_range(int min, int max) {
124: if (cursor >= limit)
125: return false;
126: char ch = current.charAt(cursor);
127: if (ch > max || ch < min)
128: return false;
129: cursor++;
130: return true;
131: }
132:
133: protected boolean in_range_b(int min, int max) {
134: if (cursor <= limit_backward)
135: return false;
136: char ch = current.charAt(cursor - 1);
137: if (ch > max || ch < min)
138: return false;
139: cursor--;
140: return true;
141: }
142:
143: protected boolean out_range(int min, int max) {
144: if (cursor >= limit)
145: return false;
146: char ch = current.charAt(cursor);
147: if (!(ch > max || ch < min))
148: return false;
149: cursor++;
150: return true;
151: }
152:
153: protected boolean out_range_b(int min, int max) {
154: if (cursor <= limit_backward)
155: return false;
156: char ch = current.charAt(cursor - 1);
157: if (!(ch > max || ch < min))
158: return false;
159: cursor--;
160: return true;
161: }
162:
163: protected boolean eq_s(int s_size, String s) {
164: if (limit - cursor < s_size)
165: return false;
166: int i;
167: for (i = 0; i != s_size; i++) {
168: if (current.charAt(cursor + i) != s.charAt(i))
169: return false;
170: }
171: cursor += s_size;
172: return true;
173: }
174:
175: protected boolean eq_s_b(int s_size, String s) {
176: if (cursor - limit_backward < s_size)
177: return false;
178: int i;
179: for (i = 0; i != s_size; i++) {
180: if (current.charAt(cursor - s_size + i) != s.charAt(i))
181: return false;
182: }
183: cursor -= s_size;
184: return true;
185: }
186:
187: protected boolean eq_v(StringBuffer s) {
188: return eq_s(s.length(), s.toString());
189: }
190:
191: protected boolean eq_v_b(StringBuffer s) {
192: return eq_s_b(s.length(), s.toString());
193: }
194:
195: protected int find_among(Among v[], int v_size) {
196: int i = 0;
197: int j = v_size;
198:
199: int c = cursor;
200: int l = limit;
201:
202: int common_i = 0;
203: int common_j = 0;
204:
205: boolean first_key_inspected = false;
206:
207: while (true) {
208: int k = i + ((j - i) >> 1);
209: int diff = 0;
210: int common = common_i < common_j ? common_i : common_j; // smaller
211: Among w = v[k];
212: int i2;
213: for (i2 = common; i2 < w.s_size; i2++) {
214: if (c + common == l) {
215: diff = -1;
216: break;
217: }
218: diff = current.charAt(c + common) - w.s.charAt(i2);
219: if (diff != 0)
220: break;
221: common++;
222: }
223: if (diff < 0) {
224: j = k;
225: common_j = common;
226: } else {
227: i = k;
228: common_i = common;
229: }
230: if (j - i <= 1) {
231: if (i > 0)
232: break; // v->s has been inspected
233: if (j == i)
234: break; // only one item in v
235:
236: // - but now we need to go round once more to get
237: // v->s inspected. This looks messy, but is actually
238: // the optimal approach.
239:
240: if (first_key_inspected)
241: break;
242: first_key_inspected = true;
243: }
244: }
245: while (true) {
246: Among w = v[i];
247: if (common_i >= w.s_size) {
248: cursor = c + w.s_size;
249: if (w.method == null)
250: return w.result;
251: boolean res;
252: try {
253: Object resobj = w.method.invoke(w.methodobject,
254: new Object[0]);
255: res = resobj.toString().equals("true");
256: } catch (InvocationTargetException e) {
257: res = false;
258: // FIXME - debug message
259: } catch (IllegalAccessException e) {
260: res = false;
261: // FIXME - debug message
262: }
263: cursor = c + w.s_size;
264: if (res)
265: return w.result;
266: }
267: i = w.substring_i;
268: if (i < 0)
269: return 0;
270: }
271: }
272:
273: // find_among_b is for backwards processing. Same comments apply
274: protected int find_among_b(Among v[], int v_size) {
275: int i = 0;
276: int j = v_size;
277:
278: int c = cursor;
279: int lb = limit_backward;
280:
281: int common_i = 0;
282: int common_j = 0;
283:
284: boolean first_key_inspected = false;
285:
286: while (true) {
287: int k = i + ((j - i) >> 1);
288: int diff = 0;
289: int common = common_i < common_j ? common_i : common_j;
290: Among w = v[k];
291: int i2;
292: for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
293: if (c - common == lb) {
294: diff = -1;
295: break;
296: }
297: diff = current.charAt(c - 1 - common) - w.s.charAt(i2);
298: if (diff != 0)
299: break;
300: common++;
301: }
302: if (diff < 0) {
303: j = k;
304: common_j = common;
305: } else {
306: i = k;
307: common_i = common;
308: }
309: if (j - i <= 1) {
310: if (i > 0)
311: break;
312: if (j == i)
313: break;
314: if (first_key_inspected)
315: break;
316: first_key_inspected = true;
317: }
318: }
319: while (true) {
320: Among w = v[i];
321: if (common_i >= w.s_size) {
322: cursor = c - w.s_size;
323: if (w.method == null)
324: return w.result;
325:
326: boolean res;
327: try {
328: Object resobj = w.method.invoke(w.methodobject,
329: new Object[0]);
330: res = resobj.toString().equals("true");
331: } catch (InvocationTargetException e) {
332: res = false;
333: // FIXME - debug message
334: } catch (IllegalAccessException e) {
335: res = false;
336: // FIXME - debug message
337: }
338: cursor = c - w.s_size;
339: if (res)
340: return w.result;
341: }
342: i = w.substring_i;
343: if (i < 0)
344: return 0;
345: }
346: }
347:
348: /* to replace chars between c_bra and c_ket in current by the
349: * chars in s.
350: */
351: protected int replace_s(int c_bra, int c_ket, String s) {
352: int adjustment = s.length() - (c_ket - c_bra);
353: current.replace(bra, ket, s);
354: limit += adjustment;
355: if (cursor >= c_ket)
356: cursor += adjustment;
357: else if (cursor > c_bra)
358: cursor = c_bra;
359: return adjustment;
360: }
361:
362: protected void slice_check() {
363: if (bra < 0 || bra > ket || ket > limit
364: || limit > current.length()) // this line could be removed
365: {
366: System.err.println("faulty slice operation");
367: // FIXME: report error somehow.
368: /*
369: fprintf(stderr, "faulty slice operation:\n");
370: debug(z, -1, 0);
371: exit(1);
372: */
373: }
374: }
375:
376: protected void slice_from(String s) {
377: slice_check();
378: replace_s(bra, ket, s);
379: }
380:
381: protected void slice_from(StringBuffer s) {
382: slice_from(s.toString());
383: }
384:
385: protected void slice_del() {
386: slice_from("");
387: }
388:
389: protected void insert(int c_bra, int c_ket, String s) {
390: int adjustment = replace_s(c_bra, c_ket, s);
391: if (c_bra <= bra)
392: bra += adjustment;
393: if (c_bra <= ket)
394: ket += adjustment;
395: }
396:
397: protected void insert(int c_bra, int c_ket, StringBuffer s) {
398: insert(c_bra, c_ket, s.toString());
399: }
400:
401: /* Copy the slice into the supplied StringBuffer */
402: protected StringBuffer slice_to(StringBuffer s) {
403: slice_check();
404: int len = ket - bra;
405: s.replace(0, s.length(), current.substring(bra, ket));
406: return s;
407: }
408:
409: protected StringBuffer assign_to(StringBuffer s) {
410: s.replace(0, s.length(), current.substring(0, limit));
411: return s;
412: }
413:
414: /*
415: extern void debug(struct SN_env * z, int number, int line_count)
416: { int i;
417: int limit = SIZE(z->p);
418: //if (number >= 0) printf("%3d (line %4d): '", number, line_count);
419: if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
420: for (i = 0; i <= limit; i++)
421: { if (z->lb == i) printf("{");
422: if (z->bra == i) printf("[");
423: if (z->c == i) printf("|");
424: if (z->ket == i) printf("]");
425: if (z->l == i) printf("}");
426: if (i < limit)
427: { int ch = z->p[i];
428: if (ch == 0) ch = '#';
429: printf("%c", ch);
430: }
431: }
432: printf("'\n");
433: }
434: */
435:
436: };
|