001: /*
002: * Copyright 2004-2007 Gary Bentley
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License"); you may
005: * not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: * http://www.apache.org/licenses/LICENSE-2.0
008: *
009: * Unless required by applicable law or agreed to in writing, software
010: * distributed under the License is distributed on an "AS IS" BASIS,
011: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012: * See the License for the specific language governing permissions and
013: * limitations under the License.
014: */
015: package org.josql.functions;
016:
017: import java.io.File;
018: import java.io.FileReader;
019: import java.io.BufferedReader;
020:
021: import java.util.List;
022: import java.util.ArrayList;
023:
024: import org.josql.functions.regexp.RegExpFactory;
025: import org.josql.functions.regexp.RegExp;
026:
027: import org.josql.QueryExecutionException;
028:
029: /**
030: * This class holds functions that operate on strings in some way.
031: */
032: public class StringFunctions extends AbstractFunctionHandler {
033:
034: public static final String HANDLER_ID = "_internal_string";
035:
036: /**
037: * Match a regular expression against the object passed in.
038: *
039: * @param o The object to match against, <code>toString</code> is called on the object.
040: * @param re The regular expression to match.
041: * @return <code>true</code> if the expression matches.
042: * @throws QueryExecutionException If the match cannot be performed, or if there is no suitable
043: * regular expression library available to the {@link RegExpFactory}.
044: */
045: public boolean regexp(Object o, String re)
046: throws QueryExecutionException {
047:
048: RegExp regexp = RegExpFactory.getDefaultInstance();
049:
050: if (regexp == null) {
051:
052: throw new QueryExecutionException(
053: "No default regular expression library available for: "
054: + RegExpFactory.getDefaultInstanceName());
055:
056: }
057:
058: if (o == null) {
059:
060: return false;
061:
062: }
063:
064: String v = o.toString();
065:
066: return regexp.match(re, v);
067:
068: }
069:
070: /**
071: * Match a regular expression against the object passed in using the specified regular expression
072: * library, pre-defined library names can be found in: {@link RegExpFactory}.
073: *
074: * @param o The object to match against, <code>toString</code> is called on the object.
075: * @param re The regular expression to match.
076: * @param instName The name of the regular expression library to use.
077: * @return <code>true</code> if the expression matches.
078: * @throws QueryExecutionException If the match cannot be performed, or if the <b>instName</b>
079: * regular expression library is not available to the {@link RegExpFactory}.
080: */
081: public boolean regexp(Object o, String re, String instName)
082: throws QueryExecutionException {
083:
084: RegExp regexp = RegExpFactory.getInstance(instName);
085:
086: if (regexp == null) {
087:
088: throw new QueryExecutionException(
089: "No regular expression library available for: "
090: + instName);
091:
092: }
093:
094: if (o == null) {
095:
096: return false;
097:
098: }
099:
100: String v = o.toString();
101:
102: return regexp.match(re, v);
103:
104: }
105:
106: /**
107: * <a target="_blank" href="http://www.gnu.org/software/grep/grep.html">grep</a>
108: * through a file, line by line, and determine what matches there are to the nominated
109: * String. Return a List of {@link FileMatch} objects.
110: *
111: * @param f The File to match against.
112: * @param s The string to match.
113: * @param ignoreCase If set to <code>true</code> then the case of the line and string to
114: * match are ignored.
115: * @return The List of {@link FileMatch} objects.
116: */
117: public List grep(File f, String s, boolean ignoreCase)
118: throws QueryExecutionException {
119:
120: if ((f == null) || (!f.exists()) || (f.isDirectory())
121: || (!f.canRead())) {
122:
123: return null;
124:
125: }
126:
127: List retData = new ArrayList();
128:
129: try {
130:
131: BufferedReader br = new BufferedReader(new FileReader(f));
132:
133: String l = br.readLine();
134:
135: int lc = 1;
136:
137: String ss = s;
138:
139: if (ignoreCase) {
140:
141: ss = s.toLowerCase();
142:
143: }
144:
145: while (l != null) {
146:
147: int ind = -1;
148:
149: if (ignoreCase) {
150:
151: ind = l.toLowerCase().indexOf(ss);
152:
153: } else {
154:
155: ind = l.indexOf(ss);
156:
157: }
158:
159: if (ind != -1) {
160:
161: retData.add(new FileMatch(f, lc, ind, s, l));
162:
163: }
164:
165: l = br.readLine();
166:
167: lc++;
168:
169: }
170:
171: br.close();
172:
173: } catch (Exception e) {
174:
175: throw new QueryExecutionException(
176: "Unable to read from file: " + f, e);
177:
178: }
179:
180: return retData;
181:
182: }
183:
184: /**
185: * <a target="_blank" href="http://www.gnu.org/software/grep/grep.html">grep</a>
186: * through a file, line by line, and determine what matches there are to the nominated
187: * regular expression using the specified regular expression implementation.
188: * Return a List of {@link FileMatch} objects.
189: *
190: * @param f The File to match against.
191: * @param regexp The regular expression to match against each line. This will use the
192: * default regular expression library. In this case the location of the match
193: * (i.e. {@link FileMatch#getColumn()}) will be -1 since the regular expression
194: * handling does not support location matching. Also, {@link FileMatch#getString()}
195: * will contain the regular expression used.
196: * @param instName The instance name to use.
197: * @return The List of {@link FileMatch} objects.
198: * @throws QueryExecutionException If the default regular expression implementation is not
199: * available or if the file cannot be read.
200: */
201: public List rgrep(File f, String regexp, String instName)
202: throws QueryExecutionException {
203:
204: RegExp reImpl = RegExpFactory.getInstance(instName);
205:
206: if (reImpl == null) {
207:
208: throw new QueryExecutionException(
209: "No default regular expression library available for: "
210: + instName);
211:
212: }
213:
214: return this .rgrep(f, regexp, reImpl);
215:
216: }
217:
218: /**
219: * <a target="_blank" href="http://www.gnu.org/software/grep/grep.html">grep</a>
220: * through a file, line by line, and determine what matches there are to the nominated
221: * regular expression. Return a List of {@link FileMatch} objects.
222: *
223: * @param f The File to match against.
224: * @param regexp The regular expression to match against each line. This will use the
225: * default regular expression library. In this case the location of the match
226: * (i.e. {@link FileMatch#getColumn()}) will be -1 since the regular expression
227: * handling does not support location matching. Also, {@link FileMatch#getString()}
228: * will contain the regular expression used.
229: * @return The List of {@link FileMatch} objects.
230: * @throws QueryExecutionException If the default regular expression implementation is not
231: * available or if the file cannot be read.
232: */
233: public List rgrep(File f, String regexp)
234: throws QueryExecutionException {
235:
236: RegExp reImpl = RegExpFactory.getDefaultInstance();
237:
238: if (reImpl == null) {
239:
240: throw new QueryExecutionException(
241: "No default regular expression library available for: "
242: + RegExpFactory.getDefaultInstanceName());
243:
244: }
245:
246: return this .rgrep(f, regexp, reImpl);
247:
248: }
249:
250: private List rgrep(File f, String regexp, RegExp reImpl)
251: throws QueryExecutionException {
252:
253: if ((f == null) || (!f.exists()) || (f.isDirectory())
254: || (!f.canRead())) {
255:
256: return null;
257:
258: }
259:
260: List retData = new ArrayList();
261:
262: try {
263:
264: BufferedReader br = new BufferedReader(new FileReader(f));
265:
266: String l = br.readLine();
267:
268: int lc = 1;
269:
270: while (l != null) {
271:
272: if (reImpl.match(regexp, l)) {
273:
274: retData.add(new FileMatch(f, lc, -1, regexp, l));
275:
276: }
277:
278: l = br.readLine();
279:
280: lc++;
281:
282: }
283:
284: br.close();
285:
286: } catch (Exception e) {
287:
288: throw new QueryExecutionException(
289: "Unable to read from file: " + f, e);
290:
291: }
292:
293: return retData;
294:
295: }
296:
297: /**
298: * <a target="_blank" href="http://www.gnu.org/software/grep/grep.html">grep</a>
299: * through a file, line by line, and determine what matches there are to the nominated
300: * String. Return a List of {@link FileMatch} objects.
301: *
302: * @param f The File to match against.
303: * @param s The string to match.
304: * @return The List of {@link FileMatch} objects.
305: */
306: public List grep(File f, String s) throws QueryExecutionException {
307:
308: return this .grep(f, s, false);
309:
310: }
311:
312: /**
313: * Given a string trim the passed in string from the front and end, set <b>v</b> to <code>null</code>
314: * to have just whitespace trimmed. Both are converted to strings first.
315: *
316: * @param o The string to trim.
317: * @param v The string to trim from the front and end. Set to <code>null</code> to just trim
318: * whitespace.
319: * @return The trimmed string.
320: */
321: public String trim(Object o, Object v) {
322:
323: if (o == null) {
324:
325: return null;
326:
327: }
328:
329: String os = o.toString();
330:
331: if (v == null) {
332:
333: return os.trim();
334:
335: }
336:
337: String vs = v.toString();
338:
339: if (os.endsWith(vs)) {
340:
341: os = os.substring(0, vs.length());
342:
343: }
344:
345: if (os.startsWith(vs)) {
346:
347: os = os.substring(vs.length());
348:
349: }
350:
351: return os;
352:
353: }
354:
355: /**
356: * A thinly veiled wrapper around the {@link String#lastIndexOf(String)} method.
357: * Both <b>o</b> and <b>i</b> are converted to Strings and then the "lastIndexoOf" method
358: * is called on <b>o</b> with <b>i</b> as the argument.
359: *
360: * @param o The string to search.
361: * @param i The string to match.
362: * @return The last index of <b>i</b> within <b>o</b>. If <b>o</b> is <code>null</code> then
363: * -1 is returned. If <b>i</b> is <code>null</code> then -1 is returned.
364: */
365: public double lastIndexOf(Object o, Object i) {
366:
367: if (o == null) {
368:
369: return -1;
370:
371: }
372:
373: if (i == null) {
374:
375: return -1;
376:
377: }
378:
379: String os = o.toString();
380: String is = i.toString();
381:
382: return os.lastIndexOf(is);
383:
384: }
385:
386: /**
387: * Return a substring of the passed in object (in a string form). See {@link #subStr(Object,double,double)}
388: * for the full details since this is just a thin-wrapper around that method with the <b>t</b>
389: * parameter set to -1.
390: *
391: * @param o The object to convert to a string and return the substring.
392: * @param f The start index. If this is set to 0 then the entire string is returned.
393: * @return The substring.
394: */
395: public String subStr(Object o, double f) {
396:
397: return this .subStr(o, f, -1);
398:
399: }
400:
401: /**
402: * A function to return a substring of a String. If the passed in object isn't
403: * a string then it is converted to a string before processing.
404: *
405: * @param o The object to convert to a string and return the substring.
406: * @param f The start index. If it's < 0 then "" is returned. If the start is out of
407: * range of the string then "" is returned.
408: * @param t The end index. If it's > f then it is reset to -1. If it's -1 then
409: * it's ignored and the substring from the start is used. If the end is greater
410: * than the length of the string then it is ignored.
411: * @return The substring.
412: */
413: public String subStr(Object o, double f, double t) {
414:
415: if (o == null) {
416:
417: return null;
418:
419: }
420:
421: int fi = (int) f;
422: int ti = (int) t;
423:
424: String s = o.toString();
425:
426: if ((fi < 0) || (fi > s.length())) {
427:
428: return "";
429:
430: }
431:
432: if ((ti < fi) || (ti > s.length())) {
433:
434: ti = -1;
435:
436: }
437:
438: if (ti == -1) {
439:
440: return s.substring(fi);
441:
442: }
443:
444: return s.substring(fi, ti);
445:
446: }
447:
448: public double length(Object o) {
449:
450: if (o == null) {
451:
452: return 0;
453:
454: }
455:
456: if (o instanceof String) {
457:
458: return ((String) o).length();
459:
460: }
461:
462: return o.toString().length();
463:
464: }
465:
466: }
|