001: /* ====================================================================
002: * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
003: *
004: * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions
008: * are met:
009: *
010: * 1. Redistributions of source code must retain the above copyright
011: * notice, this list of conditions and the following disclaimer.
012: *
013: * 2. Redistributions in binary form must reproduce the above copyright
014: * notice, this list of conditions and the following disclaimer in
015: * the documentation and/or other materials provided with the
016: * distribution.
017: *
018: * 3. The end-user documentation included with the redistribution,
019: * if any, must include the following acknowledgment:
020: * "This product includes software developed by Jcorporate Ltd.
021: * (http://www.jcorporate.com/)."
022: * Alternately, this acknowledgment may appear in the software itself,
023: * if and wherever such third-party acknowledgments normally appear.
024: *
025: * 4. "Jcorporate" and product names such as "Expresso" must
026: * not be used to endorse or promote products derived from this
027: * software without prior written permission. For written permission,
028: * please contact info@jcorporate.com.
029: *
030: * 5. Products derived from this software may not be called "Expresso",
031: * or other Jcorporate product names; nor may "Expresso" or other
032: * Jcorporate product names appear in their name, without prior
033: * written permission of Jcorporate Ltd.
034: *
035: * 6. No product derived from this software may compete in the same
036: * market space, i.e. framework, without prior written permission
037: * of Jcorporate Ltd. For written permission, please contact
038: * partners@jcorporate.com.
039: *
040: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
041: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
042: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
043: * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
044: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
045: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
046: * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
047: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
048: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
049: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
050: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
051: * SUCH DAMAGE.
052: * ====================================================================
053: *
054: * This software consists of voluntary contributions made by many
055: * individuals on behalf of the Jcorporate Ltd. Contributions back
056: * to the project(s) are encouraged when you make modifications.
057: * Please send them to support@jcorporate.com. For more information
058: * on Jcorporate Ltd. and its products, please see
059: * <http://www.jcorporate.com/>.
060: *
061: * Portions of this software are based upon other open source
062: * products and are subject to their respective licenses.
063: */
064: package com.jcorporate.expresso.core.dataobjects.jdbc;
065:
066: import com.jcorporate.expresso.core.dataobjects.DataFieldMetaData;
067: import com.jcorporate.expresso.kernel.util.FastStringBuffer;
068: import org.apache.log4j.Logger;
069: import org.apache.oro.text.regex.MalformedPatternException;
070: import org.apache.oro.text.regex.Pattern;
071: import org.apache.oro.text.regex.PatternCompiler;
072: import org.apache.oro.text.regex.PatternMatcher;
073: import org.apache.oro.text.regex.Perl5Compiler;
074: import org.apache.oro.text.regex.Perl5Matcher;
075:
076: import java.util.Collections;
077: import java.util.HashSet;
078: import java.util.Set;
079: import java.util.StringTokenizer;
080:
081: /**
082: * The responsibilities of this class is to verify range values for fields
083: * and throw an exception if there is any 'monkeybusiness' detected in the field.
084: * <p/>
085: * The parser does not allow everything a normal SQL query would allows due to
086: * parsing complexity. However, it will allow multiple conditions, and ranges for
087: * BETWEEN and IN.
088: * </p>
089: * <p>Accepted Range Strings:
090: * <ul>
091: * <li>[NOT] BETWEEN value1 AND value2</li>
092: * <li>[NOT] IN(value1,value2.....)</li>
093: * <li>[>|>=|<|<=] value ([AND|OR] [>|>=|<|<=] value)*</li>
094: * </ul>
095: * </p>
096: * <p>Known bugs with Range issues:
097: * <ul>
098: * <li>There can be no single quotes INSIDE string fields. The fields themselves must
099: * still be quoted.</li>
100: * <li>There cannot be the word AND or OR inside the String range fields.</li>
101: * <li>For IN statements, string field values cannot have commas in them</li>
102: * </ul>
103: * </p>
104: * <p>Instances of FieldRangeParser are meant to use shared between threads</p>
105: *
106: * @author Michael Rimov
107: * @version 2.0
108: */
109: public class FieldRangeParser {
110:
111: /**
112: * The log4j logger
113: */
114: private static final transient Logger log = Logger
115: .getLogger(FieldRangeParser.class);
116:
117: /**
118: * Acceptable modifiers
119: */
120: private static final String MODIFIERS[] = { "AND", "OR", "and",
121: "or" };
122:
123: /**
124: * Acceptable Ranges
125: */
126: private static final String RANGES[] = { "<", "<=", ">=", ">", "<>" };
127:
128: /**
129: * A set of modifiers
130: */
131: private static final Set MODIFIER_SET = Collections
132: .synchronizedSet(new HashSet(MODIFIERS.length));
133:
134: /**
135: * A set of acceptable ranges
136: */
137: private static final Set RANGE_SET = Collections
138: .synchronizedSet(new HashSet(RANGES.length));
139:
140: /**
141: * Dates/Integers, etc should not have wild cards, or single quotes or
142: * escape sequences like \'s. This guards against that.
143: */
144: private static final String LEGAL_NUMBER_RE = "^[^\\\\'%\"]+";
145:
146: /**
147: * String Fields should start and end in ' and ultimately escape all single
148: * quotes in the middle with double single quotes. Unfortunately, there
149: * seems to be a bug in ORO for matchin that second case, so right now
150: * all single quotes in-between are prohibited.
151: */
152: private static final String LEGAL_STRING_RE = "^'[^']*'$";
153:
154: /**
155: * Regular Expression for the basic format of IN statements. Verifies
156: * that it begins with IN, then has open and close parenthesis and closes
157: * with a parenthesis. The Regular expression does not in itself check the
158: * format of the fields inside the parenthesis.
159: */
160: private static final String IN_FORMAT_RE = "^IN\\s*\\(.+\\)$";
161:
162: /**
163: * The compiled pattern for ILLEGAL_CHAR_RE
164: */
165: private Pattern legalCharacters = null;
166:
167: /**
168: * A compiled patter for ILLEGAL_STRING_RE
169: */
170: private Pattern legalStrings = null;
171:
172: /**
173: * Compiled pattern for IN_FORMAT
174: */
175: private Pattern inStatementPattern = null;
176:
177: /**
178: * A Pattern Matcher for examining fields vs validation regular expressions
179: * It has been modified for thead local instantiation to reduce synchronization.
180: */
181: private transient static ThreadLocal patternMatcher = new ThreadLocal() {
182: protected synchronized Object initialValue() {
183: return new Perl5Matcher();
184: }
185: };
186:
187: /**
188: * The singleton instance of this.
189: */
190: private static FieldRangeParser myInstance = new FieldRangeParser();
191:
192: /**
193: * Retrieves an instance of a perl5 pattern matcher that is thread local
194: * to be thread safe.
195: *
196: * @return PatternMatcher instance.
197: */
198: protected PatternMatcher getMatcher() {
199: return (PatternMatcher) patternMatcher.get();
200: }
201:
202: /**
203: * Default constructor. This creates all the regular expression matchers,
204: * etc.
205: */
206: public FieldRangeParser() {
207: try {
208: PatternCompiler compiler = new Perl5Compiler();
209: legalCharacters = compiler.compile(LEGAL_NUMBER_RE,
210: Perl5Compiler.READ_ONLY_MASK);
211: legalStrings = compiler.compile(LEGAL_STRING_RE,
212: Perl5Compiler.READ_ONLY_MASK);
213: inStatementPattern = compiler.compile(IN_FORMAT_RE,
214: Perl5Compiler.READ_ONLY_MASK);
215:
216: for (int i = 0; i < MODIFIERS.length; i++) {
217: MODIFIER_SET.add(MODIFIERS[i]);
218: }
219:
220: for (int i = 0; i < RANGES.length; i++) {
221: RANGE_SET.add(RANGES[i]);
222: }
223:
224: } catch (MalformedPatternException ex) {
225: log
226: .error("Error setting up Illegal Character Regular Expression");
227: }
228: }
229:
230: /**
231: * Retrieve an instance of the field range parser.
232: *
233: * @return FieldRangeParser instance
234: */
235: public static synchronized FieldRangeParser getInstance() {
236: return FieldRangeParser.myInstance;
237:
238: }
239:
240: /**
241: * Performs some basic checks to validate the syntax of the range field value
242: * given the system to help protect against nasty things like sql injection
243: *
244: * @param metadata the field metadata to be checking against.
245: * @param fieldValue the field value to check
246: * @return false if the range is invalid.
247: */
248: public boolean isValidRange(DataFieldMetaData metadata,
249: String fieldValue) {
250:
251: //Obviously null values are not valid ranges.
252: if (fieldValue == null) {
253: return false;
254: }
255:
256: //Take out any potentially leading whitespace
257: String examineString = fieldValue.trim();
258:
259: switch (examineString.charAt(0)) {
260: case 'N':
261: case 'n':
262: if (examineString.startsWith("NOT")
263: || examineString.startsWith("not")) {
264: examineString = examineString.substring(3).trim();
265: } else {
266: //If we have an n to begin with, we need it to be NOT or we don't
267: //consider this valid.
268: return false;
269: }
270:
271: //Fall through since NOT is a special case of IN or BETWEEN
272: case 'I':
273: case 'i':
274: case 'B':
275: case 'b':
276: if (examineString.equalsIgnoreCase("is null")
277: || examineString.equalsIgnoreCase("is not null")) {
278: return true;
279: }
280: if (examineString.startsWith("BETWEEN")
281: || examineString.startsWith("between")) {
282: return validateBetweenStatement(metadata, examineString);
283: } else if (examineString.startsWith("IN")
284: || examineString.startsWith("in")) {
285: return validateInStatement(metadata, examineString);
286: } else {
287: return false;
288: }
289:
290: case '>':
291: case '<':
292: return validateComparisonStatement(metadata, examineString);
293:
294: default:
295: return false;
296:
297: }
298: }
299:
300: /**
301: * Checks a single value and see if it is kosher for the expected data type.
302: *
303: * @param metadata the metadata for the particular field
304: * @param fieldValue the value of the field.
305: * @return true if the field value is valid.
306: */
307: private boolean validateFieldValue(DataFieldMetaData metadata,
308: String fieldValue) {
309: if (metadata.isDateType() || metadata.isDateTimeType()
310: || metadata.isTimeType()) {
311: return validateFieldDateValue(fieldValue);
312: } else if (metadata.isNumericType()) {
313: return validateFieldNumberValue(fieldValue);
314: } else {
315: return validateFieldStringValue(fieldValue);
316: }
317: }
318:
319: /**
320: * Make sure there are no in-between single quotes.
321: *
322: * @param fieldValue the value of the field to check
323: * @return true if the field value is valid.
324: */
325: private boolean validateFieldStringValue(String fieldValue) {
326: return getMatcher().matches(fieldValue, legalStrings);
327: }
328:
329: /**
330: * Instead of parsing the date, just make sure it doesn't have likely injection
331: * values.
332: *
333: * @param fieldValue the value of the field to check
334: * @return true if the field value is valid
335: */
336: private boolean validateFieldNumberValue(String fieldValue) {
337: return getMatcher().matches(fieldValue, legalCharacters);
338: }
339:
340: /**
341: * Instead of parsing the date, just make sure it doesn't have likely injection
342: * values.
343: *
344: * @param fieldValue the value of the field to check
345: * @return true if the field value is valid
346: */
347: private boolean validateFieldDateValue(String fieldValue) {
348: return getMatcher().matches(fieldValue, legalStrings);
349: }
350:
351: /**
352: * Validates BETWEEN type statements which should have: BETWEEN [field1] AND [field2]
353: *
354: * @param metadata field metadata. This specifies the types that should be
355: * validated in the field.
356: * @param expressionValue The range value for the field.
357: * @return true if the expression is valid.
358: */
359: private boolean validateBetweenStatement(
360: DataFieldMetaData metadata, String expressionValue) {
361: StringTokenizer stok = new StringTokenizer(expressionValue);
362: if (!stok.hasMoreTokens()) {
363: return false;
364: }
365:
366: //
367: //Must start with "Between"
368: //
369: String betweenStatement = stok.nextToken();
370: if (!("BETWEEN".equalsIgnoreCase(betweenStatement))
371: || !stok.hasMoreTokens()) {
372: return false;
373: }
374:
375: boolean foundAnd = false;
376:
377: //Now look for AND, and validate field 1
378: FastStringBuffer fsb = FastStringBuffer.getInstance();
379: String fieldValue;
380: try {
381: while (!foundAnd && stok.hasMoreTokens()) {
382: String token = stok.nextToken();
383: if ("AND".equalsIgnoreCase(token)) {
384: foundAnd = true;
385: } else {
386: fsb.append(" ");
387: fsb.append(token);
388: }
389: }
390: fieldValue = fsb.toString().trim();
391: } finally {
392: fsb.release();
393: }
394:
395: if (!foundAnd) {
396: return false;
397: } else {
398: if (!validateFieldValue(metadata, fieldValue)) {
399: return false;
400: }
401: }
402:
403: //OK, now we look for the end.
404: if (!stok.hasMoreTokens()) {
405: //Nothing for a field value!
406: return false;
407: }
408:
409: fsb = FastStringBuffer.getInstance();
410: try {
411: while (stok.hasMoreTokens()) {
412: String token = stok.nextToken();
413: fsb.append(" ");
414: fsb.append(token);
415: }
416:
417: fieldValue = fsb.toString().trim();
418: } finally {
419: fsb.release();
420: }
421:
422: //Ok, we found the last field... validate it and we're done.
423: return (validateFieldValue(metadata, fieldValue));
424: }
425:
426: /**
427: * Validates IN statements which are of the format: IN (value1,value2....)
428: *
429: * @param metadata the data field's metadata
430: * @param expressionValue the range string to parse and validate.
431: * @return true if the IN statement seems to be of valid syntax.
432: */
433: private boolean validateInStatement(DataFieldMetaData metadata,
434: String expressionValue) {
435:
436: boolean match;
437:
438: //It seems that ORO has a race condition in it :(
439: match = getMatcher().matches(expressionValue,
440: inStatementPattern);
441:
442: if (!match) {
443: //Basic syntax is not IN ( .... )
444: return false;
445: }
446:
447: int openParen = expressionValue.indexOf("(");
448: int closeParen = expressionValue.lastIndexOf(")");
449:
450: /**
451: * @todo check this substring
452: */
453: String toEvaluate = expressionValue.substring(openParen + 1,
454: closeParen);
455:
456: StringTokenizer stok = new StringTokenizer(toEvaluate, ",");
457: while (stok.hasMoreTokens()) {
458: String oneFieldValue = stok.nextToken();
459: boolean result = validateFieldValue(metadata, oneFieldValue);
460: if (result == false) {
461: return false;
462: }
463: }
464:
465: return true;
466: }
467:
468: /**
469: * This function evaluates the formats of greater than / less than types of
470: * ranges. It supports multiple comparisons as well.
471: *
472: * @param metadata The metadata for the field.
473: * @param expressionValue value for the field to validate
474: * @return true if the field appears to be of proper formatting.
475: */
476: private boolean validateComparisonStatement(
477: DataFieldMetaData metadata, String expressionValue) {
478: StringTokenizer stok = new StringTokenizer(expressionValue);
479:
480: //WE got a weird expression.
481: if (!stok.hasMoreTokens()) {
482: return false;
483: }
484:
485: return validateComparisonRecusive(metadata, stok);
486: }
487:
488: /**
489: * Validates operator and operand and returns their values.
490: *
491: * @param expression the expression to consume
492: * @return String[0] == operator; String[1] = operand (may be legally null);
493: */
494: private String[] consumeOperator(String expression) {
495: String returnValue[] = new String[2];
496:
497: int index = 0;
498: boolean done = false;
499: int length = expression.length();
500:
501: StringBuffer buffer = new StringBuffer();
502:
503: while (!done && index < length) {
504: char opChar = expression.charAt(index);
505: switch (opChar) {
506: case '>':
507: case '<':
508: case '=':
509: buffer.append(opChar);
510: index++;
511: break;
512: default:
513: returnValue[1] = expression.substring(index);
514: done = true;
515: }
516:
517: }
518:
519: String operator = buffer.toString();
520: if (RANGE_SET.contains(operator)) {
521: returnValue[0] = operator;
522: }
523:
524: return returnValue;
525: }
526:
527: /**
528: * Performs a recursive parsing of the string value. Each recursion checks for
529: * proper < > type of modifications, a field value proper for the data type,
530: * and looks for AND or OR... if it find those, then it calls itself with the
531: * rest of the string
532: *
533: * @param metadata the metadata for the field.
534: * @param tokenizer the tokenizer that is progressing through the range string.
535: * @return true if the validation is successfull, or false if there is bogus
536: * syntax detected.
537: */
538: private boolean validateComparisonRecusive(
539: DataFieldMetaData metadata, StringTokenizer tokenizer) {
540: String operator = tokenizer.nextToken();
541:
542: FastStringBuffer fsb = FastStringBuffer.getInstance();
543:
544: String[] result = consumeOperator(operator);
545: String fieldValue;
546: boolean moreComparisons = false;
547:
548: //No operator found case
549: if (result[0] == null) {
550: return false;
551: }
552:
553: if (result[1] == null && !tokenizer.hasMoreTokens()) {
554: return false;
555: }
556:
557: //Append the first value.
558: if (result[1] != null) {
559: fsb.append(result[1]);
560: }
561:
562: try {
563: boolean done = false;
564: while (!done && tokenizer.hasMoreTokens()) {
565: String oneValue = tokenizer.nextToken();
566: //If we hit AND or OR then we need to evaluate what we've got so
567: //far
568: if (MODIFIER_SET.contains(oneValue)) {
569: moreComparisons = true;
570: done = true;
571: } else {
572: fsb.append(" ");
573: fsb.append(oneValue);
574: }
575: }
576:
577: fieldValue = fsb.toString();
578:
579: } finally {
580: fsb.release();
581: }
582:
583: if (validateFieldValue(metadata, fieldValue.trim())) {
584: if (moreComparisons) {
585: return validateComparisonRecusive(metadata, tokenizer);
586: } else {
587: return true;
588: }
589: } else {
590: //The field test itself flunked.
591: return false;
592: }
593: }
594:
595: /**
596: * Does a given field value denote a range?
597: *
598: * @param fieldValue The field value to check against.
599: * @return The "range" string if the value starts with a range indicator, null if not
600: */
601: public String denotesRange(String fieldValue) {
602:
603: //Quick dummy tests to save us all the string checking.
604: if (fieldValue == null || fieldValue.length() == 0) {
605: return null;
606: }
607:
608: char firstChar = fieldValue.charAt(0);
609: switch (firstChar) {
610: case 'b':
611: case 'B':
612: if (fieldValue.startsWith("between ")) {
613: return "between ";
614: } else if (fieldValue.startsWith("BETWEEN ")) {
615: return "BETWEEN ";
616: }
617: break;
618:
619: case 'i':
620: case 'I':
621: if (fieldValue.startsWith("in ")) {
622: return "in ";
623: } else if (fieldValue.startsWith("IN ")) {
624: return "IN ";
625: } else if (fieldValue.trim()
626: .equalsIgnoreCase("is not null")
627: || fieldValue.trim().equalsIgnoreCase("is null")) {
628: return "";
629: }
630: break;
631:
632: case 'n':
633: case 'N':
634: if (fieldValue.startsWith("not in ")) {
635: return "not in ";
636: } else if (fieldValue.startsWith("NOT IN")) {
637: return "NOT IN ";
638: } else if (fieldValue.startsWith("not between ")) {
639: return "not between ";
640: } else if (fieldValue.startsWith("NOT BETWEEN ")) {
641: return "NOT BETWEEN ";
642: }
643: break;
644:
645: case '<':
646: if (fieldValue.startsWith("<>")) {
647: return "<> ";
648: } else if (fieldValue.startsWith("<=")) {
649: return "<=";
650: } else if (fieldValue.startsWith("< ")) {
651: return "< ";
652:
653: } else if (fieldValue.startsWith("<")) {
654: return "<";
655: }
656: break;
657:
658: case '>':
659: if (fieldValue.startsWith("> ")) {
660: return "> ";
661: } else if (fieldValue.startsWith(">=")) {
662: return ">=";
663: } else if (fieldValue.startsWith(">")) {
664: return ">";
665: }
666: break;
667:
668: default:
669: return null;
670: }
671:
672: return null;
673: } /* denotesRange(String) */
674:
675: }
|