001: /* $Id: ExtendedBaseRules.java 471661 2006-11-06 08:09:25Z skitching $
002: *
003: * Licensed to the Apache Software Foundation (ASF) under one or more
004: * contributor license agreements. See the NOTICE file distributed with
005: * this work for additional information regarding copyright ownership.
006: * The ASF licenses this file to You under the Apache License, Version 2.0
007: * (the "License"); you may not use this file except in compliance with
008: * the License. You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: package org.apache.commons.digester;
020:
021: import java.util.ArrayList;
022: import java.util.Collections;
023: import java.util.Comparator;
024: import java.util.HashMap;
025: import java.util.Iterator;
026: import java.util.List;
027: import java.util.Map;
028:
029: /**
030: * <p>Extension of {@link RulesBase} for complex schema.</p>
031: *
032: * <p>This is an extension of the basic pattern matching scheme
033: * intended to improve support for mapping complex xml-schema.
034: * It is intended to be a minimal extension of the standard rules
035: * big enough to support complex schema but without the full generality
036: * offered by more exotic matching pattern rules.</p>
037: *
038: * <h4>When should you use this rather than the original?</h4>
039: *
040: * <p>
041: * This pattern-matching engine is complex and slower than the basic
042: * default RulesBase class, but offers more functionality:
043: * <ul>
044: * <li>Universal patterns allow patterns to be specified which will match
045: * regardless of whether there are "better matching" patterns available.</li>
046: * <li>Parent-match patterns (eg "a/b/?") allow matching for all direct
047: * children of a specified element.</li>
048: * <li>Ancestor-match patterns (eg "a/b/*") allow matching all elements
049: * nested within a specified element to any nesting depth.</li>
050: * <li>Completely-wild patterns ("*" or "!*") allow matching all elements.</li>
051: * </ul>
052: * </p>
053: *
054: * <h4>Universal Match Patterns</h4>
055: *
056: * <p>The default RulesBase pattern-matching engine always attempts to find
057: * the "best matching pattern", and will ignore rules associated with other
058: * patterns that match but are not "as good". As an example, if the pattern
059: * "a/b/c" is associated with rules 1 and 2, and "*/c" is associated with
060: * rules 3 and 4 then element "a/b/c" will cause only rules 1 and 2 to execute.
061: * Rules 3 and 4 do have matching patterns, but because the patterns are shorter
062: * and include wildcard characters they are regarded as being "not as good" as
063: * a direct match. In general, exact patterns are better than wildcard patterns,
064: * and among multiple patterns with wildcards, the longest is preferred.
065: * See the RulesBase class for more information.</p>
066: *
067: * <p>This feature of preferring "better" patterns can be a powerful tool.
068: * However it also means that patterns can interact in unexpected ways.</p>
069: *
070: * <p>When using the ExtendedBaseRules, any pattern prefixed with '!' bypasses
071: * the "best match" feature. Even if there is an exact match or a longer
072: * wildcard match, patterns prefixed by '!' will still be tested to see if
073: * they match, and if so their associated Rule objects will be included in
074: * the set of rules to be executed in the normal manner.</p>
075: *
076: * <ul>
077: * <li>Pattern <code>"!*/a/b"</code> matches whenever an 'b' element
078: * is inside an 'a'.</li>
079: * <li>Pattern <code>"!a/b/?"</code> matches any child of a parent
080: * matching <code>"a/b"</code> (see "Parent Match Patterns").</li>
081: * <li>Pattern <code>"!*/a/b/?"</code> matches any child of a parent
082: * matching <code>"!*/a/b"</code> (see "Parent Match Patterns").</li>
083: * <li>Pattern <code>"!a/b/*"</code> matches any element whose path
084: * starts with "a" then "b" (see "Ancestor Match Patterns").</li>
085: * <li>Pattern <code>"!*/a/b/*"</code> matches any elements whose path
086: * contains 'a/b' (see "Ancestor Match Patterns").</li>
087: * </ul>
088: *
089: * <h4>Parent Match Patterns</h4>
090: *
091: * <p>
092: * These will match direct child elements of a particular parent element.
093: * <ul>
094: * <li>
095: * <code>"a/b/c/?"</code> matches any child whose parent matches
096: * <code>"a/b/c"</code>. Exact parent rules take precedence over Ancestor
097: * Match patterns.
098: * </li>
099: * <li>
100: * <code>"*/a/b/c/?"</code> matches any child whose parent matches
101: * <code>"*/a/b/c"</code>. The longest matching still applies to parent
102: * matches but the length excludes the '?', which effectively means
103: * that standard wildcard matches with the same level of depth are
104: * chosen in preference.
105: * </li>
106: * </ul>
107: * </p>
108: *
109: * <h4>Ancestor Match Patterns</h4>
110: *
111: * <p>
112: * These will match elements whose parentage includes a particular sequence
113: * of elements.
114: * <ul>
115: * <li>
116: * <code>"a/b/*"</code> matches any element whose path starts with
117: * 'a' then 'b'. Exact parent and parent match rules take precedence.
118: * The longest ancestor match will take precedence.
119: * </li>
120: * <li>
121: * <code>"*/a/b/*"</code> matches any elements whose path contains
122: * an element 'a' followed by an element 'b'. The longest matching still
123: * applies but the length excludes the '*' at the end.
124: * </li>
125: * </ul>
126: * </p>
127: *
128: * <h4>Completely Wild Patterns</h4>
129: *
130: * <p>Pattern <code>"*"</code> matches every pattern that isn't matched by
131: * any other basic rule.</p>
132: *
133: * <p>Pattern <code>"!*"</code> matches every pattern.</p>
134: *
135: * <h4>Using The Extended Rules</h4>
136: *
137: * <p>By default, a Digester instance uses a {@link RulesBase} instance as
138: * its pattern matching engine. To use an ExtendedBaseRules instance, call
139: * the Digester.setRules method before adding any Rule objects to the digester
140: * instance:
141: * <pre>
142: * Digester digester = new Digester();
143: * digester.setRules( new ExtendedBaseRules() );
144: * </pre></p>
145: *
146: * <p>The most important thing to remember when using the extended rules is
147: * that universal and non-universal patterns are completely independent.
148: * Universal patterns are never affected by the addition of new patterns
149: * or the removal of existing ones. Non-universal patterns are never affected
150: * by the addition of new <em>universal</em> patterns or the removal of
151: * existing <em>universal</em> patterns. As in the basic matching rules,
152: * non-universal (basic) patterns <strong>can</strong> be affected by the
153: * addition of new <em>non-universal</em> patterns or the removal of existing
154: * <em>non-universal</em> patterns, because only rules associated with the
155: * "best matching" pattern for each xml element are executed.
156: *
157: * <p> This means that you can use universal patterns to build up the simple
158: * parts of your structure - for example defining universal creation and
159: * property setting rules. More sophisticated and complex mapping will require
160: * non-universal patterns and this might mean that some of the universal rules
161: * will need to be replaced by a series of special cases using non-universal
162: * rules. But by using universal rules as your backbone, these additions
163: * should not break your existing rules.</p>
164: */
165:
166: public class ExtendedBaseRules extends RulesBase {
167:
168: // ----------------------------------------------------- Instance Variables
169:
170: /**
171: * Counts the entry number for the rules.
172: */
173: private int counter = 0;
174:
175: /**
176: * The decision algorithm used (unfortunately) doesn't preserve the entry
177: * order.
178: * This map is used by a comparator which orders the list of matches
179: * before it's returned.
180: * This map stores the entry number keyed by the rule.
181: */
182: private Map order = new HashMap();
183:
184: // --------------------------------------------------------- Public Methods
185:
186: /**
187: * Register a new Rule instance matching the specified pattern.
188: *
189: * @param pattern Nesting pattern to be matched for this Rule
190: * @param rule Rule instance to be registered
191: */
192: public void add(String pattern, Rule rule) {
193: super .add(pattern, rule);
194: counter++;
195: order.put(rule, new Integer(counter));
196: }
197:
198: /**
199: * Return a List of all registered Rule instances that match the specified
200: * nesting pattern, or a zero-length List if there are no matches. If more
201: * than one Rule instance matches, they <strong>must</strong> be returned
202: * in the order originally registered through the <code>add()</code>
203: * method.
204: *
205: * @param pattern Nesting pattern to be matched
206: */
207: public List match(String namespace, String pattern) {
208: // calculate the pattern of the parent
209: // (if the element has one)
210: String parentPattern = "";
211: int lastIndex = pattern.lastIndexOf('/');
212:
213: boolean hasParent = true;
214: if (lastIndex == -1) {
215: // element has no parent
216: hasParent = false;
217:
218: } else {
219: // calculate the pattern of the parent
220: parentPattern = pattern.substring(0, lastIndex);
221:
222: }
223:
224: // we keep the list of universal matches separate
225: List universalList = new ArrayList(counter);
226:
227: // Universal all wildards ('!*')
228: // These are always matched so always add them
229: List tempList = (List) this .cache.get("!*");
230: if (tempList != null) {
231: universalList.addAll(tempList);
232: }
233:
234: // Universal exact parent match
235: // need to get this now since only wildcards are considered later
236: tempList = (List) this .cache.get("!" + parentPattern + "/?");
237: if (tempList != null) {
238: universalList.addAll(tempList);
239: }
240:
241: // base behaviour means that if we certain matches, we don't continue
242: // but we just have a single combined loop and so we have to set
243: // a variable
244: boolean ignoreBasicMatches = false;
245:
246: // see if we have an exact basic pattern match
247: List rulesList = (List) this .cache.get(pattern);
248: if (rulesList != null) {
249: // we have a match!
250: // so ignore all basic matches from now on
251: ignoreBasicMatches = true;
252:
253: } else {
254:
255: // see if we have an exact child match
256: if (hasParent) {
257: // matching children takes preference
258: rulesList = (List) this .cache.get(parentPattern + "/?");
259: if (rulesList != null) {
260: // we have a match!
261: // so ignore all basic matches from now on
262: ignoreBasicMatches = true;
263:
264: } else {
265: // we don't have a match yet - so try exact ancester
266: //
267: rulesList = findExactAncesterMatch(pattern);
268: if (rulesList != null) {
269: // we have a match!
270: // so ignore all basic matches from now on
271: ignoreBasicMatches = true;
272: }
273: }
274: }
275: }
276:
277: // OK - we're ready for the big loop!
278: // Unlike the basic rules case,
279: // we have to go through for all those universal rules in all cases.
280:
281: // Find the longest key, ie more discriminant
282: String longKey = "";
283: int longKeyLength = 0;
284:
285: Iterator keys = this .cache.keySet().iterator();
286: while (keys.hasNext()) {
287: String key = (String) keys.next();
288:
289: // find out if it's a univeral pattern
290: // set a flag
291: boolean isUniversal = key.startsWith("!");
292: if (isUniversal) {
293: // and find the underlying key
294: key = key.substring(1, key.length());
295: }
296:
297: // don't need to check exact matches
298: boolean wildcardMatchStart = key.startsWith("*/");
299: boolean wildcardMatchEnd = key.endsWith("/*");
300: if (wildcardMatchStart || (isUniversal && wildcardMatchEnd)) {
301:
302: boolean parentMatched = false;
303: boolean basicMatched = false;
304: boolean ancesterMatched = false;
305:
306: boolean parentMatchEnd = key.endsWith("/?");
307: if (parentMatchEnd) {
308: // try for a parent match
309: parentMatched = parentMatch(key, pattern,
310: parentPattern);
311:
312: } else if (wildcardMatchEnd) {
313: // check for ancester match
314: if (wildcardMatchStart) {
315: String patternBody = key.substring(2, key
316: .length() - 2);
317: if (pattern.endsWith(patternBody)) {
318: ancesterMatched = true;
319: } else {
320: ancesterMatched = (pattern
321: .indexOf(patternBody + "/") > -1);
322: }
323: } else {
324: String bodyPattern = key.substring(0, key
325: .length() - 2);
326: if (pattern.startsWith(bodyPattern)) {
327: if (pattern.length() == bodyPattern
328: .length()) {
329: // exact match
330: ancesterMatched = true;
331: } else {
332: ancesterMatched = (pattern
333: .charAt(bodyPattern.length()) == '/');
334: }
335: } else {
336: ancesterMatched = false;
337: }
338: }
339: } else {
340: // try for a base match
341: basicMatched = basicMatch(key, pattern);
342: }
343:
344: if (parentMatched || basicMatched || ancesterMatched) {
345: if (isUniversal) {
346: // universal rules go straight in
347: // (no longest matching rule)
348: tempList = (List) this .cache.get("!" + key);
349: if (tempList != null) {
350: universalList.addAll(tempList);
351: }
352:
353: } else {
354: if (!ignoreBasicMatches) {
355: // ensure that all parent matches are SHORTER
356: // than rules with same level of matching.
357: //
358: // the calculations below don't work for universal
359: // matching, but we don't care because in that case
360: // this if-stmt is not entered.
361: int keyLength = key.length();
362: if (wildcardMatchStart) {
363: --keyLength;
364: }
365: if (wildcardMatchEnd) {
366: --keyLength;
367: } else if (parentMatchEnd) {
368: --keyLength;
369: }
370:
371: if (keyLength > longKeyLength) {
372: rulesList = (List) this .cache.get(key);
373: longKey = key;
374: longKeyLength = keyLength;
375: }
376: }
377: }
378: }
379: }
380: }
381:
382: // '*' works in practice as a default matching
383: // (this is because anything is a deeper match!)
384: if (rulesList == null) {
385: rulesList = (List) this .cache.get("*");
386: }
387:
388: // if we've matched a basic pattern, then add to the universal list
389: if (rulesList != null) {
390: universalList.addAll(rulesList);
391: }
392:
393: // don't filter if namespace is null
394: if (namespace != null) {
395: // remove invalid namespaces
396: Iterator it = universalList.iterator();
397: while (it.hasNext()) {
398: Rule rule = (Rule) it.next();
399: String ns_uri = rule.getNamespaceURI();
400: if (ns_uri != null && !ns_uri.equals(namespace)) {
401: it.remove();
402: }
403: }
404: }
405:
406: // need to make sure that the collection is sort in the order
407: // of addition. We use a custom comparator for this
408: Collections.sort(universalList, new Comparator() {
409:
410: public int compare(Object o1, Object o2)
411: throws ClassCastException {
412: // Get the entry order from the map
413: Integer i1 = (Integer) order.get(o1);
414: Integer i2 = (Integer) order.get(o2);
415:
416: // and use that to perform the comparison
417: if (i1 == null) {
418: if (i2 == null) {
419:
420: return 0;
421:
422: } else {
423:
424: return -1;
425:
426: }
427: } else if (i2 == null) {
428: return 1;
429: }
430:
431: return (i1.intValue() - i2.intValue());
432: }
433: });
434:
435: return universalList;
436: }
437:
438: /**
439: * Matching parent.
440: */
441: private boolean parentMatch(String key, String pattern,
442: String parentPattern) {
443: return parentPattern.endsWith(key
444: .substring(1, key.length() - 2));
445: }
446:
447: /**
448: * Standard match.
449: * Matches the end of the pattern to the key.
450: */
451: private boolean basicMatch(String key, String pattern) {
452: return (pattern.equals(key.substring(2)) || pattern
453: .endsWith(key.substring(1)));
454: }
455:
456: /**
457: * Finds an exact ancester match for given pattern
458: */
459: private List findExactAncesterMatch(String parentPattern) {
460: List matchingRules = null;
461: int lastIndex = parentPattern.length();
462: while (lastIndex-- > 0) {
463: lastIndex = parentPattern.lastIndexOf('/', lastIndex);
464: if (lastIndex > 0) {
465: matchingRules = (List) this .cache.get(parentPattern
466: .substring(0, lastIndex)
467: + "/*");
468: if (matchingRules != null) {
469: return matchingRules;
470: }
471: }
472: }
473: return null;
474: }
475: }
|