001: package net.sf.saxon.functions;
002:
003: import net.sf.saxon.*;
004: import net.sf.saxon.event.Builder;
005: import net.sf.saxon.event.Stripper;
006: import net.sf.saxon.event.PipelineConfiguration;
007: import net.sf.saxon.expr.MappingFunction;
008: import net.sf.saxon.expr.MappingIterator;
009: import net.sf.saxon.expr.XPathContext;
010: import net.sf.saxon.om.*;
011: import net.sf.saxon.pattern.NodeKindTest;
012: import net.sf.saxon.trans.DynamicError;
013: import net.sf.saxon.trans.XPathException;
014: import net.sf.saxon.value.ObjectValue;
015: import net.sf.saxon.value.Whitespace;
016: import org.xml.sax.XMLReader;
017:
018: import javax.xml.transform.Source;
019: import javax.xml.transform.TransformerException;
020: import javax.xml.transform.ErrorListener;
021: import javax.xml.transform.stream.StreamSource;
022: import java.io.File;
023: import java.io.FilenameFilter;
024: import java.net.URI;
025: import java.net.URISyntaxException;
026:
/**
 * This class implements the default collection URI resolver.
 * <p>
 * It supports two kinds of collection. If the URI supplied uses the "file:/" scheme, and the
 * file that is referenced is a directory, then the collection is the set of files in that directory.
 * Query parameters may be included in the URI:
 * </p>
 * <ul>
 * <li>recurse=yes|no controls whether the directory is scanned recursively;</li>
 * <li>strip=yes|no determines whether whitespace text nodes are stripped from the selected documents;</li>
 * <li>val=strict|lax|preserve|strip determines whether schema validation is applied;</li>
 * <li>select=pattern determines which files in the directory are selected;</li>
 * <li>onerror=fail|warn|ignore determines the action taken if processing of a file fails;</li>
 * <li>parser=qualified.class.name selects the parser (XMLReader) to be used to read the files.</li>
 * </ul>
 * <p>
 * Otherwise, the resolver attempts to dereference the URI to obtain a catalog file. This is an XML file
 * containing a list of documents, in the format: </p>
 * <pre><code>
 * &lt;collection&gt;
 *    &lt;doc href="doc1.xml"/&gt;
 *    &lt;doc href="doc2.xml"/&gt;
 * &lt;/collection&gt;
 * </code></pre>
 */
051:
052: public class StandardCollectionURIResolver implements
053: CollectionURIResolver, MappingFunction {
054:
055: /**
056: * Resolve a URI.
057: *
058: * @param href The relative URI of the collection. This corresponds to the
059: * argument supplied to the collection() function. If the collection() function
060: * was called with no arguments (to get the "default collection") this argument
061: * will be null.
062: * @param base The base URI that should be used. This is the base URI of the
063: * static context in which the call to collection() was made, typically the URI
064: * of the stylesheet or query module
065: * @return an Iterator over the documents in the collection. The items returned
066: * by this iterator must implement the {@link net.sf.saxon.om.NodeInfo} interface.
067: * <p/>
068: * If the URI is not recognized, the method may either return an empty iterator,
069: * in which case no error is reported, or it may throw an exception, in which case
070: * the query or transformation fails. Returning null has the same effect as returning
071: * an empty iterator.
072: */
073:
074: public SequenceIterator resolve(String href, String base,
075: XPathContext context) throws XPathException {
076:
077: if (href == null) {
078: // default collection. This returns empty, we previously threw an error.
079: return null;
080: }
081:
082: if (base == null) {
083: base = StandardURIResolver.tryToExpand(base);
084: if (base == null) {
085: DynamicError err = new DynamicError(
086: "Cannot resolve relative URI: no base URI available");
087: err.setXPathContext(context);
088: throw err;
089: }
090: }
091:
092: URI resolvedURI;
093: URIQueryParameters params = null;
094: try {
095: URI relative = new URI(href);
096: String query = relative.getQuery();
097: if (query != null) {
098: params = new URIQueryParameters(query, context
099: .getConfiguration());
100: int q = href.indexOf('?');
101: href = href.substring(0, q);
102: }
103: resolvedURI = new URI(base).resolve(href);
104: } catch (URISyntaxException e) {
105: DynamicError err = new DynamicError("Invalid URI "
106: + Err.wrap(href)
107: + " passed to collection() function");
108: err.setXPathContext(context);
109: throw err;
110: }
111:
112: if ("file".equals(resolvedURI.getScheme())) {
113: File file = new File(resolvedURI);
114: if (!file.exists()) {
115: DynamicError err = new DynamicError(
116: "The file or directory " + resolvedURI
117: + " does not exist");
118: err.setXPathContext(context);
119: throw err;
120: }
121: if (file.isDirectory()) {
122: return directoryContents(file, params, context);
123: }
124: }
125: return catalogContents(resolvedURI, params, context);
126:
127: }
128:
129: private SequenceIterator directoryContents(File directory,
130: URIQueryParameters params, XPathContext context) {
131:
132: FilenameFilter filter = null;
133:
134: if (params != null) {
135: FilenameFilter f = params.getFilenameFilter();
136: if (f != null) {
137: filter = f;
138: }
139: }
140:
141: File[] files;
142: if (filter == null) {
143: files = directory.listFiles();
144: } else {
145: files = directory.listFiles(filter);
146: }
147:
148: ObjectValue[] fileValues = new ObjectValue[files.length];
149: for (int f = 0; f < files.length; f++) {
150: fileValues[f] = new ObjectValue(files[f]);
151: }
152:
153: // If the URI requested suppression of errors, or that errors should be treated
154: // as warnings, we set up a special ErrorListener to achieve this
155:
156: int onError = URIQueryParameters.ON_ERROR_FAIL;
157: if (params != null && params.getOnError() != null) {
158: onError = params.getOnError().intValue();
159: }
160: final Controller controller = context.getController();
161: final PipelineConfiguration oldPipe = controller
162: .makePipelineConfiguration();
163: final PipelineConfiguration newPipe = new PipelineConfiguration(
164: oldPipe);
165: final ErrorListener oldErrorListener = controller
166: .getErrorListener();
167: if (onError == URIQueryParameters.ON_ERROR_IGNORE) {
168: newPipe.setErrorListener(new ErrorListener() {
169: public void warning(TransformerException exception) {
170: }
171:
172: public void error(TransformerException exception) {
173: }
174:
175: public void fatalError(TransformerException exception) {
176: }
177: });
178: } else if (onError == URIQueryParameters.ON_ERROR_WARNING) {
179: newPipe.setErrorListener(new ErrorListener() {
180: public void warning(TransformerException exception)
181: throws TransformerException {
182: oldErrorListener.warning(exception);
183: }
184:
185: public void error(TransformerException exception)
186: throws TransformerException {
187: oldErrorListener.warning(exception);
188: DynamicError supp = new DynamicError(
189: "The document will be excluded from the collection");
190: supp.setLocator(exception.getLocator());
191: oldErrorListener.warning(supp);
192: }
193:
194: public void fatalError(TransformerException exception)
195: throws TransformerException {
196: error(exception);
197: }
198: });
199: }
200: FileExpander expander = new FileExpander(params, newPipe);
201: SequenceIterator base = new ArrayIterator(fileValues);
202: // if (oldErrorListener != null) {
203: // ClosingAction action = new ClosingAction() {
204: // public void close(SequenceIterator base) {
205: // controller.setErrorListener(oldErrorListener);
206: // }
207: // };
208: // base = new ClosingIterator(base, action);
209: // }
210: return new MappingIterator(base, expander, context);
211: }
212:
213: private SequenceIterator catalogContents(URI catalogFile,
214: URIQueryParameters params, XPathContext context)
215: throws XPathException {
216:
217: DocumentInfo catalog = (DocumentInfo) Document.makeDoc(
218: catalogFile.toString(), null, context, null);
219: if (catalog == null) {
220: // we failed to read the catalogue
221: DynamicError err = new DynamicError(
222: "Failed to load collection catalogue "
223: + catalogFile);
224: err.setXPathContext(context);
225: throw err;
226: }
227:
228: // Now return an iterator over the documents that it refers to
229:
230: SequenceIterator iter = catalog.iterateAxis(Axis.CHILD,
231: NodeKindTest.ELEMENT);
232: NodeInfo top;
233: while (true) {
234: top = (NodeInfo) iter.next();
235: if (top == null)
236: break;
237: if (!("collection".equals(top.getLocalPart()) && top
238: .getURI().equals(""))) {
239: DynamicError err = new DynamicError(
240: "collection catalogue must contain top-level element <collection>");
241: err.setXPathContext(context);
242: throw err;
243: }
244: break;
245: }
246:
247: SequenceIterator documents = top.iterateAxis(Axis.CHILD,
248: NodeKindTest.ELEMENT);
249:
250: return new MappingIterator(documents, this , context);
251: }
252:
253: /**
254: * Map from doc elements in the catalogue document to nodes
255: * returned in the result
256: * @param item A doc element in the catalogue document
257: * @param context The dynamic evaluation context
258: * @return the document or element referenced by the @href attribute of the doc
259: * element in the catalogue
260: * @throws net.sf.saxon.trans.XPathException if the document cannot be retrieved or parsed, unless
261: * error recovery has been chosen.
262: */
263:
264: public Object map(Item item, XPathContext context)
265: throws XPathException {
266: NodeInfo element = (NodeInfo) item;
267: if (!("doc".equals(element.getLocalPart()) && element.getURI()
268: .equals(""))) {
269: DynamicError err = new DynamicError(
270: "children of <collection> element must be <doc> elements");
271: err.setXPathContext(context);
272: throw err;
273: }
274: String href = Navigator.getAttributeValue(element, "", "href");
275: if (href == null) {
276: DynamicError err = new DynamicError(
277: "\"<doc> element in catalogue has no @href attribute\"");
278: err.setXPathContext(context);
279: throw err;
280: }
281:
282: NodeInfo target = Document.makeDoc(href, element.getBaseURI(),
283: context, null);
284: return target;
285: }
286:
287: /**
288: * Mapping function to process the files in a directory. This maps a sequence of external
289: * objects representing files to a sequence of DocumentInfo nodes representing the parsed
290: * contents of those files.
291: */
292:
293: private static class FileExpander implements MappingFunction {
294:
295: private URIQueryParameters params;
296: boolean recurse = false;
297: int strip = Whitespace.UNSPECIFIED;
298: int validation = Validation.STRIP;
299: XMLReader parser = null;
300: int onError = URIQueryParameters.ON_ERROR_FAIL;
301: FilenameFilter filter = null;
302: PipelineConfiguration pipe;
303:
304: public FileExpander(URIQueryParameters params,
305: PipelineConfiguration pipe) {
306: this .params = params;
307: this .pipe = pipe;
308: if (params != null) {
309: FilenameFilter f = params.getFilenameFilter();
310: if (f != null) {
311: filter = f;
312: }
313: Boolean r = params.getRecurse();
314: if (r != null) {
315: recurse = r.booleanValue();
316: }
317: Integer v = params.getValidationMode();
318: if (v != null) {
319: validation = v.intValue();
320: }
321: strip = params.getStripSpace();
322: Integer e = params.getOnError();
323: if (e != null) {
324: onError = e.intValue();
325: }
326: XMLReader p = params.getXMLReader();
327: if (p != null) {
328: parser = p;
329: }
330: }
331:
332: }
333:
334: /**
335: * Map one item to a sequence.
336: *
337: * @param item The item to be mapped.
338: * If context is supplied, this must be the same as context.currentItem().
339: * @param context The processing context. Some mapping functions use this because they require
340: * context information. Some mapping functions modify the context by maintaining the context item
341: * and position. In other cases, the context may be null.
342: * @return either (a) a SequenceIterator over the sequence of items that the supplied input
343: * item maps to, or (b) an Item if it maps to a single item, or (c) null if it maps to an empty
344: * sequence.
345: */
346:
347: public Object map(Item item, XPathContext context)
348: throws XPathException {
349: File file = (File) ((ObjectValue) item).getObject();
350: if (file.isDirectory()) {
351: if (recurse) {
352: File[] files;
353: if (filter == null) {
354: files = file.listFiles();
355: } else {
356: files = file.listFiles(filter);
357: }
358:
359: ObjectValue[] fileValues = new ObjectValue[files.length];
360: for (int f = 0; f < files.length; f++) {
361: fileValues[f] = new ObjectValue(files[f]);
362: }
363:
364: FileExpander expander = new FileExpander(params,
365: pipe);
366: return new MappingIterator(new ArrayIterator(
367: fileValues), expander, context);
368: } else {
369: return null;
370: }
371: } else {
372: try {
373: Source source = new StreamSource(file.toURI()
374: .toString());
375: if (validation != Validation.STRIP
376: && validation != Validation.PRESERVE) {
377: source = AugmentedSource
378: .makeAugmentedSource(source);
379: ((AugmentedSource) source)
380: .setSchemaValidationMode(validation);
381: }
382: if (parser != null) {
383: source = AugmentedSource
384: .makeAugmentedSource(source);
385: ((AugmentedSource) source).setXMLReader(parser);
386: }
387:
388: Stripper stripper = null;
389: if (params != null) {
390: int stripSpace = params.getStripSpace();
391: switch (strip) {
392: case Whitespace.ALL: {
393: stripper = AllElementStripper.getInstance();
394: stripper.setStripAll();
395: source = AugmentedSource
396: .makeAugmentedSource(source);
397: ((AugmentedSource) source)
398: .addFilter(stripper);
399: break;
400: }
401: case Whitespace.IGNORABLE:
402: case Whitespace.NONE:
403: source = AugmentedSource
404: .makeAugmentedSource(source);
405: ((AugmentedSource) source)
406: .setStripSpace(stripSpace);
407: }
408: }
409: NodeInfo contextNode = Builder.build(source,
410: stripper, pipe);
411: return contextNode.getDocumentRoot();
412: } catch (XPathException err) {
413: if (onError == URIQueryParameters.ON_ERROR_IGNORE) {
414: return null;
415: } else if (onError == URIQueryParameters.ON_ERROR_WARNING) {
416: try {
417: if (!err.hasBeenReported()) {
418: pipe.getErrorListener().warning(err);
419: DynamicError supp = new DynamicError(
420: "The document will be excluded from the collection");
421: supp.setLocator(err.getLocator());
422: pipe.getErrorListener().warning(supp);
423: }
424: } catch (TransformerException err2) {
425: //
426: }
427: return null;
428: } else {
429: throw err;
430: }
431: }
432: }
433: }
434: }
435:
436: }
437:
438: //
439: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
440: // you may not use this file except in compliance with the License. You may obtain a copy of the
441: // License at http://www.mozilla.org/MPL/
442: //
443: // Software distributed under the License is distributed on an "AS IS" basis,
444: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
445: // See the License for the specific language governing rights and limitations under the License.
446: //
447: // The Original Code is: all this file.
448: //
449: // The Initial Developer of the Original Code is Michael H. Kay.
450: //
451: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
452: //
453: // Contributor(s): none.
454: //
|