001: /*
002: * Copyright 2005-2006 The Kuali Foundation.
003: *
004: *
005: * Licensed under the Educational Community License, Version 1.0 (the "License");
006: * you may not use this file except in compliance with the License.
007: * You may obtain a copy of the License at
008: *
009: * http://www.opensource.org/licenses/ecl1.php
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package edu.iu.uis.eden.batch;
018:
019: import java.io.IOException;
020: import java.util.Collection;
021: import java.util.Iterator;
022: import java.util.LinkedHashSet;
023: import java.util.LinkedList;
024: import java.util.List;
025: import java.util.Set;
026:
027: import javax.xml.XMLConstants;
028: import javax.xml.parsers.DocumentBuilder;
029: import javax.xml.parsers.DocumentBuilderFactory;
030: import javax.xml.parsers.ParserConfigurationException;
031:
032: import org.apache.log4j.Logger;
033: import org.springframework.beans.factory.BeanInitializationException;
034: import org.xml.sax.EntityResolver;
035: import org.xml.sax.ErrorHandler;
036: import org.xml.sax.SAXException;
037: import org.xml.sax.SAXParseException;
038:
039: import edu.iu.uis.eden.KEWServiceLocator;
040: import edu.iu.uis.eden.XmlLoader;
041: import edu.iu.uis.eden.user.WorkflowUser;
042: import edu.iu.uis.eden.util.Utilities;
043: import edu.iu.uis.eden.xml.ClassLoaderEntityResolver;
044:
045: /**
046: * XmlIngesterService implementation which delegates to XmlDigesterService.
047: * This implementation goes through some pains to ensure that the types of
048: * xml doc (determined by file name convention) are issued to the XmlDigesterService
049: * in a pre-ordained order in an effort to avoid dependency problems. This implementation
050: * is not responsible for knowing about the mappings between types and services, but
051: * only the ordering of types, for the moment.
052: * NOTE: when types are merged into a universal document, we need to decide how to handle
053: * rollback if any specific type <i>in</i> that document fails, given that the current,
054: * legacy implementation assumes that a given XmlDoc consists of one and only one type
055: * and as such can be rolled back atomically. For instance, if universal doc now contains
056: * types A, B, and C, and it invokes ServiceA, ServiceB, and ServiceC in succession on the
057: * entire document, and ServiceB throws an exception attempting to parse B content...
058: * is it sufficient to rollback only that entry, or do we rollback the whole document
059: * and consider it "tainted"? (not to mention whether we should roll back the entire collection
060: * of which the document is a part - for now we do NOT rollback a collection or workflow data doc,
061: * but it is merely moved to a "problem" directory by the poller. the implementation does not yet
062: * specifically note which document or type (and potentially eventually which entry) failed in the
063: * collection or workflow data doc)
064: *
065: * NOTE: this service must be invoked only after all other services have initialized
066: * this <i>should</i> be the case since the LifeCycle is kicked off after contextInitialized,
067: * which <i>should</i> occur after Spring is actually done initializing. But is it, considering
068: * we are asynchronously initializing Spring? There is a 30 second built-in delay before
069: * XmlPoller is first run, but suffice it to say there is a possible race condition.
070: *
071: * @see edu.iu.uis.eden.batch.XmlIngesterService
072: * @see edu.iu.uis.eden.batch.XmlDigesterServiceImpl
073: * @author Aaron Hamid (arh14 at cornell dot edu)
074: */
075: public class XmlIngesterServiceImpl implements XmlIngesterService {
076: private static final Logger LOG = Logger
077: .getLogger(XmlIngesterServiceImpl.class);
078:
079: /**
080: * The entity resolver to use during validation
081: */
082: private EntityResolver resolver = new ClassLoaderEntityResolver();
083:
084: private XmlDigesterService digesterService;
085:
086: /**
087: * Whether to validate at all
088: */
089: private boolean validate = true;
090:
091: /**
092: * A list of service beans implementing XmlLoader, in the order of precedence
093: * in which we should load xml doc content types. It is implicit that any
094: * unspecified XmlLoader beans will be invoked after all ordered beans. In
095: * the future this may be configured through the use of a special marker (such as null)
096: * to indicate the "all others" set, but for now everything that is not specified
097: * just gets invoked last (in arbitrary order)
098: */
099: private List serviceOrder;
100:
101: // ---- bean properties
102:
103: public void setXmlDigesterService(XmlDigesterService digesterService) {
104: this .digesterService = digesterService;
105: }
106:
107: public void setEntityResolver(EntityResolver resolver) {
108: this .resolver = resolver;
109: }
110:
111: public void setServiceOrder(List serviceOrder)
112: throws BeanInitializationException {
113: //eat Strings for now
114: // Iterator orderIt = serviceOrder.iterator();
115: // while (orderIt.hasNext()) {
116: // Object o = orderIt.next();
117: // if (!(o instanceof XmlLoader)) {
118: // String message = o + " does not implement XmlLoader. Only XmlLoader beans can be specified in the service order!";
119: // LOG.error(message);
120: // throw new BeanInitializationException(message);
121: // }
122: // }
123: this .serviceOrder = serviceOrder;
124: }
125:
126: public void setValidate(boolean b) {
127: validate = b;
128: }
129:
130: // ---- implementation
131:
132: private static void addProcessingException(XmlDoc xmlDoc,
133: String message, Throwable t) {
134: String msg = xmlDoc.getProcessingMessage();
135: if (msg == null) {
136: msg = "";
137: }
138: msg += message + "\n" + Utilities.collectStackTrace(t);
139: xmlDoc.setProcessingMessage(msg);
140: }
141:
142: private static void validate(final XmlDoc xmlDoc,
143: EntityResolver resolver)
144: throws ParserConfigurationException, IOException,
145: SAXException {
146: DocumentBuilderFactory dbf = DocumentBuilderFactory
147: .newInstance();
148: dbf.setValidating(true);
149: dbf.setNamespaceAware(true);
150: dbf
151: .setAttribute(
152: "http://java.sun.com/xml/jaxp/properties/schemaLanguage",
153: XMLConstants.W3C_XML_SCHEMA_NS_URI);
154: DocumentBuilder db = dbf.newDocumentBuilder();
155: db.setEntityResolver(resolver);
156: db.setErrorHandler(new ErrorHandler() {
157: public void warning(SAXParseException se) {
158: LOG.warn("Warning parsing xml doc " + xmlDoc, se);
159: addProcessingException(xmlDoc,
160: "Warning parsing xml doc " + xmlDoc, se);
161: }
162:
163: public void error(SAXParseException se) throws SAXException {
164: LOG.error("Error parsing xml doc " + xmlDoc, se);
165: addProcessingException(xmlDoc, "Error parsing xml doc "
166: + xmlDoc, se);
167: throw se;
168: }
169:
170: public void fatalError(SAXParseException se)
171: throws SAXException {
172: LOG.error("Fatal error parsing xml doc " + xmlDoc, se);
173: addProcessingException(xmlDoc,
174: "Fatal error parsing xml doc " + xmlDoc, se);
175: throw se;
176: }
177: });
178: db.parse(xmlDoc.getStream());
179: }
180:
181: /**
182: * Validates (if possible) all XmlDocs, and accumulates only those
183: * which either were not possible to validate, or passed validation.
184: * @param collections collection of XmlDocCollection
185: * @param resolver the entity resolver to use
186: * @param successful xmldoccollections in which all docs successfully validated
187: * @param failed xmldoccollections in which one or more docs failed validation
188: */
189: private static void validate(List collections,
190: EntityResolver resolver, Set successful, Set failed) {
191: // for every collection, validate all docs
192: Iterator collectionIt = collections.iterator();
193: while (collectionIt.hasNext()) {
194: XmlDocCollection collection = (XmlDocCollection) collectionIt
195: .next();
196:
197: Iterator xmlDocIt = collection.getXmlDocs().iterator();
198: // for every xml doc in the collection, try to validate it
199: while (xmlDocIt.hasNext()) {
200: XmlDoc xmlDoc = (XmlDoc) xmlDocIt.next();
201: try {
202: validate(xmlDoc, resolver);
203: } catch (Exception e) {
204: LOG.error("Error validating doc: " + xmlDoc, e);
205: addProcessingException(xmlDoc,
206: "Error validating doc: " + xmlDoc, e);
207: // validation failed, so add collection to successful set
208: // do not break here, so that we can attempt validation on all
209: // docs in a collection; since validation has no side-effects
210: // we might as well validate all the docs now instead of forcing
211: // the user to continually re-submit
212: failed.add(collection);
213: }
214: }
215:
216: // all files validated, so add collection to successful set
217: successful.add(collection);
218: }
219: }
220:
221: private void ingest(XmlLoader xmlLoader,
222: Collection xmlDocCollections, WorkflowUser user,
223: Set successful, Set failed) {
224: Iterator xmlDocCollectionsIt = xmlDocCollections.iterator();
225: while (xmlDocCollectionsIt.hasNext()) {
226: XmlDocCollection xmlDocCollection = (XmlDocCollection) xmlDocCollectionsIt
227: .next();
228:
229: if (failed.contains(xmlDocCollection)) {
230: LOG.debug("Skipping " + xmlDocCollection.getFile()
231: + "...");
232: continue;
233: }
234:
235: try {
236: //SpringServiceLocator.getXmlDigesterService().digest(xmlLoader, xmlDocCollection, user);
237: digesterService.digest(xmlLoader, xmlDocCollection,
238: user);
239: } catch (Exception e) {
240: LOG
241: .error(
242: "Caught Exception loading xml data from "
243: + xmlDocCollection.getFile()
244: + ". Will move associated file to problem dir.",
245: e);
246: failed.add(xmlDocCollection);
247: }
248: }
249: }
250:
251: public Collection ingest(List collections) throws Exception {
252: return ingest(collections, null);
253: }
254:
255: private void ingestThroughOrderedLoaders(
256: Collection xmlDocCollections, WorkflowUser user,
257: Set successful, Set failed) {
258: LOG.debug("Ingesting through ordered XmlLoaders");
259: Iterator orderIt = serviceOrder.iterator();
260: while (orderIt.hasNext()) {
261: XmlLoader xmlLoader = (XmlLoader) KEWServiceLocator
262: .getService((String) orderIt.next());
263: LOG.debug("Ingesting through ordered XmlLoader: "
264: + xmlLoader);
265: ingest(xmlLoader, xmlDocCollections, user, successful,
266: failed);
267: }
268: }
269:
270: /* FIXME commented this out because its causing problems with the default User and Workgroup services
271: private void ingestThroughUnorderedLoaders(Collection xmlDocCollections, WorkflowUser user, Set successful, Set failed) {
272: Map beans = BeanFactoryUtils.beansOfTypeIncludingAncestors(context, XmlLoader.class);
273:
274: // ingest docs by remaining (if any) unordered services
275: LOG.debug("Ingesting through un-ordered XmlLoaders");
276: Iterator entryIt = beans.entrySet().iterator();
277: while (entryIt.hasNext()) {
278: Map.Entry entry = (Map.Entry) entryIt.next();
279: XmlLoader xmlLoader = (XmlLoader) entry.getValue();
280: if (serviceOrder.contains(entry.getKey())) {
281: LOG.debug("Skipping ordered XmlLoader: " + entry.getKey() + " " + xmlLoader.getClass());
282: continue;
283: }
284: LOG.debug("Ingesting through un-ordered XmlLoader: " + entry.getKey() + " " + xmlLoader.getClass());
285: ingest(xmlLoader, xmlDocs, user);
286: }
287: }*/
288:
289: public Collection ingest(List collections, WorkflowUser user) {
290: Set failed = new LinkedHashSet();
291: // validate all the docs up-front because we will be iterating over them
292: // multiple times: one for each XmlLoader. If we delegated validation to
293: // XmlDigesterService then the docs would re-validated over and over again,
294: // for each XmlLoader
295: if (validate) {
296: Set successful = new LinkedHashSet();
297: validate(collections, resolver, successful, failed);
298: collections = new LinkedList(successful);
299: }
300:
301: Set successful = new LinkedHashSet();
302: // ingest docs first by ordered services
303: ingestThroughOrderedLoaders(collections, user, successful,
304: failed);
305: // then by unordered services
306: collections = new LinkedList(successful);
307:
308: //ingestThroughUnorderedLoaders(collections, user, successful, failed);
309:
310: return failed;
311: }
312: }
|