001: /*
002: * This file or a portion of this file is licensed under the terms of
003: * the Globus Toolkit Public License, found in file ../GTPL, or at
004: * http://www.globus.org/toolkit/download/license.html. This notice must
005: * appear in redistributions of this file, with or without modification.
006: *
007: * Redistributions of this Software, with or without modification, must
008: * reproduce the GTPL in: (1) the Software, or (2) the Documentation or
009: * some other similar material which is provided with the Software (if
010: * any).
011: *
012: * Copyright 1999-2004 University of Chicago and The University of
013: * Southern California. All rights reserved.
014: */
015: package org.griphyn.vdl.parser;
016:
017: import org.griphyn.vdl.dax.*;
018: import org.griphyn.vdl.toolkit.Toolkit;
019: import org.griphyn.vdl.util.Logging;
020: import org.griphyn.vdl.util.VDLType;
021: import org.griphyn.vdl.classes.LFN;
022:
023: import org.xml.sax.*;
024: import org.xml.sax.helpers.DefaultHandler;
025: import javax.xml.parsers.*;
026: import java.io.*;
027: import java.util.ArrayList;
028: import java.util.List;
029: import java.util.HashMap;
030: import java.util.Map;
031: import java.util.Stack;
032:
033: /**
034: * This class uses the Xerces SAX2 parser to validate and parse a DAX
035: * document. This class extends the xerces DefaultHandler so that we
036: * only need to override callbacks of interest.
037:
038: * @author Jens-S. Vöckler
039: * @author Yong Zhao
040: * @version $Revision: 50 $
041: */
042:
043: public class DAXParser extends DefaultHandler {
044: private static String vendorParserClass = "org.apache.xerces.parsers.SAXParser";
045:
046: private XMLReader m_parser;
047:
048: //to print tags in hierarchy
049: private int m_depth;
050:
051: //keep some of the objects so that they can be referred later.
052: private ADAG m_adag;
053: private Job m_job;
054: private Profile m_profile;
055: String m_child;
056: String m_parent;
057:
058: //to indicate the parent element of the <filename> being processed.
059: static final int TAG_ADAG = 0;
060: static final int TAG_ARGUMENT = 1;
061: static final int TAG_PROFILE = 2;
062: static final int TAG_OTHER = 3;
063: private int m_tag;
064:
065: /**
066: * Keep the location within the document
067: */
068: private Locator m_location;
069:
070: /**
071: * A Hashmap to forward resolve namespaces that were encountered
072: * during parsing.
073: */
074: private Map m_forward;
075:
076: /**
077: * A Hashmap to reverse resolve namespaces that were encountered
078: * during parsing.
079: */
080: private Map m_reverse;
081:
082: /**
083: * Obtain our logger once for multiple uses.
084: */
085: private Logging m_log;
086:
087: /**
088: * Sets a feature while capturing failed features right here.
089: *
090: * @param uri is the feature's URI to modify
091: * @param flag is the new value to set.
092: * @return true, if the feature could be set, false for an exception
093: */
094: private boolean set(String uri, boolean flag) {
095: boolean result = false;
096: try {
097: this .m_parser.setFeature(uri, flag);
098: result = true;
099: } catch (SAXException se) {
100: Logging.instance().log("default", 0,
101: "Could not set parser feature " + se.getMessage());
102: }
103: return result;
104: }
105:
106: /**
107: * The class constructor
108: * This function initializes the xerces parser, sets the classes that
109: * hold the callback functions, and the features that enable schema
110: * validation.
111: */
112: public DAXParser(String schemaLocation) {
113: this .m_forward = new HashMap();
114: this .m_reverse = new HashMap();
115: this .m_log = Logging.instance();
116:
117: try {
118: m_parser = (XMLReader) Class.forName(vendorParserClass)
119: .newInstance();
120: m_parser.setContentHandler(this );
121: m_parser.setErrorHandler(this );
122:
123: set("http://xml.org/sax/features/validation", true);
124: set("http://apache.org/xml/features/validation/dynamic",
125: true);
126: set("http://apache.org/xml/features/validation/schema",
127: true);
128: // time+memory consuming, see http://xml.apache.org/xerces2-j/features.html
129: // set( "http://apache.org/xml/features/validation/schema-full-checking", true );
130:
131: // Send XML Schema element default values via characters().
132: set(
133: "http://apache.org/xml/features/validation/schema/element-default",
134: true);
135: set(
136: "http://apache.org/xml/features/validation/warn-on-duplicate-attdef",
137: true);
138: // mysteriously, this one fails with recent Xerces
139: // set( "http://apache.org/xml/features/validation/warn-on-undeclared-elemdef", true );
140: set(
141: "http://apache.org/xml/features/warn-on-duplicate-entitydef",
142: true);
143:
144: // set the schema default location.
145: if (schemaLocation != null) {
146: setSchemaLocations(ADAG.SCHEMA_NAMESPACE + ' '
147: + schemaLocation);
148: m_log.log("app", 2, "will use " + schemaLocation);
149: } else {
150: m_log.log("app", 2, "will use document schema hint");
151: }
152: } catch (ClassNotFoundException e) {
153: m_log.log("default", 0,
154: "The SAXParser class was not found: " + e);
155: } catch (InstantiationException e) {
156: m_log.log("default", 0,
157: "The SAXParser class could not be instantiated: "
158: + e);
159: } catch (IllegalAccessException e) {
160: m_log.log("default", 0,
161: "The SAXParser class could not be accessed: " + e);
162: }
163: }
164:
165: /**
166: * Set the list of external real locations where the XML schema may be found.
167: * Since this list can be determined at run-time through properties etc., we
168: * expect this function to be called between instantiating the parser, and
169: * using the parser.
170: *
171: * @param list is a list of strings representing schema locations. The content
172: * exists in pairs, one of the namespace URI, one of the location URL.
173: */
174: public void setSchemaLocations(String list) {
175: // schema location handling
176: try {
177: m_parser
178: .setProperty(
179: "http://apache.org/xml/properties/schema/external-schemaLocation",
180: list);
181: } catch (SAXException se) {
182: m_log.log("default", 0, "The SAXParser reported an error: "
183: + se);
184: }
185: }
186:
187: /**
188: * This function parses a DAX source (could be a document, a stream,
189: * etc.), and creates java class instances that correspond to the DAX.
190: *
191: * @param daxURI is the URI for the DAX source.
192: * @return an instance of class <code>ADAG</code>, which keeps the
193: * information about the LFNs, Jobs and Child-Parent relations specified
194: * in the DAX source.
195: * @see org.griphyn.vdl.dax.ADAG
196: */
197: public ADAG parse(String daxURI) {
198: try {
199: m_adag = null;
200: InputSource inputSource = new InputSource(daxURI);
201: m_parser.parse(inputSource);
202:
203: if (m_adag != null) {
204: m_log.log("parser", 3, "*** SUCCESS ***");
205: m_log.log("DAXparser", 3, "read " + m_adag.getSize()
206: + " dags");
207: return m_adag;
208: }
209: } catch (SAXException e) {
210: m_log.log("default", 0, "SAX Error: " + e);
211: } catch (IOException e) {
212: m_log.log("default", 0, "IO Error: " + e);
213: }
214:
215: return null;
216: }
217:
218: /**
219: * This function parses a DAX source (could be a document, a stream,
220: * etc.), and creates java class instances that correspond to the DAX.
221: *
222: * @param stream is an input stream for the DAX source.
223: * @return an instance of class <code>ADAG</code>, which keeps the
224: * information about the LFNs, Jobs and Child-Parent relations specified
225: * in the DAX source.
226: * @see org.griphyn.vdl.dax.ADAG
227: */
228: public ADAG parse(InputStream stream) {
229: try {
230: m_adag = null;
231: InputSource inputSource = new InputSource(stream);
232: m_parser.parse(inputSource);
233:
234: if (m_adag != null) {
235: m_log.log("parser", 3, "*** SUCCESS ***");
236: m_log.log("DAXparser", 3, "read " + m_adag.getSize()
237: + " dags");
238: return m_adag;
239: }
240: } catch (SAXException e) {
241: m_log.log("default", 0, "SAX Error: " + e);
242: } catch (IOException e) {
243: m_log.log("default", 0, "IO Error: " + e);
244: }
245:
246: return null;
247: }
248:
249: //
250: // here starts the implementation to the Interface
251: //
252:
253: /**
254: * Obtains the document locator from the parser. The document location
255: * can be used to print debug information, i.e the current location
256: * (line, column) in the document.
257: *
258: * @param locator is the externally set current position
259: */
260: public void setDocumentLocator(Locator locator) {
261: this .m_location = locator;
262: }
263:
264: /**
265: * This method specifies what to do when the parser is at the beginning
266: * of the document. In this case, we simply print a message for debugging.
267: */
268: public void startDocument() {
269: m_depth = 0;
270: m_log.log("parser", 1, "*** start of document ***");
271: }
272:
273: /**
274: * The parser comes to the end of the document.
275: */
276: public void endDocument() {
277: m_log.log("parser", 1, "*** end of document ***");
278: }
279:
280: /**
281: * There is a prefix or namespace defined, put the prefix and its URI
282: * in the HashMap. We can get the URI when the prefix is used here after.
283: *
284: * @param prefix the Namespace prefix being declared.
285: * @param uri the Namespace URI the prefix is mapped to.
286: */
287: public void startPrefixMapping(java.lang.String prefix,
288: java.lang.String uri) throws SAXException {
289: String p = prefix == null ? null : new String(prefix);
290: String u = uri == null ? null : new String(uri);
291: m_log.log("parser", 2, "adding \"" + p + "\" <=> " + u);
292:
293: if (!this .m_forward.containsKey(p))
294: this .m_forward.put(p, new Stack());
295: ((Stack) this .m_forward.get(p)).push(u);
296:
297: if (!this .m_reverse.containsKey(u))
298: this .m_reverse.put(u, new Stack());
299: ((Stack) this .m_reverse.get(u)).push(p);
300: }
301:
302: /**
303: * Out of the reach of the prefix, remove it from the HashMap.
304: *
305: * @param prefix is the prefix that was being mapped previously.
306: */
307: public void endPrefixMapping(java.lang.String prefix)
308: throws SAXException {
309: String u = (String) ((Stack) this .m_forward.get(prefix)).pop();
310: String p = (String) ((Stack) this .m_reverse.get(u)).pop();
311: m_log.log("parser", 2, "removed \"" + p + "\" <=> " + u);
312: }
313:
314: /**
315: * Helper function to map prefixes correctly onto the elements.
316: *
317: * @param uri is the parser-returned URI that needs translation.
318: * @return the correct prefix for the URI
319: */
320: private String map(String uri) {
321: if (uri == null || uri.length() == 0)
322: return "";
323: Stack stack = (Stack) this .m_reverse.get(uri);
324: String result = stack == null ? null : (String) stack.peek();
325: if (result == null || result.length() == 0)
326: return "";
327: else
328: return result + ':';
329: }
330:
331: /**
332: * This method defines the action to take when the parser begins to parse
333: * an element.
334: *
335: * @param namespaceURI is the URI of the namespace for the element
336: * @param localName is the element name without namespace
337: * @param qName is the element name as it appears in the docment
338: * @param atts has the names and values of all the attributes
339: */
340: public void startElement(java.lang.String namespaceURI,
341: java.lang.String localName, java.lang.String qName,
342: Attributes atts) throws SAXException {
343:
344: m_log.log("parser", 3, "<" + map(namespaceURI) + localName
345: + "> at " + m_location.getLineNumber() + ":"
346: + m_location.getColumnNumber());
347:
348: // yup, one more element level
349: m_depth++;
350:
351: java.util.List names = new java.util.ArrayList();
352: java.util.List values = new java.util.ArrayList();
353: for (int i = 0; i < atts.getLength(); ++i) {
354: String name = new String(atts.getLocalName(i));
355: String value = new String(atts.getValue(i));
356:
357: m_log.log("parser", 2, "attribute " + map(atts.getURI(i))
358: + name + "=\"" + value + "\"");
359: names.add(name);
360: values.add(value);
361: }
362:
363: createElementObject(qName, names, values);
364: }
365:
366: /**
367: * The parser is at the end of an element. Each successfully and
368: * completely parsed Definition will trigger a callback to the
369: * registered DefinitionHandler.
370: *
371: * @param namespaceURI is the URI of the namespace for the element
372: * @param localName is the element name without namespace
373: * @param qName is the element name as it appears in the docment
374: */
375: public void endElement(java.lang.String namespaceURI,
376: java.lang.String localName, java.lang.String qName)
377: throws SAXException {
378: // that's it for this level
379: m_depth--;
380: m_log.log("parser", 3, "</" + map(namespaceURI) + localName
381: + "> at " + m_location.getLineNumber() + ":"
382: + m_location.getColumnNumber());
383:
384: setElementRelation(qName);
385: }
386:
387: /**
388: * This method is the callback function for characters in an element.
389: * The element should be mixed-content.
390: *
391: * @param ch are the characters from the XML document
392: * @param start is the start position into the array
393: * @param length is the amount of valid data in the array
394: */
395: public void characters(char[] ch, int start, int length)
396: throws SAXException {
397: String message = new String(ch, start, length);
398: if (message.length() > 0) {
399: if (message.trim().length() == 0)
400: m_log.log("parser", 3, "Characters: whitespace x "
401: + length);
402: else
403: m_log.log("parser", 3, "Characters: \"" + message
404: + "\"");
405: elementCharacters(message);
406: }
407: }
408:
409: /**
410: * Currently, ignorable whitespace will be ignored.
411: *
412: * @param ch are the characters from the XML document
413: * @param start is the start position into the array
414: * @param length is the amount of valid data in the array
415: */
416: public void ignorableWhitespace(char[] ch, int start, int length)
417: throws SAXException {
418: m_log.log("parser", 3, "Ignoring " + length + " whitespaces");
419: }
420:
421: /**
422: * Receive a processing instruction. Currently, we are just printing
423: * a debug message that we received a PI.
424: *
425: * @param target the processing instruction target
426: * @param data the processing instruction data, or null if none was supplied.
427: * The data does not include any whitespace separating it from the target.
428: */
429: public void processingInstruction(java.lang.String target,
430: java.lang.String data) throws SAXException {
431: m_log.log("parser", 2, "processing instruction " + target
432: + "=\"" + data + "\" was skipped!");
433: }
434:
435: /**
436: * Receive a notification that an entity was skipped. Currently, we
437: * are just printing a debug message to this fact.
438: *
439: * @param name The name of the skipped entity. If it is a parameter
440: * entity, the name will begin with '%', and if it is the external DTD
441: * subset, it will be the string "[dtd]".
442: */
443: public void skippedEntity(java.lang.String name)
444: throws SAXException {
445: m_log.log("parser", 2, "entity " + name + " was skipped!");
446: }
447:
448: //
449: // =================================================== our own stuff ===
450: //
451:
452: /**
453: * Small helper method to bundle repetitive parameters in a template
454: * for reporting progress.
455: *
456: * @param subject is the name of the XML element that is being scrutinized.
457: * @param name is then name of the element we are working with.
458: * @param value is the attribute value.
459: */
460: private void log(String subject, String name, String value) {
461: if (value == null)
462: value = new String();
463: m_log.log("parser", 3, subject + "." + name + "=\"" + value
464: + "\"");
465: }
466:
467: /**
468: * Small helper method to bundle repetitive complaints in a template
469: * for reporting progress.
470: *
471: * @param subject is the name of the XML element that is being scrutinized.
472: * @param name is then name of the element we are working with.
473: * @param value is the attribute value.
474: */
475: private void complain(String subject, String name, String value) {
476: if (value == null)
477: value = new String();
478: m_log.log("default", 0, "ignoring " + subject + '@' + name
479: + "=\"" + value + '"', true);
480: }
481:
482: /**
483: * This method finds out what is the current element, creates the
484: * java object that corresponds to the element, and sets the member
485: * variables with the values of the attributes of the element.
486: *
487: * @param e is the name of the element
488: * @param names is a list of attribute names, as strings.
489: * @param values is a list of attribute values, to match the key list.
490: */
491: public void createElementObject(String e, java.util.List names,
492: java.util.List values) throws IllegalArgumentException {
493: // invalid length
494: if (e == null || e.length() < 1)
495: throw new IllegalArgumentException("illegal element length");
496:
497: if (e.equals("adag")) {
498: m_tag = TAG_ADAG;
499: m_adag = new ADAG();
500: for (int i = 0; i < names.size(); ++i) {
501: String name = (String) names.get(i);
502: String value = (String) values.get(i);
503:
504: if (name.equals("name")) {
505: this .log(e, name, value);
506: m_adag.setName(value);
507: } else if (name.equals("index")) {
508: this .log(e, name, value);
509: m_adag.setIndex(Integer.parseInt(value));
510: } else if (name.equals("count")) {
511: this .log(e, name, value);
512: m_adag.setSize(Integer.parseInt(value));
513: } else if (name.equals("version")) {
514: this .log(e, name, value);
515: m_adag.setVersion(value);
516: } else if (name.equals("jobCount")) {
517: this .log(e, name, value);
518: // ignore
519: } else if (name.equals("fileCount")) {
520: this .log(e, name, value);
521: // ignore
522: } else if (name.equals("childCount")) {
523: this .log(e, name, value);
524: // ignore
525: } else if (name.equals("schemaLocation")) {
526: // ignore
527: } else {
528: this .complain(e, name, value);
529: }
530: }
531: return;
532: }
533:
534: if (e.equals("filename") || e.equals("stdin")
535: || e.equals("stdout") || e.equals("stderr")
536: || e.equals("uses")) {
537: Filename fn = new Filename();
538: for (int i = 0; i < names.size(); ++i) {
539: String name = (String) names.get(i);
540: String value = (String) values.get(i);
541:
542: if (name.equals("file")) {
543: this .log(e, name, value);
544: fn.setFilename(value);
545: } else if (name.equals("link")) {
546: this .log(e, name, value);
547: fn.setLink(VDLType.getLinkType(value));
548: } else if (name.equals("optional")) {
549: this .log(e, name, value);
550: fn.setOptional(new Boolean(value).booleanValue());
551: } else if (name.equals("dontRegister")) {
552: this .log(e, name, value);
553: fn.setDontRegister(new Boolean(value)
554: .booleanValue());
555: } else if (name.equals("dontTransfer")) {
556: // parse tri-state
557: if (value.equals("false")) {
558: this .log(e, name, value);
559: fn.setDontTransfer(LFN.XFER_MANDATORY);
560: } else if (value.equals("true")) {
561: this .log(e, name, value);
562: fn.setDontTransfer(LFN.XFER_NOT);
563: } else if (value.equals("optional")) {
564: this .log(e, name, value);
565: fn.setDontTransfer(LFN.XFER_OPTIONAL);
566: } else {
567: this .complain(e, name, value);
568: }
569: } else if (name.equals("isTemporary")) {
570: this .log(e, name, value);
571: boolean temp = (new Boolean(value)).booleanValue();
572: fn.setDontRegister(temp);
573: fn.setDontTransfer(temp ? LFN.XFER_NOT
574: : LFN.XFER_MANDATORY);
575: } else if (name.equals("temporaryHint")) {
576: this .log(e, name, value);
577: fn.setTemporary(value);
578: } else if (name.equals("varname")) {
579: this .log(e, name, value);
580: fn.setVariable(value);
581: } else {
582: this .complain(e, name, value);
583: }
584: }
585:
586: if (e.equals("filename")) {
587: switch (m_tag) {
588: case TAG_ADAG:
589: m_adag.setFilename(fn);
590: break;
591: case TAG_PROFILE:
592: m_profile.addLeaf(fn);
593: break;
594: case TAG_ARGUMENT:
595: m_job.addArgument(fn);
596: }
597: } else {
598: m_tag = TAG_OTHER;
599:
600: if (e.equals("stdin"))
601: m_job.setStdin(fn);
602: else if (e.equals("stdout"))
603: m_job.setStdout(fn);
604: else if (e.equals("stderr"))
605: m_job.setStderr(fn);
606: else if (e.equals("uses"))
607: m_job.addUses(fn);
608: }
609: return;
610: }
611:
612: if (e.equals("job")) {
613: m_job = new Job();
614: for (int i = 0; i < names.size(); ++i) {
615: String name = (String) names.get(i);
616: String value = (String) values.get(i);
617:
618: if (name.equals("name")) {
619: this .log(e, name, value);
620: m_job.setName(value);
621: } else if (name.equals("level")) {
622: this .log(e, name, value);
623: m_job.setLevel(Integer.parseInt(value));
624: } else if (name.equals("namespace")) {
625: this .log(e, name, value);
626: m_job.setNamespace(value);
627: } else if (name.equals("version")) {
628: this .log(e, name, value);
629: m_job.setVersion(value);
630: } else if (name.equals("compound")) {
631: this .log(e, name, value);
632: m_job.setChain(value);
633: } else if (name.equals("id")) {
634: this .log(e, name, value);
635: m_job.setID(value);
636: } else if (name.equals("dv-namespace")) {
637: this .log(e, name, value);
638: m_job.setDVNamespace(value);
639: } else if (name.equals("dv-name")) {
640: this .log(e, name, value);
641: m_job.setDVName(value);
642: } else if (name.equals("dv-version")) {
643: this .log(e, name, value);
644: m_job.setDVVersion(value);
645: } else {
646: this .complain(e, name, value);
647: }
648: }
649: return;
650: }
651:
652: if (e.equals("child")) {
653: for (int i = 0; i < names.size(); ++i) {
654: String name = (String) names.get(i);
655: String value = (String) values.get(i);
656:
657: if (name.equals("ref")) {
658: this .log(e, name, value);
659: m_child = value;
660: } else {
661: this .complain(e, name, value);
662: }
663: }
664: m_adag.addChild(m_child);
665: return;
666: }
667:
668: if (e.equals("parent")) {
669: for (int i = 0; i < names.size(); ++i) {
670: String name = (String) names.get(i);
671: String value = (String) values.get(i);
672:
673: if (name.equals("ref")) {
674: this .log(e, name, value);
675: m_parent = value;
676: } else {
677: this .complain(e, name, value);
678: }
679: }
680: m_adag.addChild(m_child, m_parent);
681: return;
682: }
683:
684: if (e.equals("argument")) {
685: m_tag = TAG_ARGUMENT;
686: return;
687: }
688:
689: if (e.equals("profile")) {
690: m_profile = new Profile();
691: m_tag = TAG_PROFILE;
692: for (int i = 0; i < names.size(); ++i) {
693: String name = (String) names.get(i);
694: String value = (String) values.get(i);
695:
696: if (name.equals("namespace")) {
697: this .log(e, name, value);
698: m_profile.setNamespace(value);
699: } else if (name.equals("key")) {
700: this .log(e, name, value);
701: m_profile.setKey(value);
702: } else if (name.equals("origin")) {
703: this .log(e, name, value);
704: m_profile.setOrigin(value);
705: } else {
706: this .complain(e, name, value);
707: }
708: }
709: return;
710: }
711:
712: // FIXME: shouldn't this be an exception?
713: m_log.log("filler", 0, "Error: No rules defined for element "
714: + e);
715: }
716:
717: /**
718: * This method sets the relations between the current java object
719: * and its parent object according to the element hierarchy.
720: * Usually it involves adding the object to the parent's child object
721: * list.
722: */
723: public void setElementRelation(String elementName) {
724: if (elementName.equals("profile")) {
725: m_job.addProfile(m_profile);
726: m_tag = TAG_OTHER;
727: } else if (elementName.equals("job")) {
728: m_tag = TAG_ADAG;
729: m_adag.addJob(m_job);
730: m_log.log("filler", 3, "Adding job " + m_job.getID());
731: } else if (elementName.equals("argument")) {
732: m_tag = TAG_OTHER;
733: } else {
734: // m_log.log( "filler", 0, "Cannot guess parent for " + elementName );
735: }
736: }
737:
738: /**
739: * This method sets the content of the java object corresponding to
740: * the element "text", which has mixed content.
741: * @see org.griphyn.vdl.classes.Text
742: */
743: public void elementCharacters(String elementChars) {
744: PseudoText text = new PseudoText(elementChars);
745:
746: switch (m_tag) {
747: case TAG_PROFILE:
748: m_profile.addLeaf(text);
749: this .log("profile", "text", elementChars);
750: break;
751: case TAG_ARGUMENT:
752: m_job.addArgument(text);
753: this .log("argument", "text", elementChars);
754: }
755: }
756: }
|