001: /*
002: * @(#)DigestReader.java 1.2 04/12/06
003: *
004: * Copyright (c) 2003 Sun Microsystems, Inc. All Rights Reserved.
005: *
006: * See the file "LICENSE.txt" for information on usage and redistribution
007: * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
008: */
009: package pnuts.xml;
010:
011: import org.xml.sax.*;
012: import java.util.*;
013: import java.io.*;
014: import javax.xml.parsers.*;
015:
016: /**
017: * DigestReader is used to retrieve useful information, for the application, from a XML document.
018: * <p>
019: * XML documents are processed based on user-defined 'rules', which consists of [Action, Path, Key].
020: * </p>
021: * <BLOCKQUOTE><DL>
022: * <DT>Action
023: * <DD>A DigestAction object, or the associated name of the action.
024: * <DT>Path
025: * <DD>Qualified Names separated by slash (/). Wildcard (*) can be used instead of actual names.
026: * When <em>Path</em> starts with '//', it is used to match sub-elements.
027: * (e.g., /a/b/c, /a/ * /c, //b/c, // *)
028: * <DT>Key
029: * <DD>the key to access the intermediate result. If omitted, the relative path to the
030: * nearest ancestor node is implicitly defined.
031: * </BLOCKQUOTE></DL>
032: * <p>
033: * Those rules can be defined with the setRules() method or addRule() method.
034: * <p>
035: * User can define the action names with setAlias() method, passing a Map of String->DigestAction.
036: * <p>
037: *
038: * One of <code>parse()</code> methods processes a XML document based on the rules.
039: * <p>An example:
040: * <pre>
041: * import pnuts.xml.*;
042: * import pnuts.xml.action.*;
043: *
044: * DigestReader dr = new DigestReader();
045: * DigestAction text = new TextAction();
046: * DigestAction list = new ListAction();
047: * DigestAction map = new MapAction();
048: * Object[][] rules = {{text, "/rss/channel/title", "title"},
049: * {text, "/rss/channel/link", "link"},
050: * {text, "/rss/channel/description", "description"},
051: * {list, "/rss/channel/item", "item"},
052: * {text, "/rss/channel/item/title"},
053: * {text, "/rss/channel/item/link"},
054: * {text, "/rss/channel/item/description"}};
055: * dr.setRules(rules);
056: * Map doc = (Map)dr.parse(new FileInputStream("rss.xml"));
057: * </pre>
058: */
059: public class DigestReader extends DigestHandler {
060:
061: private SAXParser parser;
062: private Map aliasMap = new HashMap();
063: private EntityResolver entityResolver;
064:
065: /**
066: * Constructor
067: */
068: public DigestReader() {
069: this (getDefaultParser(), null);
070: }
071:
072: /**
073: * Constructor
074: *
075: * @param defs the rules that consist of 'Action', 'Path', and optional 'Key'.
076: * <DL>
077: * <DT>Action
078: * <DD>A DigestAction object, or the associated name of the action.
079: * <DT>Path
080: * <DD>A path identifier, which is a '/'-separated qualified names
081: * <DT>Key
082: * <DD>the key to access the intermediate result.
083: * </DL>
084: */
085: public DigestReader(Object[][] defs) {
086: this (getDefaultParser(), defs);
087: }
088:
089: /**
090: * Constructor
091: *
092: * @param parser a SAX parser
093: */
094: public DigestReader(SAXParser parser) {
095: this .parser = parser;
096: }
097:
098: /**
099: * Constructor
100: *
101: * @param parser a SAX parser
102: * @param defs the rules that consist of 'Action', 'Path', and optional 'Key'.
103: * <DL>
104: * <DT>Action
105: * <DD>A DigestAction object, or the associated name of the action.
106: * <DT>Path
107: * <DD>A path identifier, which is a '/'-separated qualified names
108: * <DT>Key
109: * <DD>the key to access the intermediate result.
110: * </DL>
111: */
112: public DigestReader(SAXParser parser, Object[][] defs) {
113: this .parser = parser;
114: if (defs != null) {
115: setRules(defs);
116: }
117: }
118:
119: /**
120: * Defines the alias map; ActionName -> DigestAction
121: *
122: * @param map the alias map
123: */
124: public void setAliases(Map map) {
125: aliasMap = map;
126: }
127:
128: /**
129: * Retrieves the alias map
130: *
131: * @return the alias map; ActionName -> DigestAction
132: */
133: public Map getAliases() {
134: return aliasMap;
135: }
136:
137: /**
138: * Sets the rules
139: *
140: * @param defs the rules that consist of 'Action', 'Path', and optional 'Key'.
141: * <DL>
142: * <DT>Action
143: * <DD>A DigestAction object, or the associated name of the action.
144: * <DT>Path
145: * <DD>A path identifier, which is a '/'-separated qualified names
146: * <DT>Key
147: * <DD>the key to access the intermediate result.
148: * </DL>
149: */
150: public void setRules(Object[][] defs) {
151: boolean useDefaultRuleSet = false;
152: for (int i = 0; i < defs.length; i++) {
153: String path = (String) defs[i][1];
154: if (path.startsWith("//") || path.indexOf("/*") >= 0) {
155: useDefaultRuleSet = true;
156: break;
157: }
158: }
159: if (useDefaultRuleSet) {
160: setRuleSet(new DefaultRuleSet());
161: } else {
162: setRuleSet(new SimpleRuleSet());
163: }
164: for (int i = 0; i < defs.length; i++) {
165: Object[] tpl = defs[i];
166: Object type = tpl[0];
167: String path = (String) tpl[1];
168: DigestAction action = null;
169: if (type instanceof DigestAction) {
170: action = (DigestAction) type;
171: } else {
172: action = (DigestAction) aliasMap.get(type);
173: }
174: if (action != null) {
175: String key = null;
176: if (tpl.length > 2) {
177: key = (String) tpl[2];
178: }
179: addRule(action, path, key);
180: }
181: }
182: }
183:
184: /**
185: * Processes a XML document with the registered rules, and return the result.
186: *
187: * @param input an input source
188: * @param value an initial value passed to the digest handler
189: * @return the object specified to <em>value</em>
190: */
191: public Object parse(InputSource input, Object value)
192: throws org.xml.sax.SAXException, IOException {
193: setValue(value);
194: if (!parser.isValidating()) {
195: entityResolver = new EntityResolver() {
196: public InputSource resolveEntity(String publicId,
197: String systemId)
198: throws org.xml.sax.SAXException {
199: return new InputSource(new NullInputStream());
200: }
201: };
202: }
203: parser.parse(input, this );
204: return getValue();
205: }
206:
207: /**
208: * Processes a XML document with the registered rules, and return the result.
209: *
210: * @param input an input stream
211: * @param value an initial value passed to the digest handler
212: * @return the object specified to <em>value</em>
213: */
214: public Object parse(InputStream input, Object value)
215: throws org.xml.sax.SAXException, IOException {
216: return parse(new InputSource(input), value);
217: }
218:
219: /**
220: * Processes a XML document with the registered rules, and return the result.
221: *
222: * @param input an input source
223: * @return a Map object implicitly given.
224: */
225: public Object parse(InputSource input)
226: throws org.xml.sax.SAXException, IOException {
227: return parse(input, new HashMap());
228: }
229:
230: /**
231: * Processes a XML document with the registered rules, and return the result.
232: *
233: * @param input an input stream
234: * @return a Map object implicitly given.
235: */
236: public Object parse(InputStream input)
237: throws org.xml.sax.SAXException, IOException {
238: return parse(new InputSource(input));
239: }
240:
241: public InputSource resolveEntity(String publicId, String systemId)
242: throws org.xml.sax.SAXException {
243: if (entityResolver != null) {
244: try {
245: return entityResolver.resolveEntity(publicId, systemId);
246: } catch (IOException e) {
247: return null;
248: }
249: } else {
250: return null;
251: }
252: }
253:
254: static SAXParser getDefaultParser() {
255: try {
256: SAXParserFactory factory = SAXParserFactory.newInstance();
257: factory.setNamespaceAware(false);
258: factory.setValidating(false);
259: return factory.newSAXParser();
260: } catch (org.xml.sax.SAXException e1) {
261: } catch (ParserConfigurationException e2) {
262: }
263: return null;
264: }
265:
266: static class NullInputStream extends InputStream {
267:
268: public int read() throws IOException {
269: return -1;
270: }
271:
272: public int read(byte[] buf, int offse, int size)
273: throws IOException {
274: return -1;
275: }
276: }
277: }
|