001: /*
002: * Copyright 1999-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: /*
017: * $Id: IncrementalSAXSource_Xerces.java,v 1.16 2004/12/15 17:35:48 jycli Exp $
018: */
019:
020: package org.apache.xml.dtm.ref;
021:
022: import java.io.IOException;
023: import java.lang.reflect.Constructor;
024: import java.lang.reflect.Method;
025:
026: import org.apache.xerces.parsers.SAXParser;
027: import org.apache.xml.res.XMLErrorResources;
028: import org.apache.xml.res.XMLMessages;
029:
030: import org.xml.sax.InputSource;
031: import org.xml.sax.SAXException;
032: import org.xml.sax.XMLReader;
033:
034: /** <p>IncrementalSAXSource_Xerces takes advantage of the fact that Xerces1
035: * incremental mode is already a coroutine of sorts, and just wraps our
036: * IncrementalSAXSource API around it.</p>
037: *
038: * <p>Usage example: See main().</p>
039: *
040: * <p>Status: Passes simple main() unit-test. NEEDS JAVADOC.</p>
041: * */
042: public class IncrementalSAXSource_Xerces implements
043: IncrementalSAXSource {
044: //
045: // Reflection. To allow this to compile with both Xerces1 and Xerces2, which
046: // require very different methods and objects, we need to avoid static
047: // references to those APIs. So until Xerces2 is pervasive and we're willing
048: // to make it a prerequisite, we will rely upon relection.
049: //
050: Method fParseSomeSetup = null; // Xerces1 method
051: Method fParseSome = null; // Xerces1 method
052: Object fPullParserConfig = null; // Xerces2 pull control object
053: Method fConfigSetInput = null; // Xerces2 method
054: Method fConfigParse = null; // Xerces2 method
055: Method fSetInputSource = null; // Xerces2 pull control method
056: Constructor fConfigInputSourceCtor = null; // Xerces2 initialization method
057: Method fConfigSetByteStream = null; // Xerces2 initialization method
058: Method fConfigSetCharStream = null; // Xerces2 initialization method
059: Method fConfigSetEncoding = null; // Xerces2 initialization method
060: Method fReset = null; // Both Xerces1 and Xerces2, but diff. signatures
061:
062: //
063: // Data
064: //
065: SAXParser fIncrementalParser;
066: private boolean fParseInProgress = false;
067:
068: //
069: // Constructors
070: //
071:
072: /** Create a IncrementalSAXSource_Xerces, and create a SAXParser
073: * to go with it. Xerces2 incremental parsing is only supported if
074: * this constructor is used, due to limitations in the Xerces2 API (as of
075: * Beta 3). If you don't like that restriction, tell the Xerces folks that
076: * there should be a simpler way to request incremental SAX parsing.
077: * */
078: public IncrementalSAXSource_Xerces() throws NoSuchMethodException {
079: try {
080: // Xerces-2 incremental parsing support (as of Beta 3)
081: // ContentHandlers still get set on fIncrementalParser (to get
082: // conversion from XNI events to SAX events), but
083: // _control_ for incremental parsing must be exercised via the config.
084: //
085: // At this time there's no way to read the existing config, only
086: // to assert a new one... and only when creating a brand-new parser.
087: //
088: // Reflection is used to allow us to continue to compile against
089: // Xerces1. If/when we can abandon the older versions of the parser,
090: // this will simplify significantly.
091:
092: // If we can't get the magic constructor, no need to look further.
093: Class xniConfigClass = ObjectFactory
094: .findProviderClass(
095: "org.apache.xerces.xni.parser.XMLParserConfiguration",
096: ObjectFactory.findClassLoader(), true);
097: Class[] args1 = { xniConfigClass };
098: Constructor ctor = SAXParser.class.getConstructor(args1);
099:
100: // Build the parser configuration object. StandardParserConfiguration
101: // happens to implement XMLPullParserConfiguration, which is the API
102: // we're going to want to use.
103: Class xniStdConfigClass = ObjectFactory
104: .findProviderClass(
105: "org.apache.xerces.parsers.StandardParserConfiguration",
106: ObjectFactory.findClassLoader(), true);
107: fPullParserConfig = xniStdConfigClass.newInstance();
108: Object[] args2 = { fPullParserConfig };
109: fIncrementalParser = (SAXParser) ctor.newInstance(args2);
110:
111: // Preload all the needed the configuration methods... I want to know they're
112: // all here before we commit to trying to use them, just in case the
113: // API changes again.
114: Class fXniInputSourceClass = ObjectFactory
115: .findProviderClass(
116: "org.apache.xerces.xni.parser.XMLInputSource",
117: ObjectFactory.findClassLoader(), true);
118: Class[] args3 = { fXniInputSourceClass };
119: fConfigSetInput = xniStdConfigClass.getMethod(
120: "setInputSource", args3);
121:
122: Class[] args4 = { String.class, String.class, String.class };
123: fConfigInputSourceCtor = fXniInputSourceClass
124: .getConstructor(args4);
125: Class[] args5 = { java.io.InputStream.class };
126: fConfigSetByteStream = fXniInputSourceClass.getMethod(
127: "setByteStream", args5);
128: Class[] args6 = { java.io.Reader.class };
129: fConfigSetCharStream = fXniInputSourceClass.getMethod(
130: "setCharacterStream", args6);
131: Class[] args7 = { String.class };
132: fConfigSetEncoding = fXniInputSourceClass.getMethod(
133: "setEncoding", args7);
134:
135: Class[] argsb = { Boolean.TYPE };
136: fConfigParse = xniStdConfigClass.getMethod("parse", argsb);
137: Class[] noargs = new Class[0];
138: fReset = fIncrementalParser.getClass().getMethod("reset",
139: noargs);
140: } catch (Exception e) {
141: // Fallback if this fails (implemented in createIncrementalSAXSource) is
142: // to attempt Xerces-1 incremental setup. Can't do tail-call in
143: // constructor, so create new, copy Xerces-1 initialization,
144: // then throw it away... Ugh.
145: IncrementalSAXSource_Xerces dummy = new IncrementalSAXSource_Xerces(
146: new SAXParser());
147: this .fParseSomeSetup = dummy.fParseSomeSetup;
148: this .fParseSome = dummy.fParseSome;
149: this .fIncrementalParser = dummy.fIncrementalParser;
150: }
151: }
152:
153: /** Create a IncrementalSAXSource_Xerces wrapped around
154: * an existing SAXParser. Currently this works only for recent
155: * releases of Xerces-1. Xerces-2 incremental is currently possible
156: * only if we are allowed to create the parser instance, due to
157: * limitations in the API exposed by Xerces-2 Beta 3; see the
158: * no-args constructor for that code.
159: *
160: * @exception if the SAXParser class doesn't support the Xerces
161: * incremental parse operations. In that case, caller should
162: * fall back upon the IncrementalSAXSource_Filter approach.
163: * */
164: public IncrementalSAXSource_Xerces(SAXParser parser)
165: throws NoSuchMethodException {
166: // Reflection is used to allow us to compile against
167: // Xerces2. If/when we can abandon the older versions of the parser,
168: // this constructor will simply have to fail until/unless the
169: // Xerces2 incremental support is made available on previously
170: // constructed SAXParser instances.
171: fIncrementalParser = parser;
172: Class me = parser.getClass();
173: Class[] parms = { InputSource.class };
174: fParseSomeSetup = me.getMethod("parseSomeSetup", parms);
175: parms = new Class[0];
176: fParseSome = me.getMethod("parseSome", parms);
177: // Fallback if this fails (implemented in createIncrementalSAXSource) is
178: // to use IncrementalSAXSource_Filter rather than Xerces-specific code.
179: }
180:
181: //
182: // Factories
183: //
184: static public IncrementalSAXSource createIncrementalSAXSource() {
185: try {
186: return new IncrementalSAXSource_Xerces();
187: } catch (NoSuchMethodException e) {
188: // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded.
189: // Fall back on filtering solution.
190: IncrementalSAXSource_Filter iss = new IncrementalSAXSource_Filter();
191: iss.setXMLReader(new SAXParser());
192: return iss;
193: }
194: }
195:
196: static public IncrementalSAXSource createIncrementalSAXSource(
197: SAXParser parser) {
198: try {
199: return new IncrementalSAXSource_Xerces(parser);
200: } catch (NoSuchMethodException e) {
201: // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded.
202: // Fall back on filtering solution.
203: IncrementalSAXSource_Filter iss = new IncrementalSAXSource_Filter();
204: iss.setXMLReader(parser);
205: return iss;
206: }
207: }
208:
209: //
210: // Public methods
211: //
212:
213: // Register handler directly with the incremental parser
214: public void setContentHandler(org.xml.sax.ContentHandler handler) {
215: // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
216: // %OPT% Cast at asignment?
217: ((XMLReader) fIncrementalParser).setContentHandler(handler);
218: }
219:
220: // Register handler directly with the incremental parser
221: public void setLexicalHandler(org.xml.sax.ext.LexicalHandler handler) {
222: // Not supported by all SAX2 parsers but should work in Xerces:
223: try {
224: // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
225: // %OPT% Cast at asignment?
226: ((XMLReader) fIncrementalParser).setProperty(
227: "http://xml.org/sax/properties/lexical-handler",
228: handler);
229: } catch (org.xml.sax.SAXNotRecognizedException e) {
230: // Nothing we can do about it
231: } catch (org.xml.sax.SAXNotSupportedException e) {
232: // Nothing we can do about it
233: }
234: }
235:
236: // Register handler directly with the incremental parser
237: public void setDTDHandler(org.xml.sax.DTDHandler handler) {
238: // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
239: // %OPT% Cast at asignment?
240: ((XMLReader) fIncrementalParser).setDTDHandler(handler);
241: }
242:
243: //================================================================
244: /** startParse() is a simple API which tells the IncrementalSAXSource
245: * to begin reading a document.
246: *
247: * @throws SAXException is parse thread is already in progress
248: * or parsing can not be started.
249: * */
250: public void startParse(InputSource source) throws SAXException {
251: if (fIncrementalParser == null)
252: throw new SAXException(XMLMessages.createXMLMessage(
253: XMLErrorResources.ER_STARTPARSE_NEEDS_SAXPARSER,
254: null)); //"startParse needs a non-null SAXParser.");
255: if (fParseInProgress)
256: throw new SAXException(
257: XMLMessages
258: .createXMLMessage(
259: XMLErrorResources.ER_STARTPARSE_WHILE_PARSING,
260: null)); //"startParse may not be called while parsing.");
261:
262: boolean ok = false;
263:
264: try {
265: ok = parseSomeSetup(source);
266: } catch (Exception ex) {
267: throw new SAXException(ex);
268: }
269:
270: if (!ok)
271: throw new SAXException(XMLMessages.createXMLMessage(
272: XMLErrorResources.ER_COULD_NOT_INIT_PARSER, null)); //"could not initialize parser with");
273: }
274:
275: /** deliverMoreNodes() is a simple API which tells the coroutine
276: * parser that we need more nodes. This is intended to be called
277: * from one of our partner routines, and serves to encapsulate the
278: * details of how incremental parsing has been achieved.
279: *
280: * @param parsemore If true, tells the incremental parser to generate
281: * another chunk of output. If false, tells the parser that we're
282: * satisfied and it can terminate parsing of this document.
283: * @return Boolean.TRUE if the CoroutineParser believes more data may be available
284: * for further parsing. Boolean.FALSE if parsing ran to completion.
285: * Exception if the parser objected for some reason.
286: * */
287: public Object deliverMoreNodes(boolean parsemore) {
288: if (!parsemore) {
289: fParseInProgress = false;
290: return Boolean.FALSE;
291: }
292:
293: Object arg;
294: try {
295: boolean keepgoing = parseSome();
296: arg = keepgoing ? Boolean.TRUE : Boolean.FALSE;
297: } catch (SAXException ex) {
298: arg = ex;
299: } catch (IOException ex) {
300: arg = ex;
301: } catch (Exception ex) {
302: arg = new SAXException(ex);
303: }
304: return arg;
305: }
306:
307: // Private methods -- conveniences to hide the reflection details
308: private boolean parseSomeSetup(InputSource source)
309: throws SAXException, IOException, IllegalAccessException,
310: java.lang.reflect.InvocationTargetException,
311: java.lang.InstantiationException {
312: if (fConfigSetInput != null) {
313: // Obtain input from SAX inputSource object, construct XNI version of
314: // that object. Logic adapted from Xerces2.
315: Object[] parms1 = { source.getPublicId(),
316: source.getSystemId(), null };
317: Object xmlsource = fConfigInputSourceCtor
318: .newInstance(parms1);
319: Object[] parmsa = { source.getByteStream() };
320: fConfigSetByteStream.invoke(xmlsource, parmsa);
321: parmsa[0] = source.getCharacterStream();
322: fConfigSetCharStream.invoke(xmlsource, parmsa);
323: parmsa[0] = source.getEncoding();
324: fConfigSetEncoding.invoke(xmlsource, parmsa);
325:
326: // Bugzilla5272 patch suggested by Sandy Gao.
327: // Has to be reflection to run with Xerces2
328: // after compilation against Xerces1. or vice
329: // versa, due to return type mismatches.
330: Object[] noparms = new Object[0];
331: fReset.invoke(fIncrementalParser, noparms);
332:
333: parmsa[0] = xmlsource;
334: fConfigSetInput.invoke(fPullParserConfig, parmsa);
335:
336: // %REVIEW% Do first pull. Should we instead just return true?
337: return parseSome();
338: } else {
339: Object[] parm = { source };
340: Object ret = fParseSomeSetup.invoke(fIncrementalParser,
341: parm);
342: return ((Boolean) ret).booleanValue();
343: }
344: }
345:
346: // Would null work???
347: private static final Object[] noparms = new Object[0];
348: private static final Object[] parmsfalse = { Boolean.FALSE };
349:
350: private boolean parseSome() throws SAXException, IOException,
351: IllegalAccessException,
352: java.lang.reflect.InvocationTargetException {
353: // Take next parsing step, return false iff parsing complete:
354: if (fConfigSetInput != null) {
355: Object ret = (Boolean) (fConfigParse.invoke(
356: fPullParserConfig, parmsfalse));
357: return ((Boolean) ret).booleanValue();
358: } else {
359: Object ret = fParseSome.invoke(fIncrementalParser, noparms);
360: return ((Boolean) ret).booleanValue();
361: }
362: }
363:
364: //================================================================
365: /** Simple unit test. Attempt coroutine parsing of document indicated
366: * by first argument (as a URI), report progress.
367: */
368: public static void main(String args[]) {
369: System.out.println("Starting...");
370:
371: CoroutineManager co = new CoroutineManager();
372: int appCoroutineID = co.co_joinCoroutineSet(-1);
373: if (appCoroutineID == -1) {
374: System.out
375: .println("ERROR: Couldn't allocate coroutine number.\n");
376: return;
377: }
378: IncrementalSAXSource parser = createIncrementalSAXSource();
379:
380: // Use a serializer as our sample output
381: org.apache.xml.serialize.XMLSerializer trace;
382: trace = new org.apache.xml.serialize.XMLSerializer(System.out,
383: null);
384: parser.setContentHandler(trace);
385: parser.setLexicalHandler(trace);
386:
387: // Tell coroutine to begin parsing, run while parsing is in progress
388:
389: for (int arg = 0; arg < args.length; ++arg) {
390: try {
391: InputSource source = new InputSource(args[arg]);
392: Object result = null;
393: boolean more = true;
394: parser.startParse(source);
395: for (result = parser.deliverMoreNodes(more); result == Boolean.TRUE; result = parser
396: .deliverMoreNodes(more)) {
397: System.out
398: .println("\nSome parsing successful, trying more.\n");
399:
400: // Special test: Terminate parsing early.
401: if (arg + 1 < args.length
402: && "!".equals(args[arg + 1])) {
403: ++arg;
404: more = false;
405: }
406:
407: }
408:
409: if (result instanceof Boolean
410: && ((Boolean) result) == Boolean.FALSE) {
411: System.out
412: .println("\nParser ended (EOF or on request).\n");
413: } else if (result == null) {
414: System.out
415: .println("\nUNEXPECTED: Parser says shut down prematurely.\n");
416: } else if (result instanceof Exception) {
417: throw new org.apache.xml.utils.WrappedRuntimeException(
418: (Exception) result);
419: // System.out.println("\nParser threw exception:");
420: // ((Exception)result).printStackTrace();
421: }
422:
423: }
424:
425: catch (SAXException e) {
426: e.printStackTrace();
427: }
428: }
429:
430: }
431:
432: } // class IncrementalSAXSource_Xerces
|