001: package net.sf.saxon.event;
002:
003: import net.sf.saxon.Configuration;
004: import net.sf.saxon.value.Whitespace;
005: import net.sf.saxon.om.*;
006: import net.sf.saxon.style.StandardNames;
007: import net.sf.saxon.tinytree.CharSlice;
008: import net.sf.saxon.trans.XPathException;
009: import net.sf.saxon.type.ValidationException;
010: import org.xml.sax.*;
011: import org.xml.sax.ext.LexicalHandler;
012:
013: import javax.xml.transform.TransformerException;
014: import java.net.URI;
015: import java.net.URISyntaxException;
016: import java.util.HashMap;
017:
018: /**
019: * ReceivingContentHandler is a glue class that provides a standard SAX ContentHandler
020: * interface to a Saxon Receiver. To achieve this it needs to map names supplied
021: * as strings to numeric name codes, for which purpose it needs access to a name
022: * pool. The class also performs the function of assembling adjacent text nodes.
023: * <p>The class was previously named ContentEmitter.</p>
024: * @author Michael H. Kay
025: */
026:
027: public class ReceivingContentHandler implements ContentHandler,
028: LexicalHandler, DTDHandler, SaxonLocator {
029: private NamePool pool;
030: private PipelineConfiguration pipe;
031: private Receiver receiver;
032: private boolean inDTD = false; // true while processing the DTD
033: private Locator locator; // a SAX Locator
034:
035: // buffer for accumulating character data, until the next markup event is received
036:
037: private char[] buffer = new char[4096];
038: private int used = 0;
039: private CharSlice slice = new CharSlice(buffer, 0, 0);
040:
041: // array for accumulating namespace information
042:
043: private int[] namespaces = new int[20];
044: private int namespacesUsed = 0;
045:
046: // determine whether ignorable whitespace is ignored
047:
048: private boolean ignoreIgnorable = false;
049:
050: // determine whether DTD attribute types are retained
051:
052: private boolean retainDTDAttributeTypes = false;
053:
054: /**
055: * A local cache is used to avoid allocating namecodes for the same name more than once.
056: * This reduces contention on the NamePool. This is a two-level hashmap: the first level
057: * has the namespace URI as its key, and returns a HashMap which maps lexical QNames to integer
058: * namecodes.
059: */
060:
061: private HashMap cache = new HashMap(10);
062: private HashMap noNamespaceMap;
063:
064: /**
065: * create a ReceivingContentHandler and initialise variables
066: */
067:
068: public ReceivingContentHandler() {
069: }
070:
071: public void setReceiver(Receiver e) {
072: receiver = e;
073: }
074:
075: public void setPipelineConfiguration(PipelineConfiguration pipe) {
076: this .pipe = pipe;
077: pipe.setLocationProvider(this );
078: Configuration config = pipe.getConfiguration();
079: this .pool = config.getNamePool();
080: ignoreIgnorable = config.getStripsWhiteSpace() != Whitespace.NONE;
081: retainDTDAttributeTypes = config.isRetainDTDAttributeTypes();
082: }
083:
084: public PipelineConfiguration getPipelineConfiguration() {
085: return pipe;
086: }
087:
088: public Configuration getConfiguration() {
089: return pipe.getConfiguration();
090: }
091:
092: /**
093: * Set whether "ignorable whitespace" should be ignored. This method is effective only
094: * if called after setPipelineConfiguration, since the default value is taken from the
095: * configuration.
096: */
097:
098: public void setIgnoreIgnorableWhitespace(boolean ignore) {
099: ignoreIgnorable = ignore;
100: }
101:
102: /**
103: * Determine whether "ignorable whitespace" is ignored. This returns the value that was set
104: * using {@link #setIgnoreIgnorableWhitespace} if that has been called; otherwise the value
105: * from the configuration.
106: */
107:
108: public boolean isIgnoringIgnorableWhitespace() {
109: return ignoreIgnorable;
110: }
111:
112: /**
113: * Callback interface for SAX: not for application use
114: */
115:
116: public void startDocument() throws SAXException {
117: // System.err.println("ReceivingContentHandler#startDocument");
118: try {
119: used = 0;
120: namespacesUsed = 0;
121: pipe.setLocationProvider(this );
122: receiver.setPipelineConfiguration(pipe);
123: receiver.open();
124: receiver.startDocument(0);
125: } catch (XPathException err) {
126: throw new SAXException(err);
127: }
128: }
129:
130: /**
131: * Callback interface for SAX: not for application use
132: */
133:
134: public void endDocument() throws SAXException {
135: try {
136: flush();
137: receiver.endDocument();
138: receiver.close();
139: } catch (ValidationException err) {
140: err.setLocator(locator);
141: throw new SAXException(err);
142: } catch (XPathException err) {
143: throw new SAXException(err);
144: }
145: }
146:
147: /**
148: * Callback interface for SAX: not for application use
149: */
150:
151: public void setDocumentLocator(Locator locator) {
152: this .locator = locator;
153: }
154:
155: /**
156: * Callback interface for SAX: not for application use
157: */
158:
159: public void startPrefixMapping(String prefix, String uri)
160: throws SAXException {
161: //System.err.println("StartPrefixMapping " + prefix + "=" + uri);
162: if (prefix.equals("xmlns")) {
163: // the binding xmlns:xmlns="http://www.w3.org/2000/xmlns/"
164: // should never be reported, but it's been known to happen
165: return;
166: }
167: if (namespacesUsed >= namespaces.length) {
168: int[] n2 = new int[namespacesUsed * 2];
169: System.arraycopy(namespaces, 0, n2, 0, namespacesUsed);
170: namespaces = n2;
171: }
172: namespaces[namespacesUsed++] = pool.allocateNamespaceCode(
173: prefix, uri);
174: }
175:
176: /**
177: * Callback interface for SAX: not for application use
178: */
179:
180: public void endPrefixMapping(String prefix) throws SAXException {
181: }
182:
183: /**
184: * Callback interface for SAX: not for application use
185: */
186: public void startElement(String uri, String localname,
187: String rawname, Attributes atts) throws SAXException {
188: // System.err.println("ReceivingContentHandler#startElement " + uri + "," + localname + "," + rawname + " at line " + locator.getLineNumber());
189: //for (int a=0; a<atts.getLength(); a++) {
190: // System.err.println(" Attribute " + atts.getURI(a) + "/" + atts.getLocalName(a) + "/" + atts.getQName(a));
191: //}
192: try {
193: flush();
194:
195: int nameCode = getNameCode(uri, localname, rawname);
196: receiver.startElement(nameCode, StandardNames.XDT_UNTYPED,
197: 0, 0);
198:
199: for (int n = 0; n < namespacesUsed; n++) {
200: receiver.namespace(namespaces[n], 0);
201: }
202:
203: for (int a = 0; a < atts.getLength(); a++) {
204: int properties = 0;
205: String qname = atts.getQName(a);
206: if (qname.startsWith("xmlns")
207: && (qname.equals("xmlns") || qname
208: .startsWith("xmlns:"))) {
209: // We normally configure the parser so that it doesn't notify namespaces as attributes.
210: // But when running as a TransformerHandler, we have no control over the feature settings
211: // of the sender of the events. So we filter them out, just in case. There might be cases
212: // where we ought not just to ignore them, but to handle them as namespace events, but
213: // we'll cross that bridge when we come to it.
214: continue;
215: }
216: int attCode = getNameCode(atts.getURI(a), atts
217: .getLocalName(a), atts.getQName(a));
218: String type = atts.getType(a);
219: int typeCode = StandardNames.XDT_UNTYPED_ATOMIC;
220: if (retainDTDAttributeTypes) {
221: if (type.equals("CDATA")) {
222: // no action
223: } else if (type.equals("ID")) {
224: typeCode = StandardNames.XS_ID;
225: } else if (type.equals("IDREF")) {
226: typeCode = StandardNames.XS_IDREF;
227: } else if (type.equals("IDREFS")) {
228: typeCode = StandardNames.XS_IDREFS;
229: } else if (type.equals("NMTOKEN")) {
230: typeCode = StandardNames.XS_NMTOKEN;
231: } else if (type.equals("NMTOKENS")) {
232: typeCode = StandardNames.XS_NMTOKENS;
233: } else if (type.equals("ENTITY")) {
234: typeCode = StandardNames.XS_ENTITY;
235: } else if (type.equals("ENTITIES")) {
236: typeCode = StandardNames.XS_ENTITIES;
237: }
238: } else {
239: if (type.equals("ID")) {
240: typeCode = StandardNames.XS_ID
241: | NodeInfo.IS_DTD_TYPE;
242: } else if (type.equals("IDREF")) {
243: typeCode = StandardNames.XS_IDREF
244: | NodeInfo.IS_DTD_TYPE;
245: } else if (type.equals("IDREFS")) {
246: typeCode = StandardNames.XS_IDREFS
247: | NodeInfo.IS_DTD_TYPE;
248: }
249: }
250:
251: receiver.attribute(attCode, typeCode, atts.getValue(a),
252: 0, properties);
253: }
254:
255: receiver.startContent();
256:
257: namespacesUsed = 0;
258: } catch (ValidationException err) {
259: if (err.getLineNumber() == -1) {
260: err.setLocator(locator);
261: }
262: throw new SAXException(err);
263: } catch (XPathException err) {
264: throw new SAXException(err);
265: }
266: }
267:
268: private int getNameCode(String uri, String localname, String rawname)
269: throws SAXException {
270: // System.err.println("URI=" + uri + " local=" + " raw=" + rawname);
271: // The XML parser isn't required to report the rawname (qname), though all known parsers do.
272: // If none is provided, we give up
273: if (rawname.equals("")) {
274: throw new SAXException(
275: "Saxon requires an XML parser that reports the QName of each element");
276: }
277: // It's also possible (especially when using a TransformerHandler) that the parser
278: // has been configured to report the QName rather than the localname+URI
279: if (localname.equals("")) {
280: throw new SAXException(
281: "Parser configuration problem: namespace reporting is not enabled");
282: }
283:
284: // Following code maintains a local cache to remember all the namecodes that have been
285: // allocated, which reduces contention on the NamePool. It also avoid parsing the lexical QName
286: // when the same name is used repeatedly. We also get a tiny improvement by avoiding the first hash
287: // table lookup for names in the null namespace.
288:
289: HashMap map2 = (uri.equals("") ? noNamespaceMap
290: : (HashMap) cache.get(uri));
291: if (map2 == null) {
292: map2 = new HashMap(50);
293: cache.put(uri, map2);
294: if (uri.equals("")) {
295: noNamespaceMap = map2;
296: }
297: }
298:
299: Integer n = (Integer) map2.get(rawname);
300: // we use the rawname (qname) rather than the local name because we want a namecode rather than
301: // a fingerprint - that is, the prefix matters.
302: if (n == null) {
303: String prefix = NameChecker.getPrefix(rawname);
304: int nc = pool.allocate(prefix, uri, localname);
305: n = new Integer(nc);
306: map2.put(rawname, n);
307: return nc;
308: } else {
309: return n.intValue();
310: }
311:
312: }
313:
314: /**
315: * Callback interface for SAX: not for application use
316: */
317:
318: public void endElement(String uri, String localname, String rawname)
319: throws SAXException {
320: // System.err.println("ReceivingContentHandler#End element " + rawname);
321: try {
322: flush();
323: receiver.endElement();
324: } catch (ValidationException err) {
325: err.setLocator(locator);
326: if (!err.hasBeenReported()) {
327: try {
328: pipe.getErrorListener().fatalError(err);
329: } catch (TransformerException e) {
330: //
331: }
332: }
333: err.setHasBeenReported();
334: throw new SAXException(err);
335: } catch (XPathException err) {
336: throw new SAXException(err);
337: }
338: }
339:
340: /**
341: * Callback interface for SAX: not for application use
342: */
343:
344: public void characters(char ch[], int start, int length)
345: throws SAXException {
346: // System.err.println("characters (" + length + ")");
347: // need to concatenate chunks of text before we can decide whether a node is all-white
348:
349: while (used + length > buffer.length) {
350: char[] newbuffer = new char[buffer.length * 2];
351: System.arraycopy(buffer, 0, newbuffer, 0, used);
352: buffer = newbuffer;
353: slice = new CharSlice(buffer, 0, 0);
354: }
355: System.arraycopy(ch, start, buffer, used, length);
356: used += length;
357: }
358:
359: /**
360: * Callback interface for SAX: not for application use
361: */
362:
363: public void ignorableWhitespace(char ch[], int start, int length)
364: throws SAXException {
365: if (!ignoreIgnorable) {
366: characters(ch, start, length);
367: }
368: }
369:
370: /**
371: * Callback interface for SAX: not for application use<BR>
372: */
373:
374: public void processingInstruction(String name, String remainder)
375: throws SAXException {
376: try {
377: flush();
378: if (!inDTD) {
379: if (name == null) {
380: // trick used by some SAX1 parsers to notify a comment
381: comment(remainder.toCharArray(), 0, remainder
382: .length());
383: } else {
384: // some parsers allow through PI names containing colons
385: if (!getConfiguration().getNameChecker()
386: .isValidNCName(name)) {
387: throw new SAXException(
388: "Invalid processing instruction name ("
389: + name + ')');
390: }
391: receiver.processingInstruction(name, Whitespace
392: .removeLeadingWhitespace(remainder), 0, 0);
393: }
394: }
395: } catch (XPathException err) {
396: throw new SAXException(err);
397: }
398: }
399:
400: /**
401: * Callback interface for SAX (part of LexicalHandler interface): not for application use
402: */
403:
404: public void comment(char ch[], int start, int length)
405: throws SAXException {
406: try {
407: flush();
408: if (!inDTD) {
409: receiver
410: .comment(new CharSlice(ch, start, length), 0, 0);
411: }
412: } catch (XPathException err) {
413: throw new SAXException(err);
414: }
415: }
416:
417: /**
418: * Flush buffer for accumulated character data, suppressing white space if appropriate
419: */
420:
421: private void flush() throws XPathException {
422: if (used > 0) {
423: slice.setLength(used);
424: receiver.characters(slice, 0, 0);
425: used = 0;
426: }
427: }
428:
429: public void skippedEntity(String name) throws SAXException {
430: }
431:
432: // No-op methods to satisfy lexical handler interface
433:
434: /**
435: * Register the start of the DTD. Comments in the DTD are skipped because they
436: * are not part of the XPath data model
437: */
438:
439: public void startDTD(String name, String publicId, String systemId)
440: throws SAXException {
441: inDTD = true;
442: }
443:
444: /**
445: * Register the end of the DTD. Comments in the DTD are skipped because they
446: * are not part of the XPath data model
447: */
448:
449: public void endDTD() throws SAXException {
450: inDTD = false;
451: }
452:
453: public void startEntity(String name) throws SAXException {
454: };
455:
456: public void endEntity(String name) throws SAXException {
457: };
458:
459: public void startCDATA() throws SAXException {
460: };
461:
462: public void endCDATA() throws SAXException {
463: };
464:
465: //////////////////////////////////////////////////////////////////////////////
466: // Implement DTDHandler interface
467: //////////////////////////////////////////////////////////////////////////////
468:
469: public void notationDecl(String name, String publicId,
470: String systemId) throws SAXException {
471: }
472:
473: public void unparsedEntityDecl(String name, String publicId,
474: String systemId, String notationName) throws SAXException {
475: //System.err.println("Unparsed entity " + name + "=" + systemId);
476:
477: // Some SAX parsers report the systemId as written. We need to turn it into
478: // an absolute URL.
479:
480: String uri = systemId;
481: if (locator != null) {
482: try {
483: String baseURI = locator.getSystemId();
484: URI absoluteURI = new URI(baseURI).resolve(systemId);
485: uri = absoluteURI.toString();
486: } catch (URISyntaxException err) {
487: }
488: }
489: try {
490: receiver.setUnparsedEntity(name, uri, publicId);
491: } catch (XPathException err) {
492: throw new SAXException(err);
493: }
494: }
495:
496: // implement the SaxonLocator interface. This is needed to bridge a SAX Locator to a JAXP SourceLocator
497:
498: /**
499: * Return the public identifier for the current document event.
500: * @return A string containing the system identifier, or
501: * null if none is available.
502: */
503:
504: public String getSystemId() {
505: if (locator == null) {
506: return null;
507: } else {
508: return locator.getSystemId();
509: }
510: }
511:
512: /**
513: * Return the public identifier for the current document event.
514: * @return A string containing the public identifier, or
515: * null if none is available.
516: */
517:
518: public String getPublicId() {
519: if (locator == null) {
520: return null;
521: } else {
522: return locator.getPublicId();
523: }
524: }
525:
526: /**
527: * Return the line number where the current document event ends.
528: * @return The line number, or -1 if none is available.
529: */
530:
531: public int getLineNumber() {
532: if (locator == null) {
533: return -1;
534: } else {
535: return locator.getLineNumber();
536: }
537: }
538:
539: /**
540: * Return the character position where the current document event ends.
541: * @return The column number, or -1 if none is available.
542: */
543:
544: public int getColumnNumber() {
545: if (locator == null) {
546: return -1;
547: } else {
548: return locator.getColumnNumber();
549: }
550: }
551:
552: public String getSystemId(int locationId) {
553: return getSystemId();
554: }
555:
556: public int getLineNumber(int locationId) {
557: return getLineNumber();
558: }
559:
560: } // end of class ReceivingContentHandler
561:
562: //
563: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
564: // you may not use this file except in compliance with the License. You may obtain a copy of the
565: // License at http://www.mozilla.org/MPL/
566: //
567: // Software distributed under the License is distributed on an "AS IS" basis,
568: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
569: // See the License for the specific language governing rights and limitations under the License.
570: //
571: // The Original Code is: all this file.
572: //
573: // The Initial Developer of the Original Code is Michael H. Kay.
574: //
575: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
576: //
577: // Contributor(s): none.
578: //
|