001: /*
002: * Copyright 1999-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: /*
017: * $Id: SAX2RTFDTM.java,v 1.10 2005/01/24 00:34:36 mcnamara Exp $
018: */
019: package org.apache.xml.dtm.ref.sax2dtm;
020:
021: import javax.xml.transform.Source;
022:
023: import org.apache.xml.dtm.DTM;
024: import org.apache.xml.dtm.DTMManager;
025: import org.apache.xml.dtm.DTMWSFilter;
026: import org.apache.xml.utils.IntStack;
027: import org.apache.xml.utils.IntVector;
028: import org.apache.xml.utils.StringVector;
029: import org.apache.xml.utils.XMLStringFactory;
030:
031: import org.xml.sax.SAXException;
032:
033: /**
034: * This is a subclass of SAX2DTM which has been modified to meet the needs of
035: * Result Tree Frameworks (RTFs). The differences are:
036: *
037: * 1) Multiple XML trees may be appended to the single DTM. This means
038: * that the root node of each document is _not_ node 0. Some code has
039: * had to be deoptimized to support this mode of operation, and an
040: * explicit mechanism for obtaining the Node Handle of the root node
041: * has been provided.
042: *
043: * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
044: * most recently added trees off the end of the DTM as stylesheet elements
045: * (and thus variable contexts) are exited.
046: *
047: * PLEASE NOTE that this class may be _heavily_ dependent upon the
048: * internals of the SAX2DTM superclass, and must be maintained in
049: * parallel with that code. Arguably, they should be conditionals
050: * within a single class... but they have deen separated for
051: * performance reasons. (In fact, one could even argue about which is
052: * the superclass and which is the subclass; the current arrangement
053: * is as much about preserving stability of existing code during
054: * development as anything else.)
055: *
056: * %REVIEW% In fact, since the differences are so minor, I think it
057: * may be possible/practical to fold them back into the base
058: * SAX2DTM. Consider that as a future code-size optimization.
059: * */
060: public class SAX2RTFDTM extends SAX2DTM {
061: /** Set true to monitor SAX events and similar diagnostic info. */
062: private static final boolean DEBUG = false;
063:
064: /** Most recently started Document, or null if the DTM is empty. */
065: private int m_currentDocumentNode = NULL;
066:
067: /** Tail-pruning mark: Number of nodes in use */
068: IntStack mark_size = new IntStack();
069: /** Tail-pruning mark: Number of data items in use */
070: IntStack mark_data_size = new IntStack();
071: /** Tail-pruning mark: Number of size-of-data fields in use */
072: IntStack mark_char_size = new IntStack();
073: /** Tail-pruning mark: Number of dataOrQName slots in use */
074: IntStack mark_doq_size = new IntStack();
075: /** Tail-pruning mark: Number of namespace declaration sets in use
076: * %REVIEW% I don't think number of NS sets is ever different from number
077: * of NS elements. We can probabably reduce these to a single stack and save
078: * some storage.
079: * */
080: IntStack mark_nsdeclset_size = new IntStack();
081: /** Tail-pruning mark: Number of naespace declaration elements in use
082: * %REVIEW% I don't think number of NS sets is ever different from number
083: * of NS elements. We can probabably reduce these to a single stack and save
084: * some storage.
085: */
086: IntStack mark_nsdeclelem_size = new IntStack();
087:
088: /**
089: * Tail-pruning mark: initial number of nodes in use
090: */
091: int m_emptyNodeCount;
092:
093: /**
094: * Tail-pruning mark: initial number of namespace declaration sets
095: */
096: int m_emptyNSDeclSetCount;
097:
098: /**
099: * Tail-pruning mark: initial number of namespace declaration elements
100: */
101: int m_emptyNSDeclSetElemsCount;
102:
103: /**
104: * Tail-pruning mark: initial number of data items in use
105: */
106: int m_emptyDataCount;
107:
108: /**
109: * Tail-pruning mark: initial number of characters in use
110: */
111: int m_emptyCharsCount;
112:
113: /**
114: * Tail-pruning mark: default initial number of dataOrQName slots in use
115: */
116: int m_emptyDataQNCount;
117:
118: public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
119: DTMWSFilter whiteSpaceFilter,
120: XMLStringFactory xstringfactory, boolean doIndexing) {
121: super (mgr, source, dtmIdentity, whiteSpaceFilter,
122: xstringfactory, doIndexing);
123:
124: // NEVER track source locators for RTFs; they aren't meaningful. I think.
125: // (If we did track them, we'd need to tail-prune these too.)
126: //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location;
127: m_useSourceLocationProperty = false;
128: m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
129: : null;
130: m_sourceLine = (m_useSourceLocationProperty) ? new IntVector()
131: : null;
132: m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector()
133: : null;
134:
135: // Record initial sizes of fields that are pushed and restored
136: // for RTF tail-pruning. More entries can be popped than pushed, so
137: // we need this to mark the primordial state of the DTM.
138: m_emptyNodeCount = m_size;
139: m_emptyNSDeclSetCount = (m_namespaceDeclSets == null) ? 0
140: : m_namespaceDeclSets.size();
141: m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null) ? 0
142: : m_namespaceDeclSetElements.size();
143: m_emptyDataCount = m_data.size();
144: m_emptyCharsCount = m_chars.size();
145: m_emptyDataQNCount = m_dataOrQName.size();
146: }
147:
148: /**
149: * Given a DTM, find the owning document node. In the case of
150: * SAX2RTFDTM, which may contain multiple documents, this returns
151: * the <b>most recently started</b> document, or null if the DTM is
152: * empty or no document is currently under construction.
153: *
154: * %REVIEW% Should we continue to report the most recent after
155: * construction has ended? I think not, given that it may have been
156: * tail-pruned.
157: *
158: * @return int Node handle of Document node, or null if this DTM does not
159: * contain an "active" document.
160: * */
161: public int getDocument() {
162: return makeNodeHandle(m_currentDocumentNode);
163: }
164:
165: /**
166: * Given a node handle, find the owning document node, using DTM semantics
167: * (Document owns itself) rather than DOM semantics (Document has no owner).
168: *
169: * (I'm counting on the fact that getOwnerDocument() is implemented on top
170: * of this call, in the superclass, to avoid having to rewrite that one.
171: * Be careful if that code changes!)
172: *
173: * @param nodeHandle the id of the node.
174: * @return int Node handle of owning document
175: */
176: public int getDocumentRoot(int nodeHandle) {
177: for (int id = makeNodeIdentity(nodeHandle); id != NULL; id = _parent(id)) {
178: if (_type(id) == DTM.DOCUMENT_NODE) {
179: return makeNodeHandle(id);
180: }
181: }
182:
183: return DTM.NULL; // Safety net; should never happen
184: }
185:
186: /**
187: * Given a node identifier, find the owning document node. Unlike the DOM,
188: * this considers the owningDocument of a Document to be itself. Note that
189: * in shared DTMs this may not be zero.
190: *
191: * @param nodeIdentifier the id of the starting node.
192: * @return int Node identifier of the root of this DTM tree
193: */
194: protected int _documentRoot(int nodeIdentifier) {
195: if (nodeIdentifier == NULL)
196: return NULL;
197:
198: for (int parent = _parent(nodeIdentifier); parent != NULL; nodeIdentifier = parent, parent = _parent(nodeIdentifier))
199: ;
200:
201: return nodeIdentifier;
202: }
203:
204: /**
205: * Receive notification of the beginning of a new RTF document.
206: *
207: * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
208: * might want to consider folding the start/endDocument changes back
209: * into the main SAX2DTM so we don't have to expose so many fields
210: * (even as Protected) and carry the additional code.
211: *
212: * @throws SAXException Any SAX exception, possibly
213: * wrapping another exception.
214: * @see org.xml.sax.ContentHandler#startDocument
215: * */
216: public void startDocument() throws SAXException {
217: // Re-initialize the tree append process
218: m_endDocumentOccured = false;
219: m_prefixMappings = new java.util.Vector();
220: m_contextIndexes = new IntStack();
221: m_parents = new IntStack();
222:
223: m_currentDocumentNode = m_size;
224: super .startDocument();
225: }
226:
227: /**
228: * Receive notification of the end of the document.
229: *
230: * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
231: * might want to consider folding the start/endDocument changes back
232: * into the main SAX2DTM so we don't have to expose so many fields
233: * (even as Protected).
234: *
235: * @throws SAXException Any SAX exception, possibly
236: * wrapping another exception.
237: * @see org.xml.sax.ContentHandler#endDocument
238: * */
239: public void endDocument() throws SAXException {
240: charactersFlush();
241:
242: m_nextsib.setElementAt(NULL, m_currentDocumentNode);
243:
244: if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
245: m_firstch.setElementAt(NULL, m_currentDocumentNode);
246:
247: if (DTM.NULL != m_previous)
248: m_nextsib.setElementAt(DTM.NULL, m_previous);
249:
250: m_parents = null;
251: m_prefixMappings = null;
252: m_contextIndexes = null;
253:
254: m_currentDocumentNode = NULL; // no longer open
255: m_endDocumentOccured = true;
256: }
257:
258: /** "Tail-pruning" support for RTFs.
259: *
260: * This function pushes information about the current size of the
261: * DTM's data structures onto a stack, for use by popRewindMark()
262: * (which see).
263: *
264: * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
265: * RTFs will not be indexed, so I can simply panic if that case
266: * arises. Hey, it works...
267: * */
268: public void pushRewindMark() {
269: if (m_indexing || m_elemIndexes != null)
270: throw new java.lang.NullPointerException(
271: "Coding error; Don't try to mark/rewind an indexed DTM");
272:
273: // Values from DTMDefaultBase
274: // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
275: mark_size.push(m_size);
276: mark_nsdeclset_size.push((m_namespaceDeclSets == null) ? 0
277: : m_namespaceDeclSets.size());
278: mark_nsdeclelem_size
279: .push((m_namespaceDeclSetElements == null) ? 0
280: : m_namespaceDeclSetElements.size());
281:
282: // Values from SAX2DTM
283: mark_data_size.push(m_data.size());
284: mark_char_size.push(m_chars.size());
285: mark_doq_size.push(m_dataOrQName.size());
286: }
287:
288: /** "Tail-pruning" support for RTFs.
289: *
290: * This function pops the information previously saved by
291: * pushRewindMark (which see) and uses it to discard all nodes added
292: * to the DTM after that time. We expect that this will allow us to
293: * reuse storage more effectively.
294: *
295: * This is _not_ intended to be called while a document is still being
296: * constructed -- only between endDocument and the next startDocument
297: *
298: * %REVIEW% WARNING: This is the first use of some of the truncation
299: * methods. If Xalan blows up after this is called, that's a likely
300: * place to check.
301: *
302: * %REVIEW% Our original design for DTMs permitted them to share
303: * string pools. If there any risk that this might be happening, we
304: * can _not_ rewind and recover the string storage. One solution
305: * might to assert that DTMs used for RTFs Must Not take advantage
306: * of that feature, but this seems excessively fragile. Another, much
307: * less attractive, would be to just let them leak... Nah.
308: *
309: * @return true if and only if the pop completely emptied the
310: * RTF. That response is used when determining how to unspool
311: * RTF-started-while-RTF-open situations.
312: * */
313: public boolean popRewindMark() {
314: boolean top = mark_size.empty();
315:
316: m_size = top ? m_emptyNodeCount : mark_size.pop();
317: m_exptype.setSize(m_size);
318: m_firstch.setSize(m_size);
319: m_nextsib.setSize(m_size);
320: m_prevsib.setSize(m_size);
321: m_parent.setSize(m_size);
322:
323: m_elemIndexes = null;
324:
325: int ds = top ? m_emptyNSDeclSetCount : mark_nsdeclset_size
326: .pop();
327: if (m_namespaceDeclSets != null) {
328: m_namespaceDeclSets.setSize(ds);
329: }
330:
331: int ds1 = top ? m_emptyNSDeclSetElemsCount
332: : mark_nsdeclelem_size.pop();
333: if (m_namespaceDeclSetElements != null) {
334: m_namespaceDeclSetElements.setSize(ds1);
335: }
336:
337: // Values from SAX2DTM - m_data always has a reserved entry
338: m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
339: m_chars.setLength(top ? m_emptyCharsCount : mark_char_size
340: .pop());
341: m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size
342: .pop());
343:
344: // Return true iff DTM now empty
345: return m_size == 0;
346: }
347:
348: /** @return true if a DTM tree is currently under construction.
349: * */
350: public boolean isTreeIncomplete() {
351: return !m_endDocumentOccured;
352: }
353: }
|