001: /*
002: * <copyright>
003: *
004: * Copyright 2003-2004 BBNT Solutions, LLC
005: * under sponsorship of the Defense Advanced Research Projects
006: * Agency (DARPA).
007: *
008: * You can redistribute this software and/or modify it under the
009: * terms of the Cougaar Open Source License as published on the
010: * Cougaar Open Source Website (www.cougaar.org).
011: *
012: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
013: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
014: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
015: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
016: * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
017: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
018: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
019: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
020: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
021: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
022: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
023: *
024: * </copyright>
025: */
026:
027: package org.cougaar.lib.aggagent.util;
028:
029: import org.w3c.dom.Element;
030: import org.w3c.dom.NamedNodeMap;
031: import org.w3c.dom.Node;
032: import org.w3c.dom.NodeList;
033:
034: /**
035: * InverseSax is a class that acts as the reverse of a SAX parser. In other
036: * words, it creates the text of an XML document by accepting notification
037: * of parts of the XML structure through method calls. Those parts include
038: * opening and closing tags, attributes, and text. In the attributes and
039: * text, encoding of special characters is handled automatically.
040: */
041: public class InverseSax {
042: private static class NameNode {
043: public String tag = null;
044: public NameNode next = null;
045:
046: public NameNode(String t, NameNode n) {
047: tag = t;
048: next = n;
049: }
050: }
051:
052: private static byte EMPTY = 0;
053: private static byte IN_TAG = 1;
054: private static byte IN_ELEMENT = 2;
055: private static byte IN_TEXT = 3;
056: private static byte DONE = 4;
057:
058: private byte state = EMPTY;
059:
060: private StringBuffer buf = new StringBuffer();
061: private NameNode nameStack = null;
062: private boolean lenientMode = false;
063: private boolean prettyPrint = false;
064: private int indentTabs = 0;
065:
066: private void pushName(String name) {
067: nameStack = new NameNode(name, nameStack);
068: }
069:
070: private boolean nameStackEmpty() {
071: return nameStack == null;
072: }
073:
074: private String popName() {
075: NameNode p = nameStack;
076: nameStack = p.next;
077: return p.tag;
078: }
079:
080: private void encode(String s) {
081: char[] chars = s.toCharArray();
082: for (int i = 0; i < chars.length; i++) {
083: char c = chars[i];
084: if (c == '&')
085: buf.append("&");
086: else if (c == '<')
087: buf.append("<");
088: else if (c == '>')
089: buf.append(">");
090: else if (c == '\'')
091: buf.append("'");
092: else if (c == '"')
093: buf.append(""");
094: else
095: buf.append(c);
096: }
097: }
098:
099: /**
100: * Set the lenient mode on or off. In lenient mode, the tag and attribute
101: * names are not checked for invalid characters. This class accepts only
102: * the Latin alphabet (upper- and lower-case) as letters and {0, 1, ..., 9}
103: * as digits, and it does not allow the colon (used in XML namespaces).
104: * There are many other sets of letters, digits, and punctuation characters
105: * in the UNICODE spec that are allowed by standard XML. To use these
106: * characters or XML namespaces, lenient mode must be turned on.
107: * <br><br>
108: * Use at your own risk.
109: */
110: public void setLenientMode(boolean b) {
111: lenientMode = b;
112: }
113:
114: /**
115: * turn pretty-printing on or off
116: */
117: public void setPrettyPrintMode(boolean b) {
118: if (state != EMPTY)
119: throw new IllegalStateException(
120: "Pretty-print must be set before content is added.");
121: prettyPrint = b;
122: }
123:
124: // add indentation
125: private void indent() {
126: buf.append("\n");
127: for (int i = 0; i < indentTabs; i++)
128: buf.append(" ");
129: }
130:
131: // Allow upper- and lower-case letters and underscores.
132: // Currently, the colon is not allowed--we don't use namespaces.
133: private static boolean validateInitial(char c) {
134: return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
135: || c == '_'
136: /* || c == ':' */;
137: }
138:
139: // Any initial is allowed here, as well as digits, hyphens, and periods.
140: private static boolean validateNameChar(char c) {
141: return validateInitial(c) || ('0' <= c && c <= '9') || c == '-'
142: || c == '.';
143: }
144:
145: private boolean validateName(String s) {
146: if (s == null || s.length() == 0) {
147: return false;
148: } else if (!lenientMode) {
149: char[] chars = s.toCharArray();
150: if (!validateInitial(chars[0]))
151: return false;
152: else
153: for (int i = 1; i < chars.length; i++)
154: if (!validateNameChar(chars[i]))
155: return false;
156: }
157: return true;
158: }
159:
160: /**
161: * Return this XML document generator to its pristine state, abandoning any
162: * work previously in progress.
163: */
164: public void reset() {
165: buf = new StringBuffer();
166: nameStack = null;
167: state = EMPTY;
168: }
169:
170: /**
171: * Add a new element to the document. This can be the document root or a
172: * child of another element. After the root element has been closed, no
173: * more elements may be added, and attempting to do so will result in an
174: * IllegalStateException. This method also verifies that the tag name is
175: * valid (see above).
176: */
177: public void addElement(String tag) {
178: if (state == DONE)
179: throw new IllegalStateException(
180: "end of document--can't add elements");
181: if (!validateName(tag))
182: throw new IllegalArgumentException("illegal tag name: "
183: + tag);
184: if (state == IN_TAG)
185: buf.append(">");
186: if (prettyPrint) {
187: if (state == IN_TAG || state == IN_TEXT)
188: indentTabs++;
189: indent();
190: }
191: buf.append("<");
192: buf.append(tag);
193: pushName(tag);
194: state = IN_TAG;
195: }
196:
197: /**
198: * Convenience method for adding an element with text but no attributes or
199: * child elements.
200: */
201: public void addTextElement(String tag, String text) {
202: addElement(tag);
203: addText(text);
204: endElement();
205: }
206:
207: /**
208: * Convenience method for adding an element with a single attribute and
209: * no content.
210: */
211: public void addEltAtt(String tag, String att, String val) {
212: addEltAttText(tag, att, val, null);
213: }
214:
215: /**
216: * Convenience method for adding an element with a single attribute and
217: * text for content. Specify null for no content.
218: */
219: public void addEltAttText(String tag, String att, String val,
220: String text) {
221: addElement(tag);
222: addAttribute(att, val);
223: if (text != null)
224: addText(text);
225: endElement();
226: }
227:
228: /**
229: * Add an attribute to the current XML element. This method is only valid
230: * after creating an element and before adding other contents, such as text
231: * or child elements. Use of this method at any other time will raise an
232: * IllegalStateException. Special characters within the attribute value are
233: * automatically replaced with the appropriate character entities. This
234: * method also verifies that the tag name is valid (see above).
235: */
236: public void addAttribute(String name, String value) {
237: if (state != IN_TAG)
238: throw new IllegalStateException(
239: "attributes belong inside an XML tag");
240: if (!validateName(name))
241: throw new IllegalArgumentException(
242: "illegal attribute name: " + name);
243: buf.append(" ");
244: buf.append(name);
245: buf.append("=\"");
246: encode(value);
247: buf.append("\"");
248: }
249:
250: /**
251: * Add text content to the current XML element. This method is valid any
252: * time after the root element is opened but before it is closed. This
253: * method may be called multiple times within a single element, but the
254: * effect is the same as calling it once with the concatenation of the text
255: * of the many calls (in the same order).
256: */
257: public void addText(String text) {
258: if (state == EMPTY || state == DONE)
259: throw new IllegalStateException(
260: "text belongs inside an XML element");
261: if (state == IN_TAG) {
262: buf.append(">");
263: state = IN_TEXT;
264: }
265: encode(text);
266: }
267:
268: /**
269: * Close the current element. Every tag must be closed explicitly by a
270: * call to this method (or endDocument, which calls this method).
271: */
272: public void endElement() {
273: if (state == EMPTY || state == DONE)
274: throw new IllegalStateException(
275: "can't close element--none is current");
276: String tag = popName();
277: if (state == IN_TAG) {
278: buf.append("/>");
279: } else {
280: if (prettyPrint && state == IN_ELEMENT) {
281: indentTabs--;
282: indent();
283: }
284: buf.append("</");
285: buf.append(tag);
286: buf.append(">");
287: }
288: if (nameStackEmpty())
289: state = DONE;
290: else
291: state = IN_ELEMENT;
292: }
293:
294: /**
295: * This method probably shouldn't be used under normal conditions. However,
296: * in case an error or some other unexpected condition is encountered while
297: * creating the XML document, this method can be used to end the document
298: * gracefully. Following any call to this method, toString() is guaranteed
299: * to return either the text of a well-formed XML document or the empty
300: * String (and the latter only if no elements were added).
301: * <br><br>
302: * After this method is called, no more content may be added, even if the
303: * document is empty.
304: */
305: public void endDocument() {
306: while (!nameStackEmpty())
307: endElement();
308: state = DONE;
309: }
310:
311: /**
312: * Return the text of the XML document.
313: */
314: public String toString() {
315: return buf.toString();
316: }
317:
318: // - - - - - - - Testing Harness - - - - - - - - - - - - - - - - - - - - - - -
319:
320: public static void main(String[] argv) {
321: InverseSax doc = new InverseSax();
322: doc.setPrettyPrintMode(true);
323: doc.addElement("bla.bla");
324: doc.addAttribute("type", "bl<a>h");
325: doc.addAttribute("bla.id", "sc&um");
326: doc.addText("SomeText");
327: for (int i = 0; i < 5; i++) {
328: doc.addElement("yargh");
329: doc.addAttribute("value", "high");
330: doc.addText("<" + i + ">");
331: doc.endElement();
332: }
333: doc.endElement();
334:
335: System.out.println(doc.toString());
336: System.out.println();
337:
338: try {
339: Element elt = XmlUtils.parse(doc.toString());
340: recursivePrint(elt);
341: } catch (Exception bugger_off) {
342: }
343: }
344:
345: private static void recursivePrint(Element elt) {
346: System.out.print("{node(" + elt.getNodeName() + ")[");
347: NamedNodeMap nnm = elt.getAttributes();
348: for (int i = 0; i < nnm.getLength(); i++) {
349: Node att = nnm.item(i);
350: System.out.print(att.getNodeName() + "="
351: + att.getNodeValue() + ";");
352: }
353: System.out.print("]");
354: NodeList nl = elt.getChildNodes();
355: for (int i = 0; i < nl.getLength(); i++) {
356: Node child = nl.item(i);
357: if (child.getNodeType() == Node.ELEMENT_NODE)
358: recursivePrint((Element) child);
359: else if (child.getNodeType() == Node.TEXT_NODE)
360: System.out.print("\"" + child.getNodeValue() + "\"");
361: }
362: System.out.print("}");
363: }
364: }
|