001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * XMLOptions.java
019: * Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
020: *
021: */
022:
023: package weka.core.xml;
024:
025: import java.io.File;
026: import java.io.InputStream;
027: import java.io.Reader;
028: import java.util.Vector;
029: import org.w3c.dom.Document;
030: import org.w3c.dom.Element;
031: import org.w3c.dom.Node;
032: import org.w3c.dom.NodeList;
033:
034: /**
035: * A class for transforming options listed in XML to a regular WEKA command
036: * line string.<p>
037: *
038: * @author FracPete (fracpete at waikato dot ac dot nz)
039: * @version $Revision: 1.2 $
040: */
041: public class XMLOptions {
042: /** tag for a single option */
043: public final static String TAG_OPTION = "option";
044:
045: /** tag for a list of options */
046: public final static String TAG_OPTIONS = "options";
047:
048: /** the name attribute */
049: public final static String ATT_NAME = "name";
050:
051: /** the type attribute */
052: public final static String ATT_TYPE = "type";
053:
054: /** the value attribute */
055: public final static String ATT_VALUE = "value";
056:
057: /** a value of the type attribute */
058: public final static String VAL_TYPE_FLAG = "flag";
059:
060: /** a value of the type attribute */
061: public final static String VAL_TYPE_SINGLE = "single";
062:
063: /** a value of the type attribute */
064: public final static String VAL_TYPE_HYPHENS = "hyphens";
065:
066: /** a value of the type attribute */
067: public final static String VAL_TYPE_QUOTES = "quotes";
068:
069: /** a value of the type attribute */
070: public final static String VAL_TYPE_CLASSIFIER = "classifier";
071:
072: /** the root node */
073: public final static String ROOT_NODE = TAG_OPTIONS;
074:
075: /** the DTD for the XML file */
076: public final static String DOCTYPE = "<!DOCTYPE " + ROOT_NODE
077: + "\n" + "[\n" + " <!ELEMENT " + TAG_OPTIONS + " ("
078: + TAG_OPTION + ")*>\n" + " <!ATTLIST " + TAG_OPTIONS
079: + " " + ATT_TYPE + " CDATA \"classifier\">\n"
080: + " <!ATTLIST " + TAG_OPTIONS + " " + ATT_VALUE
081: + " CDATA \"\">\n" + " <!ELEMENT " + TAG_OPTION
082: + " (#PCDATA | " + TAG_OPTIONS + ")*>\n" + " <!ATTLIST "
083: + TAG_OPTION + " " + ATT_NAME + " CDATA #REQUIRED>\n"
084: + " <!ATTLIST " + TAG_OPTION + " " + ATT_TYPE
085: + " (flag | single | hyphens | quotes) \"single\">\n"
086: + "]\n" + ">";
087:
088: /** the XML document */
089: protected XMLDocument m_XMLDocument = null;
090:
091: /**
092: * Creates a new instance of XMLOptions
093: * @throws Exception if the construction of the DocumentBuilder fails
094: * @see #setValidating(boolean)
095: */
096: public XMLOptions() throws Exception {
097: m_XMLDocument = new XMLDocument();
098: m_XMLDocument.setRootNode(ROOT_NODE);
099: m_XMLDocument.setDocType(DOCTYPE);
100: setValidating(true);
101: }
102:
103: /**
104: * Creates a new instance of XMLOptions
105: * @param xml the xml to parse (if "<?xml" is not found then it is considered a file)
106: * @throws Exception if the construction of the DocumentBuilder fails
107: * @see #setValidating(boolean)
108: */
109: public XMLOptions(String xml) throws Exception {
110: this ();
111: getXMLDocument().read(xml);
112: }
113:
114: /**
115: * Creates a new instance of XMLOptions
116: * @param file the XML file to parse
117: * @throws Exception if the construction of the DocumentBuilder fails
118: * @see #setValidating(boolean)
119: */
120: public XMLOptions(File file) throws Exception {
121: this ();
122: getXMLDocument().read(file);
123: }
124:
125: /**
126: * Creates a new instance of XMLOptions
127: * @param stream the XML stream to parse
128: * @throws Exception if the construction of the DocumentBuilder fails
129: * @see #setValidating(boolean)
130: */
131: public XMLOptions(InputStream stream) throws Exception {
132: this ();
133: getXMLDocument().read(stream);
134: }
135:
136: /**
137: * Creates a new instance of XMLOptions
138: * @param reader the XML reader to parse
139: * @throws Exception if the construction of the DocumentBuilder fails
140: * @see #setValidating(boolean)
141: */
142: public XMLOptions(Reader reader) throws Exception {
143: this ();
144: getXMLDocument().read(reader);
145: }
146:
147: /**
148: * returns whether a validating parser is used
149: * @return whether a validating parser is used
150: */
151: public boolean getValidating() {
152: return m_XMLDocument.getValidating();
153: }
154:
155: /**
156: * sets whether to use a validating parser or not. <br>
157: * Note: this does clear the current DOM document!
158: * @param validating whether to use a validating parser
159: * @throws Exception if the instantiating of the DocumentBuilder fails
160: */
161: public void setValidating(boolean validating) throws Exception {
162: m_XMLDocument.setValidating(validating);
163: }
164:
165: /**
166: * returns the parsed DOM document
167: * @return the parsed DOM document
168: */
169: public Document getDocument() {
170: return fixHyphens(m_XMLDocument.getDocument());
171: }
172:
173: /**
174: * returns the handler of the XML document. the internal DOM document can
175: * be accessed via the <code>getDocument()</code> method.
176: * @return the object handling the XML document
177: * @see #getDocument()
178: */
179: public XMLDocument getXMLDocument() {
180: return m_XMLDocument;
181: }
182:
183: /**
184: * pushes any options with type ATT_HYPHENS to the end, s.t. the "--" are
185: * really added at the end
186: * @param document the DOM document to work on
187: * @return the fixed DOM document
188: */
189: protected Document fixHyphens(Document document) {
190: NodeList list;
191: Vector hyphens;
192: int i;
193: Node node;
194: Node tmpNode;
195: boolean isLast;
196:
197: // get all option tags
198: list = document.getDocumentElement().getElementsByTagName(
199: TAG_OPTION);
200:
201: // get all hyphen tags
202: hyphens = new Vector();
203: for (i = 0; i < list.getLength(); i++) {
204: if (((Element) list.item(i)).getAttribute(ATT_TYPE).equals(
205: VAL_TYPE_HYPHENS))
206: hyphens.add(list.item(i));
207: }
208:
209: // check all hyphen tags whether they are the end, if not fix it
210: for (i = 0; i < hyphens.size(); i++) {
211: node = (Node) hyphens.get(i);
212:
213: // at the end?
214: isLast = true;
215: tmpNode = node;
216: while (tmpNode.getNextSibling() != null) {
217: // normal tag?
218: if (tmpNode.getNextSibling().getNodeType() == Node.ELEMENT_NODE) {
219: isLast = false;
220: break;
221: }
222: tmpNode = tmpNode.getNextSibling();
223: }
224:
225: // move
226: if (!isLast) {
227: tmpNode = node.getParentNode();
228: tmpNode.removeChild(node);
229: tmpNode.appendChild(node);
230: }
231: }
232:
233: return document;
234: }
235:
236: /**
237: * returns the quotes level for the given node, i.e. it returns the number
238: * of option's of the type "quotes" are in the path
239: */
240: protected int getQuotesLevel(Node node) {
241: int result;
242:
243: result = 0;
244: while (node.getParentNode() != null) {
245: if (!(node instanceof Element))
246: continue;
247:
248: // option-tag?
249: if (node.getNodeName().equals(TAG_OPTION)) {
250: // types = quotes?
251: if (((Element) node).getAttribute(ATT_TYPE).equals(
252: VAL_TYPE_QUOTES))
253: result++;
254: }
255:
256: node = node.getParentNode();
257: }
258:
259: return result;
260: }
261:
262: /**
263: * converts the given node into a command line representation and adds it
264: * to the existing command line
265: * @param cl the command line so far
266: * @param parent the node to convert to command line
267: * @param depth the current depth
268: * @return the new command line
269: */
270: protected String toCommandLine(String cl, Element parent, int depth) {
271: String newCl;
272: String tmpCl;
273: int i;
274: Vector list;
275: Vector subList;
276: NodeList subNodeList;
277: Element node;
278:
279: newCl = "";
280:
281: // options
282: if (parent.getNodeName().equals(TAG_OPTIONS)) {
283: // classifier? -> add
284: if (parent.getAttribute(ATT_TYPE).equals(
285: VAL_TYPE_CLASSIFIER)) {
286: newCl += parent.getAttribute(ATT_VALUE);
287: }
288:
289: // process children
290: list = XMLDocument.getChildTags(parent);
291: for (i = 0; i < list.size(); i++)
292: newCl = toCommandLine(newCl, (Element) list.get(i),
293: depth + 1);
294: } else
295: // option
296: if (parent.getNodeName().equals(TAG_OPTION)) {
297: newCl += " -" + parent.getAttribute(ATT_NAME);
298: subList = XMLDocument.getChildTags(parent);
299: subNodeList = parent.getChildNodes();
300:
301: if (parent.getAttribute(ATT_TYPE).equals(VAL_TYPE_SINGLE)) {
302: if ((subNodeList.getLength() > 0)
303: && (!subNodeList.item(0).getNodeValue().trim()
304: .equals("")))
305: newCl += " "
306: + subNodeList.item(0).getNodeValue().trim();
307: } else if (parent.getAttribute(ATT_TYPE).equals(
308: VAL_TYPE_HYPHENS)) {
309: newCl += " "
310: + ((Element) subList.get(0))
311: .getAttribute(ATT_VALUE); // expects classifier
312: // get single options in this node
313: subList = XMLDocument.getChildTags((Element) subList
314: .get(0));
315: // get options after --
316: tmpCl = "";
317: for (i = 0; i < subList.size(); i++)
318: tmpCl = toCommandLine(tmpCl, (Element) subList
319: .get(i), depth + 1);
320: // add options
321: tmpCl = tmpCl.trim();
322: if (!tmpCl.equals(""))
323: newCl += " -- " + tmpCl;
324: } else if (parent.getAttribute(ATT_TYPE).equals(
325: VAL_TYPE_QUOTES)) {
326: newCl += " ";
327: // opening quote
328: for (i = 1; i < getQuotesLevel(parent); i++)
329: newCl += "\\";
330: newCl += "\"";
331: // options
332: tmpCl = "";
333: for (i = 0; i < subList.size(); i++)
334: tmpCl = toCommandLine(tmpCl, (Element) subList
335: .get(i), depth + 1);
336: newCl += tmpCl.trim();
337: // closing quote
338: for (i = 1; i < getQuotesLevel(parent); i++)
339: newCl += "\\";
340: newCl += "\"";
341: }
342: }
343:
344: // add to existing command line
345: cl += " " + newCl.trim();
346:
347: return cl.trim();
348: }
349:
350: /**
351: * returns the given DOM document as command line
352: * @return the document as command line
353: * @throws Exception if anything goes wrong initializing the parsing
354: */
355: public String toCommandLine() throws Exception {
356: return toCommandLine(new String(), getDocument()
357: .getDocumentElement(), 0);
358: }
359:
360: /**
361: * returns the current DOM document as string array (takes care of quotes!)
362: * @return the document as string array
363: * @throws Exception if anything goes wrong initializing the parsing
364: */
365: public String[] toArray() throws Exception {
366: String cl;
367: Vector result;
368: boolean quotes;
369: boolean backslash;
370: boolean add;
371: int i;
372: String tmpStr;
373:
374: cl = toCommandLine();
375: result = new Vector();
376:
377: // break up string
378: quotes = false;
379: backslash = false;
380: tmpStr = "";
381: for (i = 0; i < cl.length(); i++) {
382: add = true;
383:
384: switch (cl.charAt(i)) {
385: case '\\':
386: backslash = true;
387: break;
388:
389: case '"':
390: // can we toggle quotes? (ignore nested quotes)
391: if (!backslash) {
392: quotes = !quotes;
393: add = false;
394: }
395: backslash = false;
396: break;
397:
398: case ' ':
399: // if not quoted then break!
400: if (!quotes) {
401: result.add(tmpStr.replaceAll("\\\\\"", "\""));
402: add = false;
403: tmpStr = "";
404: }
405: break;
406: }
407:
408: if (add)
409: tmpStr += "" + cl.charAt(i);
410: }
411:
412: // add last part
413: if (!tmpStr.equals(""))
414: result.add(tmpStr);
415:
416: return (String[]) result.toArray(new String[1]);
417: }
418:
419: /**
420: * returns the object in a string representation (as indented XML output)
421: *
422: * @return the object in a string representation
423: */
424: public String toString() {
425: return getXMLDocument().toString();
426: }
427:
428: /**
429: * for testing only. prints the given XML, the resulting commandline and
430: * the string array.
431: */
432: public static void main(String[] args) throws Exception {
433: if (args.length > 0) {
434: System.out.println("\nXML:\n\n"
435: + new XMLOptions(args[0]).toString());
436:
437: System.out.println("\nCommandline:\n\n"
438: + new XMLOptions(args[0]).toCommandLine());
439:
440: System.out.println("\nString array:\n");
441: String[] options = new XMLOptions(args[0]).toArray();
442: for (int i = 0; i < options.length; i++)
443: System.out.println(options[i]);
444: }
445: }
446: }
|