001: /*
002: * Machine Learning support for FindBugs
003: * Copyright (C) 2004,2005 University of Maryland
004: *
005: * This library is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU Lesser General Public
007: * License as published by the Free Software Foundation; either
008: * version 2.1 of the License, or (at your option) any later version.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013: * Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General Public
016: * License along with this library; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018: */
019:
020: package edu.umd.cs.findbugs.ml;
021:
022: import java.io.BufferedOutputStream;
023: import java.io.FileOutputStream;
024: import java.io.IOException;
025: import java.io.OutputStreamWriter;
026: import java.io.PrintStream;
027: import java.io.Writer;
028: import java.util.ArrayList;
029: import java.util.Collection;
030: import java.util.IdentityHashMap;
031: import java.util.Iterator;
032: import java.util.LinkedList;
033: import java.util.List;
034: import java.util.Random;
035: import java.util.Set;
036: import java.util.StringTokenizer;
037: import java.util.TreeSet;
038:
039: import org.dom4j.Document;
040: import org.dom4j.Element;
041: import org.dom4j.Node;
042: import org.dom4j.io.SAXReader;
043:
044: import edu.umd.cs.findbugs.BugCollection;
045: import edu.umd.cs.findbugs.BugInstance;
046: import edu.umd.cs.findbugs.config.CommandLine;
047:
048: /**
049: * Convert a BugCollection into ARFF format.
050: * See Witten and Frank, <em>Data Mining</em>, ISBN 1-55860-552-5.
051: *
052: * @see BugCollection
053: * @see BugInstance
054: * @author David Hovemeyer
055: */
056: public class ConvertToARFF {
057: // ------------------------------------------------------------
058: // Helper classes
059: // ------------------------------------------------------------
060:
061: private static class DataFile {
062: private Document document;
063: private String appName;
064:
065: public DataFile(Document document, String appName) {
066: this .document = document;
067: this .appName = appName;
068: }
069:
070: public Document getDocument() {
071: return document;
072: }
073:
074: public String getAppName() {
075: return appName;
076: }
077: }
078:
079: private static class MissingNodeException extends Exception {
080: private static final long serialVersionUID = -5042140832791541208L;
081:
082: public MissingNodeException(String msg) {
083: super (msg);
084: }
085: }
086:
087: public interface Attribute {
088: public String getName();
089:
090: public void scan(Element element, String appName)
091: throws MissingNodeException;
092:
093: public String getRange();
094:
095: public String getInstanceValue(Element element, String appName)
096: throws MissingNodeException;
097: }
098:
099: private abstract static class XPathAttribute implements Attribute {
100: private String name;
101: private String xpath;
102:
103: public XPathAttribute(String name, String xpath) {
104: this .name = name;
105: this .xpath = xpath;
106: }
107:
108: public String getName() {
109: return name;
110: }
111:
112: public String getInstanceValue(Element element, String appName)
113: throws MissingNodeException {
114: Object value = element.selectObject(xpath);
115: if (value == null)
116: throw new MissingNodeException(
117: "Could not get value from element (path="
118: + xpath + ")");
119: if (value instanceof List) {
120: List<?> list = (List<?>) value;
121: if (list.size() == 0)
122: throw new MissingNodeException(
123: "Could not get value from element (path="
124: + xpath + ")");
125: value = list.get(0);
126: }
127:
128: if (value instanceof Node) {
129: Node node = (Node) value;
130: return node.getText();
131: } else if (value instanceof String) {
132: return (String) value;
133: } else if (value instanceof Number) {
134: String s = value.toString();
135: if (s.endsWith(".0"))
136: s = s.substring(0, s.length() - 2);
137: return s;
138: } else
139: throw new IllegalStateException(
140: "Unexpected object returned from xpath query: "
141: + value);
142: }
143: }
144:
145: public static class NominalAttribute extends XPathAttribute {
146: private Set<String> possibleValueSet;
147:
148: public NominalAttribute(String name, String xpath) {
149: super (name, xpath);
150: this .possibleValueSet = new TreeSet<String>();
151: }
152:
153: public void scan(Element element, String appName) {
154: try {
155: possibleValueSet
156: .add(getInstanceValue(element, appName));
157: } catch (MissingNodeException ignore) {
158: // Ignore: we'll just use an n/a value for this instance
159: }
160: }
161:
162: public String getRange() {
163: return collectionToRange(possibleValueSet);
164: }
165:
166: @Override
167: public String getInstanceValue(Element element, String appName)
168: throws MissingNodeException {
169: return "\"" + super .getInstanceValue(element, appName)
170: + "\"";
171: }
172: }
173:
174: public static class BooleanAttribute extends XPathAttribute {
175: public BooleanAttribute(String name, String xpath) {
176: super (name, xpath);
177: }
178:
179: public void scan(Element element, String appName)
180: throws MissingNodeException {
181: // Nothing to do.
182: }
183:
184: public String getRange() {
185: return "{true, false}";
186: }
187:
188: @Override
189: public String getInstanceValue(Element element, String appName)
190: throws MissingNodeException {
191: try {
192: String value = super .getInstanceValue(element, appName);
193: return "\"" + Boolean.valueOf(value).toString() + "\"";
194: } catch (MissingNodeException e) {
195: return "\"false\"";
196: }
197: }
198: }
199:
200: private static final int UNCLASSIFIED = 0;
201: private static final int BUG = 1;
202: private static final int NOT_BUG = 2;
203: private static final int HARMLESS = 4;
204: private static final int HARMLESS_BUG = HARMLESS | BUG;
205:
206: public static abstract class AbstractClassificationAttribute
207: implements Attribute {
208:
209: /* (non-Javadoc)
210: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
211: */
212: public String getName() {
213: return "classification";
214: }
215:
216: /* (non-Javadoc)
217: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
218: */
219: public void scan(Element element, String appName)
220: throws MissingNodeException {
221: }
222:
223: /* (non-Javadoc)
224: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
225: */
226: public String getInstanceValue(Element element, String appName)
227: throws MissingNodeException {
228: String annotationText = element
229: .valueOf("./UserAnnotation[text()]");
230: //System.out.println("annotationText=" + annotationText);
231:
232: int state = getBugClassification(annotationText);
233: return bugToString(state);
234: }
235:
236: protected abstract String bugToString(int bugType)
237: throws MissingNodeException;
238:
239: }
240:
241: public static class ClassificationAttribute extends
242: AbstractClassificationAttribute {
243: public String getRange() {
244: return "{bug,not_bug,harmless_bug}";
245: }
246:
247: @Override
248: protected String bugToString(int state)
249: throws MissingNodeException {
250: if (state == NOT_BUG)
251: return "not_bug";
252: else if (state == BUG)
253: return "bug";
254: else if (state == HARMLESS_BUG)
255: return "harmless_bug";
256: else
257: throw new MissingNodeException("Unclassified warning");
258:
259: }
260: }
261:
262: public static class BinaryClassificationAttribute extends
263: AbstractClassificationAttribute {
264: /* (non-Javadoc)
265: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
266: */
267: public String getRange() {
268: return "{bug, not_bug}";
269: }
270:
271: /* (non-Javadoc)
272: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.AbstractClassificationAttribute#bugToString(int)
273: */
274: @Override
275: protected String bugToString(int state)
276: throws MissingNodeException {
277: if (state == BUG)
278: return "bug";
279: else if (state == NOT_BUG || state == HARMLESS_BUG)
280: return "not_bug";
281: else
282: throw new MissingNodeException("unclassified warning");
283: }
284: }
285:
286: public static class NumericAttribute extends XPathAttribute {
287: public NumericAttribute(String name, String xpath) {
288: super (name, xpath);
289: }
290:
291: public void scan(Element element, String appName)
292: throws MissingNodeException {
293: }
294:
295: public String getRange() {
296: return "numeric";
297: }
298: }
299:
300: public static class PriorityAttribute implements Attribute {
301: public String getName() {
302: return "priority";
303: }
304:
305: public void scan(Element element, String appName)
306: throws MissingNodeException {
307: }
308:
309: public String getRange() {
310: return "{low,medium,high}";
311: }
312:
313: public String getInstanceValue(Element element, String appName)
314: throws MissingNodeException {
315: org.dom4j.Attribute attribute = element
316: .attribute("priority");
317: if (attribute == null)
318: throw new MissingNodeException(
319: "Missing priority attribute");
320: String value = attribute.getValue();
321: try {
322: int prio = Integer.parseInt(value);
323: switch (prio) {
324: case 1:
325: return "high";
326: case 2:
327: return "medium";
328: case 3:
329: return "low";
330: default:
331: return "?";
332: }
333: } catch (NumberFormatException e) {
334: throw new MissingNodeException(
335: "Invalid priority value: " + value);
336: }
337: }
338: }
339:
340: /**
341: * An attribute that just gives each instance a unique id.
342: * The application name is prepended, so each unique id
343: * really unique, even across applications.
344: * Obviously, this attribute shouldn't be used as input
345: * to a learning algorithm.
346: *
347: * <p>Uses the Element's uid attribute if it has one.</p>
348: */
349: public static class IdAttribute implements Attribute {
350: private TreeSet<String> possibleValueSet = new TreeSet<String>();
351:
352: private boolean scanning = true;
353: private int count = 0;
354:
355: public String getName() {
356: return "id";
357: }
358:
359: public void scan(Element element, String appName)
360: throws MissingNodeException {
361: possibleValueSet.add(instanceValue(element, appName));
362: }
363:
364: public String getRange() {
365: return collectionToRange(possibleValueSet);
366: }
367:
368: public String getInstanceValue(Element element, String appName)
369: throws MissingNodeException {
370: if (scanning) {
371: count = 0;
372: scanning = false;
373: }
374: return instanceValue(element, appName);
375: }
376:
377: private String instanceValue(Element element, String appName) {
378: String nextId;
379:
380: org.dom4j.Attribute uidAttr = element.attribute("uid");
381: if (uidAttr != null) {
382: nextId = uidAttr.getValue();
383: } else {
384: nextId = String.valueOf(count++);
385: }
386:
387: return "\"" + appName + "-" + nextId + "\"";
388: }
389: }
390:
391: public static class IdStringAttribute implements Attribute {
392:
393: /* (non-Javadoc)
394: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
395: */
396: public String getName() {
397: return "ids";
398: }
399:
400: /* (non-Javadoc)
401: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
402: */
403: public void scan(Element element, String appName)
404: throws MissingNodeException {
405: }
406:
407: /* (non-Javadoc)
408: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
409: */
410: public String getRange() {
411: return "string";
412: }
413:
414: int count = 0;
415:
416: /* (non-Javadoc)
417: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
418: */
419: public String getInstanceValue(Element element, String appName)
420: throws MissingNodeException {
421: String value;
422: org.dom4j.Attribute uidAttr = element.attribute("uid");
423: if (uidAttr == null) {
424: value = String.valueOf(count++);
425: } else {
426: value = uidAttr.getStringValue();
427: }
428:
429: return "\"" + appName + "-" + value + "\"";
430: }
431:
432: }
433:
434: private static final String RANDOM_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
435:
436: public static class RandomIdAttribute implements Attribute {
437:
438: private Random rng = new Random();
439: private IdentityHashMap<Element, String> idMap = new IdentityHashMap<Element, String>();
440:
441: /* (non-Javadoc)
442: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
443: */
444: public String getName() {
445: return "idr";
446: }
447:
448: /* (non-Javadoc)
449: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
450: */
451: public void scan(Element element, String appName)
452: throws MissingNodeException {
453: idMap.put(element, generateId());
454: }
455:
456: private String generateId() {
457: StringBuffer buf = new StringBuffer();
458:
459: for (int i = 0; i < 20; ++i) {
460: char c = RANDOM_CHARS.charAt(rng.nextInt(RANDOM_CHARS
461: .length()));
462: buf.append(c);
463: }
464:
465: return buf.toString();
466: }
467:
468: /* (non-Javadoc)
469: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
470: */
471: public String getRange() {
472: TreeSet<String> range = new TreeSet<String>();
473: range.addAll(idMap.values());
474: if (range.size() != idMap.size())
475: throw new IllegalStateException("id collision!");
476: return collectionToRange(range);
477: }
478:
479: /* (non-Javadoc)
480: * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
481: */
482: public String getInstanceValue(Element element, String appName)
483: throws MissingNodeException {
484: String id = idMap.get(element);
485: if (id == null)
486: throw new IllegalStateException("Element not scanned?");
487: return "\"" + id + "\"";
488: }
489:
490: }
491:
492: public static class AppNameAttribute implements Attribute {
493: private Set<String> appNameSet = new TreeSet<String>();
494:
495: public String getName() {
496: return "appname";
497: }
498:
499: public void scan(Element element, String appName)
500: throws MissingNodeException {
501: appNameSet.add(appName);
502: }
503:
504: public String getRange() {
505: return collectionToRange(appNameSet);
506: }
507:
508: public String getInstanceValue(Element element, String appName)
509: throws MissingNodeException {
510: return "\"" + appName + "\"";
511: }
512: }
513:
514: public static String collectionToRange(Collection<String> collection) {
515: StringBuffer buf = new StringBuffer();
516: buf.append("{");
517: for (String aCollection : collection) {
518: if (buf.length() > 1)
519: buf.append(',');
520: buf.append(aCollection);
521: }
522: buf.append("}");
523:
524: return buf.toString();
525: }
526:
527: public interface AttributeCallback {
528: public void apply(Attribute attribute)
529: throws MissingNodeException, IOException;
530: }
531:
532: // ------------------------------------------------------------
533: // Constants
534: // ------------------------------------------------------------
535:
// XPath used to select elements unless another one is set.
private static final String DEFAULT_NODE_SELECTION_XPATH = "/BugCollection/BugInstance";

// ------------------------------------------------------------
// Fields
// ------------------------------------------------------------

// Attributes, in the order they were added.
private List<Attribute> attributeList = new LinkedList<Attribute>();
private String nodeSelectionXpath = DEFAULT_NODE_SELECTION_XPATH;
private boolean dropUnclassifiedWarnings = false;
// Explicit application name; null unless one has been set.
private String appName;

// ------------------------------------------------------------
// Public methods
// ------------------------------------------------------------

/**
 * Create a converter with no attributes, the default node
 * selection xpath, and unclassified warnings retained.
 */
public ConvertToARFF() {
    // All defaults are supplied by the field initialisers.
}
556:
557: public void setAppName(String appName) {
558: this .appName = appName;
559: }
560:
561: /**
562: * Set the xpath expression used to select BugInstance nodes.
563: *
564: * @param nodeSelectionXpath the node selection xpath expression
565: */
566: public void setNodeSelectionXpath(String nodeSelectionXpath) {
567: this .nodeSelectionXpath = nodeSelectionXpath;
568: }
569:
570: public int getNumAttributes() {
571: return attributeList.size();
572: }
573:
574: public void dropUnclassifiedWarnings() {
575: this .dropUnclassifiedWarnings = true;
576: }
577:
578: public void addAttribute(Attribute attribute) {
579: attributeList.add(attribute);
580: }
581:
582: public void addNominalAttribute(String name, String xpath) {
583: addAttribute(new NominalAttribute(name, xpath));
584: }
585:
586: public void addBooleanAttribute(String name, String xpath) {
587: addAttribute(new BooleanAttribute(name, xpath));
588: }
589:
590: public void addClassificationAttribute() {
591: addAttribute(new ClassificationAttribute());
592: }
593:
594: public void addNumericAttribute(String name, String xpath) {
595: addAttribute(new NumericAttribute(name, xpath));
596: }
597:
598: public void addPriorityAttribute() {
599: addAttribute(new PriorityAttribute());
600: }
601:
602: public void addIdAttribute() {
603: addAttribute(new IdAttribute());
604: }
605:
606: public void addAppNameAttribute() {
607: addAttribute(new AppNameAttribute());
608: }
609:
610: /**
611: * Convert a single Document to ARFF format.
612: *
613: * @param relationName the relation name
614: * @param document the Document
615: * @param appName the application name
616: * @param out Writer to write the ARFF output to
617: */
618: public void convert(String relationName, Document document,
619: String appName, final Writer out) throws IOException,
620: MissingNodeException {
621: scan(document, appName);
622: generateHeader(relationName, out);
623: generateInstances(document, appName, out);
624: }
625:
626: /**
627: * Scan a Document to find out the ranges of attributes.
628: * All Documents must be scanned before generating the ARFF
629: * header and instances.
630: *
631: * @param document the Document
632: * @param appName the application name
633: */
634: public void scan(Document document, final String appName)
635: throws MissingNodeException, IOException {
636: List<Element> bugInstanceList = getBugInstanceList(document);
637:
638: for (final Element element : bugInstanceList) {
639: scanAttributeList(new AttributeCallback() {
640: public void apply(Attribute attribute)
641: throws MissingNodeException {
642: attribute.scan(element, appName);
643: }
644: });
645: }
646: }
647:
648: /**
649: * Generate ARFF header.
650: * Documents must have already been scanned.
651: *
652: * @param relationName the relation name
653: * @param out Writer to write the ARFF output to
654: */
655: public void generateHeader(String relationName, final Writer out)
656: throws MissingNodeException, IOException {
657: out.write("@relation ");
658: out.write(relationName);
659: out.write("\n\n");
660:
661: scanAttributeList(new AttributeCallback() {
662: public void apply(Attribute attribute) throws IOException {
663: out.write("@attribute ");
664: out.write(attribute.getName());
665: out.write(" ");
666: out.write(attribute.getRange());
667: out.write("\n");
668: }
669: });
670: out.write("\n");
671:
672: out.write("@data\n");
673: }
674:
675: /**
676: * Generate instances from given Document.
677: * Document should already have been scanned, and the ARFF header generated.
678: *
679: * @param document the Document
680: * @param appName the application name
681: * @param out Writer to write the ARFF output to
682: */
683: public void generateInstances(Document document,
684: final String appName, final Writer out)
685: throws MissingNodeException, IOException {
686: List<Element> bugInstanceList = getBugInstanceList(document);
687:
688: for (final Element element : bugInstanceList) {
689: scanAttributeList(new AttributeCallback() {
690: boolean first = true;
691:
692: public void apply(Attribute attribute)
693: throws IOException {
694: if (!first)
695: out.write(",");
696: first = false;
697: String value;
698: try {
699: value = attribute.getInstanceValue(element,
700: appName);
701: } catch (MissingNodeException e) {
702: value = "?";
703: }
704: out.write(value);
705: }
706: });
707: out.write("\n");
708: }
709: }
710:
711: /**
712: * Apply a callback to all Attributes.
713: *
714: * @param callback the callback
715: */
716: public void scanAttributeList(AttributeCallback callback)
717: throws MissingNodeException, IOException {
718: for (Attribute attribute : attributeList) {
719: callback.apply(attribute);
720: }
721: }
722:
723: // ------------------------------------------------------------
724: // Implementation
725: // ------------------------------------------------------------
726:
727: private static int getBugClassification(String annotationText) {
728: StringTokenizer tok = new StringTokenizer(annotationText,
729: " \t\r\n\f.,:;-");
730:
731: int state = UNCLASSIFIED;
732:
733: while (tok.hasMoreTokens()) {
734: String s = tok.nextToken();
735: if (s.equals("BUG"))
736: state |= BUG;
737: else if (s.equals("NOT_BUG"))
738: state |= NOT_BUG;
739: else if (s.equals("HARMLESS"))
740: state |= HARMLESS;
741: }
742:
743: if ((state & NOT_BUG) != 0)
744: return NOT_BUG;
745: else if ((state & BUG) != 0)
746: return ((state & HARMLESS) != 0) ? HARMLESS_BUG : BUG;
747: else
748: return UNCLASSIFIED;
749: }
750:
751: private List<Element> getBugInstanceList(Document document) {
752: List<Element> bugInstanceList = document
753: .selectNodes(nodeSelectionXpath);
754: if (dropUnclassifiedWarnings) {
755: for (Iterator<Element> i = bugInstanceList.iterator(); i
756: .hasNext();) {
757: Element element = i.next();
758: String annotationText = element
759: .valueOf("./UserAnnotation[text()]");
760: int classification = getBugClassification(annotationText);
761: if (classification == UNCLASSIFIED)
762: i.remove();
763: }
764: }
765: return bugInstanceList;
766: }
767:
768: private static class C2ACommandLine extends CommandLine {
769: private ConvertToARFF converter = new ConvertToARFF();
770:
771: public C2ACommandLine() {
772: addOption("-select", "xpath expression",
773: "select BugInstance elements");
774: addSwitch("-train", "drop unclassified warnings");
775: addSwitch("-id", "add unique id attribute (as nominal)");
776: addSwitch("-ids", "add unique id attribute (as string)");
777: addSwitch("-idr",
778: "add random unique id attribtue (as nominal)");
779: addSwitch("-app", "add application name attribute");
780: addOption("-nominal", "attrName,xpath",
781: "add a nominal attribute");
782: addOption("-boolean", "attrName,xpath",
783: "add a boolean attribute");
784: addOption("-numeric", "attrName,xpath",
785: "add a numeric attribute");
786: addSwitch("-classification",
787: "add bug classification attribute");
788: addSwitch("-binclass",
789: "add binary (bug/not_bug) classification attribute");
790: addSwitch("-priority", "add priority attribute");
791: addOption("-appname", "app name",
792: "set application name of all tuples");
793: }
794:
795: public ConvertToARFF getConverter() {
796: return converter;
797: }
798:
799: @Override
800: protected void handleOption(String option,
801: String optionExtraPart) throws IOException {
802: if (option.equals("-train")) {
803: converter.dropUnclassifiedWarnings();
804: } else if (option.equals("-id")) {
805: converter.addIdAttribute();
806: } else if (option.equals("-ids")) {
807: converter.addAttribute(new IdStringAttribute());
808: } else if (option.equals("-idr")) {
809: converter.addAttribute(new RandomIdAttribute());
810: } else if (option.equals("-app")) {
811: converter.addAppNameAttribute();
812: } else if (option.equals("-classification")) {
813: converter.addClassificationAttribute();
814: } else if (option.equals("-binclass")) {
815: converter
816: .addAttribute(new BinaryClassificationAttribute());
817: } else if (option.equals("-priority")) {
818: converter.addPriorityAttribute();
819: }
820: }
821:
822: private interface XPathAttributeCreator {
823: public Attribute create(String name, String xpath);
824: }
825:
826: @Override
827: protected void handleOptionWithArgument(String option,
828: String argument) throws IOException {
829:
830: if (option.equals("-select")) {
831: converter.setNodeSelectionXpath(argument);
832: } else if (option.equals("-nominal")) {
833: addXPathAttribute(option, argument,
834: new XPathAttributeCreator() {
835: public Attribute create(String name,
836: String xpath) {
837: return new NominalAttribute(name, xpath);
838: }
839: });
840: } else if (option.equals("-boolean")) {
841: addXPathAttribute(option, argument,
842: new XPathAttributeCreator() {
843: public Attribute create(String name,
844: String xpath) {
845: return new BooleanAttribute(name, xpath);
846: }
847: });
848: } else if (option.equals("-numeric")) {
849: addXPathAttribute(option, argument,
850: new XPathAttributeCreator() {
851: public Attribute create(String name,
852: String xpath) {
853: return new NumericAttribute(name, xpath);
854: }
855: });
856: } else if (option.equals("-appname")) {
857: converter.setAppName(argument);
858: }
859: }
860:
861: protected void addXPathAttribute(String option,
862: String argument, XPathAttributeCreator creator) {
863: int comma = argument.indexOf(',');
864: if (comma < 0) {
865: throw new IllegalArgumentException(
866: "Missing comma separating attribute name and xpath in "
867: + option + " option: " + argument);
868: }
869: String attrName = argument.substring(0, comma);
870: String xpath = argument.substring(comma + 1);
871: converter.addAttribute(creator.create(attrName, xpath));
872: }
873:
874: public void printUsage(PrintStream out) {
875: out
876: .println("Usage: "
877: + ConvertToARFF.class.getName()
878: + " [options] <relation name> <output file> <findbugs results> [<findbugs results>...]");
879: super .printUsage(out);
880: }
881: }
882:
883: public String toAppName(String fileName) {
884: if (appName != null)
885: return appName;
886:
887: // Remove file extension, if any
888: int lastDot = fileName.lastIndexOf('.');
889: if (lastDot >= 0)
890: fileName = fileName.substring(0, lastDot);
891: return fileName;
892: }
893:
894: public static void main(String[] argv) throws Exception {
895: // Expand any option files
896: argv = CommandLine.expandOptionFiles(argv, true, true);
897:
898: // Parse command line arguments
899: C2ACommandLine commandLine = new C2ACommandLine();
900: int argCount = commandLine.parse(argv);
901: if (argCount > argv.length - 3) {
902: commandLine.printUsage(System.err);
903: System.exit(1);
904: }
905: String relationName = argv[argCount++];
906: String outputFileName = argv[argCount++];
907:
908: // Create the converter
909: ConvertToARFF converter = commandLine.getConverter();
910: if (converter.getNumAttributes() == 0) {
911: throw new IllegalArgumentException(
912: "No attributes specified!");
913: }
914:
915: // Open output file
916: Writer out = new OutputStreamWriter(new BufferedOutputStream(
917: new FileOutputStream(outputFileName)));
918:
919: // Read documents,
920: // scan documents to find ranges of attributes
921: List<DataFile> dataFileList = new ArrayList<DataFile>();
922: while (argCount < argv.length) {
923: String fileName = argv[argCount++];
924:
925: // Read input file as dom4j tree
926: SAXReader reader = new SAXReader();
927: Document document = reader.read(fileName);
928:
929: DataFile dataFile = new DataFile(document, converter
930: .toAppName(fileName));
931: dataFileList.add(dataFile);
932:
933: converter.scan(dataFile.getDocument(), dataFile
934: .getAppName());
935: }
936:
937: // Generate ARFF header
938: converter.generateHeader(relationName, out);
939:
940: // Generate instances from each document
941: for (DataFile dataFile : dataFileList) {
942: converter.generateInstances(dataFile.getDocument(),
943: dataFile.getAppName(), out);
944: }
945:
946: out.close();
947: }
948:
949: }
950:
951: // vim:ts=4
|