Source Code Cross Referenced for ConvertToARFF.java in » Code-Analyzer » findbugs » edu » umd » cs » findbugs » ml » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Code Analyzer » findbugs » edu.umd.cs.findbugs.ml 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * Machine Learning support for FindBugs
003:         * Copyright (C) 2004,2005 University of Maryland
004:         * 
005:         * This library is free software; you can redistribute it and/or
006:         * modify it under the terms of the GNU Lesser General Public
007:         * License as published by the Free Software Foundation; either
008:         * version 2.1 of the License, or (at your option) any later version.
009:         * 
010:         * This library is distributed in the hope that it will be useful,
011:         * but WITHOUT ANY WARRANTY; without even the implied warranty of
012:         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013:         * Lesser General Public License for more details.
014:         * 
015:         * You should have received a copy of the GNU Lesser General Public
016:         * License along with this library; if not, write to the Free Software
017:         * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018:         */
019:
020:        package edu.umd.cs.findbugs.ml;
021:
022:        import java.io.BufferedOutputStream;
023:        import java.io.FileOutputStream;
024:        import java.io.IOException;
025:        import java.io.OutputStreamWriter;
026:        import java.io.PrintStream;
027:        import java.io.Writer;
028:        import java.util.ArrayList;
029:        import java.util.Collection;
030:        import java.util.IdentityHashMap;
031:        import java.util.Iterator;
032:        import java.util.LinkedList;
033:        import java.util.List;
034:        import java.util.Random;
035:        import java.util.Set;
036:        import java.util.StringTokenizer;
037:        import java.util.TreeSet;
038:
039:        import org.dom4j.Document;
040:        import org.dom4j.Element;
041:        import org.dom4j.Node;
042:        import org.dom4j.io.SAXReader;
043:
044:        import edu.umd.cs.findbugs.BugCollection;
045:        import edu.umd.cs.findbugs.BugInstance;
046:        import edu.umd.cs.findbugs.config.CommandLine;
047:
048:        /**
049:         * Convert a BugCollection into ARFF format.
050:         * See Witten and Frank, <em>Data Mining</em>, ISBN 1-55860-552-5.
051:         *
052:         * @see BugCollection
053:         * @see BugInstance
054:         * @author David Hovemeyer
055:         */
056:        public class ConvertToARFF {
057:            // ------------------------------------------------------------
058:            // Helper classes
059:            // ------------------------------------------------------------
060:
061:            private static class DataFile {
062:                private Document document;
063:                private String appName;
064:
065:                public DataFile(Document document, String appName) {
066:                    this .document = document;
067:                    this .appName = appName;
068:                }
069:
070:                public Document getDocument() {
071:                    return document;
072:                }
073:
074:                public String getAppName() {
075:                    return appName;
076:                }
077:            }
078:
079:            private static class MissingNodeException extends Exception {
080:                private static final long serialVersionUID = -5042140832791541208L;
081:
082:                public MissingNodeException(String msg) {
083:                    super (msg);
084:                }
085:            }
086:
087:            public interface Attribute {
088:                public String getName();
089:
090:                public void scan(Element element, String appName)
091:                        throws MissingNodeException;
092:
093:                public String getRange();
094:
095:                public String getInstanceValue(Element element, String appName)
096:                        throws MissingNodeException;
097:            }
098:
099:            private abstract static class XPathAttribute implements  Attribute {
100:                private String name;
101:                private String xpath;
102:
103:                public XPathAttribute(String name, String xpath) {
104:                    this .name = name;
105:                    this .xpath = xpath;
106:                }
107:
108:                public String getName() {
109:                    return name;
110:                }
111:
112:                public String getInstanceValue(Element element, String appName)
113:                        throws MissingNodeException {
114:                    Object value = element.selectObject(xpath);
115:                    if (value == null)
116:                        throw new MissingNodeException(
117:                                "Could not get value from element (path="
118:                                        + xpath + ")");
119:                    if (value instanceof  List) {
120:                        List<?> list = (List<?>) value;
121:                        if (list.size() == 0)
122:                            throw new MissingNodeException(
123:                                    "Could not get value from element (path="
124:                                            + xpath + ")");
125:                        value = list.get(0);
126:                    }
127:
128:                    if (value instanceof  Node) {
129:                        Node node = (Node) value;
130:                        return node.getText();
131:                    } else if (value instanceof  String) {
132:                        return (String) value;
133:                    } else if (value instanceof  Number) {
134:                        String s = value.toString();
135:                        if (s.endsWith(".0"))
136:                            s = s.substring(0, s.length() - 2);
137:                        return s;
138:                    } else
139:                        throw new IllegalStateException(
140:                                "Unexpected object returned from xpath query: "
141:                                        + value);
142:                }
143:            }
144:
145:            public static class NominalAttribute extends XPathAttribute {
146:                private Set<String> possibleValueSet;
147:
148:                public NominalAttribute(String name, String xpath) {
149:                    super (name, xpath);
150:                    this .possibleValueSet = new TreeSet<String>();
151:                }
152:
153:                public void scan(Element element, String appName) {
154:                    try {
155:                        possibleValueSet
156:                                .add(getInstanceValue(element, appName));
157:                    } catch (MissingNodeException ignore) {
158:                        // Ignore: we'll just use an n/a value for this instance
159:                    }
160:                }
161:
162:                public String getRange() {
163:                    return collectionToRange(possibleValueSet);
164:                }
165:
166:                @Override
167:                public String getInstanceValue(Element element, String appName)
168:                        throws MissingNodeException {
169:                    return "\"" + super .getInstanceValue(element, appName)
170:                            + "\"";
171:                }
172:            }
173:
174:            public static class BooleanAttribute extends XPathAttribute {
175:                public BooleanAttribute(String name, String xpath) {
176:                    super (name, xpath);
177:                }
178:
179:                public void scan(Element element, String appName)
180:                        throws MissingNodeException {
181:                    // Nothing to do.
182:                }
183:
184:                public String getRange() {
185:                    return "{true, false}";
186:                }
187:
188:                @Override
189:                public String getInstanceValue(Element element, String appName)
190:                        throws MissingNodeException {
191:                    try {
192:                        String value = super .getInstanceValue(element, appName);
193:                        return "\"" + Boolean.valueOf(value).toString() + "\"";
194:                    } catch (MissingNodeException e) {
195:                        return "\"false\"";
196:                    }
197:                }
198:            }
199:
200:            private static final int UNCLASSIFIED = 0;
201:            private static final int BUG = 1;
202:            private static final int NOT_BUG = 2;
203:            private static final int HARMLESS = 4;
204:            private static final int HARMLESS_BUG = HARMLESS | BUG;
205:
206:            public static abstract class AbstractClassificationAttribute
207:                    implements  Attribute {
208:
209:                /* (non-Javadoc)
210:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
211:                 */
212:                public String getName() {
213:                    return "classification";
214:                }
215:
216:                /* (non-Javadoc)
217:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
218:                 */
219:                public void scan(Element element, String appName)
220:                        throws MissingNodeException {
221:                }
222:
223:                /* (non-Javadoc)
224:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
225:                 */
226:                public String getInstanceValue(Element element, String appName)
227:                        throws MissingNodeException {
228:                    String annotationText = element
229:                            .valueOf("./UserAnnotation[text()]");
230:                    //System.out.println("annotationText=" + annotationText);
231:
232:                    int state = getBugClassification(annotationText);
233:                    return bugToString(state);
234:                }
235:
236:                protected abstract String bugToString(int bugType)
237:                        throws MissingNodeException;
238:
239:            }
240:
241:            public static class ClassificationAttribute extends
242:                    AbstractClassificationAttribute {
243:                public String getRange() {
244:                    return "{bug,not_bug,harmless_bug}";
245:                }
246:
247:                @Override
248:                protected String bugToString(int state)
249:                        throws MissingNodeException {
250:                    if (state == NOT_BUG)
251:                        return "not_bug";
252:                    else if (state == BUG)
253:                        return "bug";
254:                    else if (state == HARMLESS_BUG)
255:                        return "harmless_bug";
256:                    else
257:                        throw new MissingNodeException("Unclassified warning");
258:
259:                }
260:            }
261:
262:            public static class BinaryClassificationAttribute extends
263:                    AbstractClassificationAttribute {
264:                /* (non-Javadoc)
265:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
266:                 */
267:                public String getRange() {
268:                    return "{bug, not_bug}";
269:                }
270:
271:                /* (non-Javadoc)
272:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.AbstractClassificationAttribute#bugToString(int)
273:                 */
274:                @Override
275:                protected String bugToString(int state)
276:                        throws MissingNodeException {
277:                    if (state == BUG)
278:                        return "bug";
279:                    else if (state == NOT_BUG || state == HARMLESS_BUG)
280:                        return "not_bug";
281:                    else
282:                        throw new MissingNodeException("unclassified warning");
283:                }
284:            }
285:
286:            public static class NumericAttribute extends XPathAttribute {
287:                public NumericAttribute(String name, String xpath) {
288:                    super (name, xpath);
289:                }
290:
291:                public void scan(Element element, String appName)
292:                        throws MissingNodeException {
293:                }
294:
295:                public String getRange() {
296:                    return "numeric";
297:                }
298:            }
299:
300:            public static class PriorityAttribute implements  Attribute {
301:                public String getName() {
302:                    return "priority";
303:                }
304:
305:                public void scan(Element element, String appName)
306:                        throws MissingNodeException {
307:                }
308:
309:                public String getRange() {
310:                    return "{low,medium,high}";
311:                }
312:
313:                public String getInstanceValue(Element element, String appName)
314:                        throws MissingNodeException {
315:                    org.dom4j.Attribute attribute = element
316:                            .attribute("priority");
317:                    if (attribute == null)
318:                        throw new MissingNodeException(
319:                                "Missing priority attribute");
320:                    String value = attribute.getValue();
321:                    try {
322:                        int prio = Integer.parseInt(value);
323:                        switch (prio) {
324:                        case 1:
325:                            return "high";
326:                        case 2:
327:                            return "medium";
328:                        case 3:
329:                            return "low";
330:                        default:
331:                            return "?";
332:                        }
333:                    } catch (NumberFormatException e) {
334:                        throw new MissingNodeException(
335:                                "Invalid priority value: " + value);
336:                    }
337:                }
338:            }
339:
340:            /**
341:             * An attribute that just gives each instance a unique id.
342:             * The application name is prepended, so each unique id
343:             * really unique, even across applications.
344:             * Obviously, this attribute shouldn't be used as input
345:             * to a learning algorithm.
346:             * 
347:             * <p>Uses the Element's uid attribute if it has one.</p>
348:             */
349:            public static class IdAttribute implements  Attribute {
350:                private TreeSet<String> possibleValueSet = new TreeSet<String>();
351:
352:                private boolean scanning = true;
353:                private int count = 0;
354:
355:                public String getName() {
356:                    return "id";
357:                }
358:
359:                public void scan(Element element, String appName)
360:                        throws MissingNodeException {
361:                    possibleValueSet.add(instanceValue(element, appName));
362:                }
363:
364:                public String getRange() {
365:                    return collectionToRange(possibleValueSet);
366:                }
367:
368:                public String getInstanceValue(Element element, String appName)
369:                        throws MissingNodeException {
370:                    if (scanning) {
371:                        count = 0;
372:                        scanning = false;
373:                    }
374:                    return instanceValue(element, appName);
375:                }
376:
377:                private String instanceValue(Element element, String appName) {
378:                    String nextId;
379:
380:                    org.dom4j.Attribute uidAttr = element.attribute("uid");
381:                    if (uidAttr != null) {
382:                        nextId = uidAttr.getValue();
383:                    } else {
384:                        nextId = String.valueOf(count++);
385:                    }
386:
387:                    return "\"" + appName + "-" + nextId + "\"";
388:                }
389:            }
390:
391:            public static class IdStringAttribute implements  Attribute {
392:
393:                /* (non-Javadoc)
394:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
395:                 */
396:                public String getName() {
397:                    return "ids";
398:                }
399:
400:                /* (non-Javadoc)
401:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
402:                 */
403:                public void scan(Element element, String appName)
404:                        throws MissingNodeException {
405:                }
406:
407:                /* (non-Javadoc)
408:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
409:                 */
410:                public String getRange() {
411:                    return "string";
412:                }
413:
414:                int count = 0;
415:
416:                /* (non-Javadoc)
417:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
418:                 */
419:                public String getInstanceValue(Element element, String appName)
420:                        throws MissingNodeException {
421:                    String value;
422:                    org.dom4j.Attribute uidAttr = element.attribute("uid");
423:                    if (uidAttr == null) {
424:                        value = String.valueOf(count++);
425:                    } else {
426:                        value = uidAttr.getStringValue();
427:                    }
428:
429:                    return "\"" + appName + "-" + value + "\"";
430:                }
431:
432:            }
433:
434:            private static final String RANDOM_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
435:
436:            public static class RandomIdAttribute implements  Attribute {
437:
438:                private Random rng = new Random();
439:                private IdentityHashMap<Element, String> idMap = new IdentityHashMap<Element, String>();
440:
441:                /* (non-Javadoc)
442:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
443:                 */
444:                public String getName() {
445:                    return "idr";
446:                }
447:
448:                /* (non-Javadoc)
449:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
450:                 */
451:                public void scan(Element element, String appName)
452:                        throws MissingNodeException {
453:                    idMap.put(element, generateId());
454:                }
455:
456:                private String generateId() {
457:                    StringBuffer buf = new StringBuffer();
458:
459:                    for (int i = 0; i < 20; ++i) {
460:                        char c = RANDOM_CHARS.charAt(rng.nextInt(RANDOM_CHARS
461:                                .length()));
462:                        buf.append(c);
463:                    }
464:
465:                    return buf.toString();
466:                }
467:
468:                /* (non-Javadoc)
469:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
470:                 */
471:                public String getRange() {
472:                    TreeSet<String> range = new TreeSet<String>();
473:                    range.addAll(idMap.values());
474:                    if (range.size() != idMap.size())
475:                        throw new IllegalStateException("id collision!");
476:                    return collectionToRange(range);
477:                }
478:
479:                /* (non-Javadoc)
480:                 * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
481:                 */
482:                public String getInstanceValue(Element element, String appName)
483:                        throws MissingNodeException {
484:                    String id = idMap.get(element);
485:                    if (id == null)
486:                        throw new IllegalStateException("Element not scanned?");
487:                    return "\"" + id + "\"";
488:                }
489:
490:            }
491:
492:            public static class AppNameAttribute implements  Attribute {
493:                private Set<String> appNameSet = new TreeSet<String>();
494:
495:                public String getName() {
496:                    return "appname";
497:                }
498:
499:                public void scan(Element element, String appName)
500:                        throws MissingNodeException {
501:                    appNameSet.add(appName);
502:                }
503:
504:                public String getRange() {
505:                    return collectionToRange(appNameSet);
506:                }
507:
508:                public String getInstanceValue(Element element, String appName)
509:                        throws MissingNodeException {
510:                    return "\"" + appName + "\"";
511:                }
512:            }
513:
514:            public static String collectionToRange(Collection<String> collection) {
515:                StringBuffer buf = new StringBuffer();
516:                buf.append("{");
517:                for (String aCollection : collection) {
518:                    if (buf.length() > 1)
519:                        buf.append(',');
520:                    buf.append(aCollection);
521:                }
522:                buf.append("}");
523:
524:                return buf.toString();
525:            }
526:
527:            public interface AttributeCallback {
528:                public void apply(Attribute attribute)
529:                        throws MissingNodeException, IOException;
530:            }
531:
532:            // ------------------------------------------------------------
533:            // Constants
534:            // ------------------------------------------------------------
535:
536:            private static final String DEFAULT_NODE_SELECTION_XPATH = "/BugCollection/BugInstance";
537:
538:            // ------------------------------------------------------------
539:            // Fields
540:            // ------------------------------------------------------------
541:
542:            private List<Attribute> attributeList;
543:            private String nodeSelectionXpath;
544:            private boolean dropUnclassifiedWarnings;
545:            private String appName;
546:
547:            // ------------------------------------------------------------
548:            // Public methods
549:            // ------------------------------------------------------------
550:
551:            public ConvertToARFF() {
552:                this .attributeList = new LinkedList<Attribute>();
553:                this .nodeSelectionXpath = DEFAULT_NODE_SELECTION_XPATH;
554:                this .dropUnclassifiedWarnings = false;
555:            }
556:
557:            public void setAppName(String appName) {
558:                this .appName = appName;
559:            }
560:
561:            /**
562:             * Set the xpath expression used to select BugInstance nodes.
563:             * 
564:             * @param nodeSelectionXpath the node selection xpath expression
565:             */
566:            public void setNodeSelectionXpath(String nodeSelectionXpath) {
567:                this .nodeSelectionXpath = nodeSelectionXpath;
568:            }
569:
570:            public int getNumAttributes() {
571:                return attributeList.size();
572:            }
573:
574:            public void dropUnclassifiedWarnings() {
575:                this .dropUnclassifiedWarnings = true;
576:            }
577:
578:            public void addAttribute(Attribute attribute) {
579:                attributeList.add(attribute);
580:            }
581:
582:            public void addNominalAttribute(String name, String xpath) {
583:                addAttribute(new NominalAttribute(name, xpath));
584:            }
585:
586:            public void addBooleanAttribute(String name, String xpath) {
587:                addAttribute(new BooleanAttribute(name, xpath));
588:            }
589:
590:            public void addClassificationAttribute() {
591:                addAttribute(new ClassificationAttribute());
592:            }
593:
594:            public void addNumericAttribute(String name, String xpath) {
595:                addAttribute(new NumericAttribute(name, xpath));
596:            }
597:
598:            public void addPriorityAttribute() {
599:                addAttribute(new PriorityAttribute());
600:            }
601:
602:            public void addIdAttribute() {
603:                addAttribute(new IdAttribute());
604:            }
605:
606:            public void addAppNameAttribute() {
607:                addAttribute(new AppNameAttribute());
608:            }
609:
610:            /**
611:             * Convert a single Document to ARFF format.
612:             *
613:             * @param relationName the relation name
614:             * @param document     the Document
615:             * @param appName      the application name
616:             * @param out          Writer to write the ARFF output to
617:             */
618:            public void convert(String relationName, Document document,
619:                    String appName, final Writer out) throws IOException,
620:                    MissingNodeException {
621:                scan(document, appName);
622:                generateHeader(relationName, out);
623:                generateInstances(document, appName, out);
624:            }
625:
626:            /**
627:             * Scan a Document to find out the ranges of attributes.
628:             * All Documents must be scanned before generating the ARFF
629:             * header and instances.
630:             *
631:             * @param document the Document
632:             * @param appName  the application name
633:             */
634:            public void scan(Document document, final String appName)
635:                    throws MissingNodeException, IOException {
636:                List<Element> bugInstanceList = getBugInstanceList(document);
637:
638:                for (final Element element : bugInstanceList) {
639:                    scanAttributeList(new AttributeCallback() {
640:                        public void apply(Attribute attribute)
641:                                throws MissingNodeException {
642:                            attribute.scan(element, appName);
643:                        }
644:                    });
645:                }
646:            }
647:
648:            /**
649:             * Generate ARFF header.
650:             * Documents must have already been scanned.
651:             *
652:             * @param relationName the relation name
653:             * @param out          Writer to write the ARFF output to
654:             */
655:            public void generateHeader(String relationName, final Writer out)
656:                    throws MissingNodeException, IOException {
657:                out.write("@relation ");
658:                out.write(relationName);
659:                out.write("\n\n");
660:
661:                scanAttributeList(new AttributeCallback() {
662:                    public void apply(Attribute attribute) throws IOException {
663:                        out.write("@attribute ");
664:                        out.write(attribute.getName());
665:                        out.write(" ");
666:                        out.write(attribute.getRange());
667:                        out.write("\n");
668:                    }
669:                });
670:                out.write("\n");
671:
672:                out.write("@data\n");
673:            }
674:
675:            /**
676:             * Generate instances from given Document.
677:             * Document should already have been scanned, and the ARFF header generated.
678:             *
679:             * @param document the Document
680:             * @param appName  the application name
681:             * @param out      Writer to write the ARFF output to
682:             */
683:            public void generateInstances(Document document,
684:                    final String appName, final Writer out)
685:                    throws MissingNodeException, IOException {
686:                List<Element> bugInstanceList = getBugInstanceList(document);
687:
688:                for (final Element element : bugInstanceList) {
689:                    scanAttributeList(new AttributeCallback() {
690:                        boolean first = true;
691:
692:                        public void apply(Attribute attribute)
693:                                throws IOException {
694:                            if (!first)
695:                                out.write(",");
696:                            first = false;
697:                            String value;
698:                            try {
699:                                value = attribute.getInstanceValue(element,
700:                                        appName);
701:                            } catch (MissingNodeException e) {
702:                                value = "?";
703:                            }
704:                            out.write(value);
705:                        }
706:                    });
707:                    out.write("\n");
708:                }
709:            }
710:
711:            /**
712:             * Apply a callback to all Attributes.
713:             *
714:             * @param callback the callback
715:             */
716:            public void scanAttributeList(AttributeCallback callback)
717:                    throws MissingNodeException, IOException {
718:                for (Attribute attribute : attributeList) {
719:                    callback.apply(attribute);
720:                }
721:            }
722:
723:            // ------------------------------------------------------------
724:            // Implementation
725:            // ------------------------------------------------------------
726:
727:            private static int getBugClassification(String annotationText) {
728:                StringTokenizer tok = new StringTokenizer(annotationText,
729:                        " \t\r\n\f.,:;-");
730:
731:                int state = UNCLASSIFIED;
732:
733:                while (tok.hasMoreTokens()) {
734:                    String s = tok.nextToken();
735:                    if (s.equals("BUG"))
736:                        state |= BUG;
737:                    else if (s.equals("NOT_BUG"))
738:                        state |= NOT_BUG;
739:                    else if (s.equals("HARMLESS"))
740:                        state |= HARMLESS;
741:                }
742:
743:                if ((state & NOT_BUG) != 0)
744:                    return NOT_BUG;
745:                else if ((state & BUG) != 0)
746:                    return ((state & HARMLESS) != 0) ? HARMLESS_BUG : BUG;
747:                else
748:                    return UNCLASSIFIED;
749:            }
750:
751:            private List<Element> getBugInstanceList(Document document) {
752:                List<Element> bugInstanceList = document
753:                        .selectNodes(nodeSelectionXpath);
754:                if (dropUnclassifiedWarnings) {
755:                    for (Iterator<Element> i = bugInstanceList.iterator(); i
756:                            .hasNext();) {
757:                        Element element = i.next();
758:                        String annotationText = element
759:                                .valueOf("./UserAnnotation[text()]");
760:                        int classification = getBugClassification(annotationText);
761:                        if (classification == UNCLASSIFIED)
762:                            i.remove();
763:                    }
764:                }
765:                return bugInstanceList;
766:            }
767:
768:            private static class C2ACommandLine extends CommandLine {
769:                private ConvertToARFF converter = new ConvertToARFF();
770:
771:                public C2ACommandLine() {
772:                    addOption("-select", "xpath expression",
773:                            "select BugInstance elements");
774:                    addSwitch("-train", "drop unclassified warnings");
775:                    addSwitch("-id", "add unique id attribute (as nominal)");
776:                    addSwitch("-ids", "add unique id attribute (as string)");
777:                    addSwitch("-idr",
778:                            "add random unique id attribtue (as nominal)");
779:                    addSwitch("-app", "add application name attribute");
780:                    addOption("-nominal", "attrName,xpath",
781:                            "add a nominal attribute");
782:                    addOption("-boolean", "attrName,xpath",
783:                            "add a boolean attribute");
784:                    addOption("-numeric", "attrName,xpath",
785:                            "add a numeric attribute");
786:                    addSwitch("-classification",
787:                            "add bug classification attribute");
788:                    addSwitch("-binclass",
789:                            "add binary (bug/not_bug) classification attribute");
790:                    addSwitch("-priority", "add priority attribute");
791:                    addOption("-appname", "app name",
792:                            "set application name of all tuples");
793:                }
794:
795:                public ConvertToARFF getConverter() {
796:                    return converter;
797:                }
798:
799:                @Override
800:                protected void handleOption(String option,
801:                        String optionExtraPart) throws IOException {
802:                    if (option.equals("-train")) {
803:                        converter.dropUnclassifiedWarnings();
804:                    } else if (option.equals("-id")) {
805:                        converter.addIdAttribute();
806:                    } else if (option.equals("-ids")) {
807:                        converter.addAttribute(new IdStringAttribute());
808:                    } else if (option.equals("-idr")) {
809:                        converter.addAttribute(new RandomIdAttribute());
810:                    } else if (option.equals("-app")) {
811:                        converter.addAppNameAttribute();
812:                    } else if (option.equals("-classification")) {
813:                        converter.addClassificationAttribute();
814:                    } else if (option.equals("-binclass")) {
815:                        converter
816:                                .addAttribute(new BinaryClassificationAttribute());
817:                    } else if (option.equals("-priority")) {
818:                        converter.addPriorityAttribute();
819:                    }
820:                }
821:
822:                private interface XPathAttributeCreator {
823:                    public Attribute create(String name, String xpath);
824:                }
825:
826:                @Override
827:                protected void handleOptionWithArgument(String option,
828:                        String argument) throws IOException {
829:
830:                    if (option.equals("-select")) {
831:                        converter.setNodeSelectionXpath(argument);
832:                    } else if (option.equals("-nominal")) {
833:                        addXPathAttribute(option, argument,
834:                                new XPathAttributeCreator() {
835:                                    public Attribute create(String name,
836:                                            String xpath) {
837:                                        return new NominalAttribute(name, xpath);
838:                                    }
839:                                });
840:                    } else if (option.equals("-boolean")) {
841:                        addXPathAttribute(option, argument,
842:                                new XPathAttributeCreator() {
843:                                    public Attribute create(String name,
844:                                            String xpath) {
845:                                        return new BooleanAttribute(name, xpath);
846:                                    }
847:                                });
848:                    } else if (option.equals("-numeric")) {
849:                        addXPathAttribute(option, argument,
850:                                new XPathAttributeCreator() {
851:                                    public Attribute create(String name,
852:                                            String xpath) {
853:                                        return new NumericAttribute(name, xpath);
854:                                    }
855:                                });
856:                    } else if (option.equals("-appname")) {
857:                        converter.setAppName(argument);
858:                    }
859:                }
860:
861:                protected void addXPathAttribute(String option,
862:                        String argument, XPathAttributeCreator creator) {
863:                    int comma = argument.indexOf(',');
864:                    if (comma < 0) {
865:                        throw new IllegalArgumentException(
866:                                "Missing comma separating attribute name and xpath in "
867:                                        + option + " option: " + argument);
868:                    }
869:                    String attrName = argument.substring(0, comma);
870:                    String xpath = argument.substring(comma + 1);
871:                    converter.addAttribute(creator.create(attrName, xpath));
872:                }
873:
874:                public void printUsage(PrintStream out) {
875:                    out
876:                            .println("Usage: "
877:                                    + ConvertToARFF.class.getName()
878:                                    + " [options] <relation name> <output file> <findbugs results> [<findbugs results>...]");
879:                    super .printUsage(out);
880:                }
881:            }
882:
883:            public String toAppName(String fileName) {
884:                if (appName != null)
885:                    return appName;
886:
887:                // Remove file extension, if any
888:                int lastDot = fileName.lastIndexOf('.');
889:                if (lastDot >= 0)
890:                    fileName = fileName.substring(0, lastDot);
891:                return fileName;
892:            }
893:
894:            public static void main(String[] argv) throws Exception {
895:                // Expand any option files
896:                argv = CommandLine.expandOptionFiles(argv, true, true);
897:
898:                // Parse command line arguments
899:                C2ACommandLine commandLine = new C2ACommandLine();
900:                int argCount = commandLine.parse(argv);
901:                if (argCount > argv.length - 3) {
902:                    commandLine.printUsage(System.err);
903:                    System.exit(1);
904:                }
905:                String relationName = argv[argCount++];
906:                String outputFileName = argv[argCount++];
907:
908:                // Create the converter
909:                ConvertToARFF converter = commandLine.getConverter();
910:                if (converter.getNumAttributes() == 0) {
911:                    throw new IllegalArgumentException(
912:                            "No attributes specified!");
913:                }
914:
915:                // Open output file
916:                Writer out = new OutputStreamWriter(new BufferedOutputStream(
917:                        new FileOutputStream(outputFileName)));
918:
919:                // Read documents,
920:                // scan documents to find ranges of attributes
921:                List<DataFile> dataFileList = new ArrayList<DataFile>();
922:                while (argCount < argv.length) {
923:                    String fileName = argv[argCount++];
924:
925:                    // Read input file as dom4j tree
926:                    SAXReader reader = new SAXReader();
927:                    Document document = reader.read(fileName);
928:
929:                    DataFile dataFile = new DataFile(document, converter
930:                            .toAppName(fileName));
931:                    dataFileList.add(dataFile);
932:
933:                    converter.scan(dataFile.getDocument(), dataFile
934:                            .getAppName());
935:                }
936:
937:                // Generate ARFF header
938:                converter.generateHeader(relationName, out);
939:
940:                // Generate instances from each document
941:                for (DataFile dataFile : dataFileList) {
942:                    converter.generateInstances(dataFile.getDocument(),
943:                            dataFile.getAppName(), out);
944:                }
945:
946:                out.close();
947:            }
948:
949:        }
950:
951:        // vim:ts=4
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.