Source Code Cross Referenced for Filter.java in  » Science » weka » weka » filters » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Science » weka » weka.filters 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         *    This program is free software; you can redistribute it and/or modify
0003:         *    it under the terms of the GNU General Public License as published by
0004:         *    the Free Software Foundation; either version 2 of the License, or
0005:         *    (at your option) any later version.
0006:         *
0007:         *    This program is distributed in the hope that it will be useful,
0008:         *    but WITHOUT ANY WARRANTY; without even the implied warranty of
0009:         *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0010:         *    GNU General Public License for more details.
0011:         *
0012:         *    You should have received a copy of the GNU General Public License
0013:         *    along with this program; if not, write to the Free Software
0014:         *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015:         */
0016:
0017:        /*
0018:         *    Filter.java
0019:         *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
0020:         *
0021:         */
0022:
0023:        package weka.filters;
0024:
0025:        import weka.core.Capabilities;
0026:        import weka.core.CapabilitiesHandler;
0027:        import weka.core.Instance;
0028:        import weka.core.Instances;
0029:        import weka.core.Option;
0030:        import weka.core.OptionHandler;
0031:        import weka.core.Queue;
0032:        import weka.core.RelationalLocator;
0033:        import weka.core.SerializedObject;
0034:        import weka.core.StringLocator;
0035:        import weka.core.Utils;
0036:        import weka.core.Capabilities.Capability;
0037:        import weka.core.converters.ConverterUtils.DataSource;
0038:
0039:        import java.io.FileOutputStream;
0040:        import java.io.PrintWriter;
0041:        import java.io.Serializable;
0042:        import java.util.Enumeration;
0043:        import java.util.Iterator;
0044:
0045:        /** 
0046:         * An abstract class for instance filters: objects that take instances
0047:         * as input, carry out some transformation on the instance and then
0048:         * output the instance. The method implementations in this class
0049:         * assume that most of the work will be done in the methods overridden
0050:         * by subclasses.<p>
0051:         *
0052:         * A simple example of filter use. This example doesn't remove
0053:         * instances from the output queue until all instances have been
0054:         * input, so has higher memory consumption than an approach that
0055:         * uses output instances as they are made available:<p>
0056:         *
0057:         * <code> <pre>
0058:         *  Filter filter = ..some type of filter..
0059:         *  Instances data = ..some instances..
0060:         *  for (int i = 0; i < data.numInstances(); i++) {
0061:         *    filter.input(data.instance(i));
0062:         *  }
0063:         *  filter.batchFinished();
0064:         *  Instances newData = filter.getOutputFormat();
0065:         *  Instance processed;
0066:         *  while ((processed = filter.output()) != null) {
0067:         *    newData.add(processed);
0068:         *  }
0069:         *  ..do something with newData..
0070:         * </pre> </code>
0071:         *
0072:         * @author Len Trigg (trigg@cs.waikato.ac.nz)
0073:         * @version $Revision: 1.38 $
0074:         */
0075:        public abstract class Filter implements  Serializable,
0076:                CapabilitiesHandler {
0077:
0078:            /** for serialization */
0079:            private static final long serialVersionUID = -8835063755891851218L;
0080:
0081:            /** The output format for instances */
0082:            private Instances m_OutputFormat = null;
0083:
0084:            /** The output instance queue */
0085:            private Queue m_OutputQueue = null;
0086:
0087:            /** Indices of string attributes in the output format */
0088:            protected StringLocator m_OutputStringAtts = null;
0089:
0090:            /** Indices of string attributes in the input format */
0091:            protected StringLocator m_InputStringAtts = null;
0092:
0093:            /** Indices of relational attributes in the output format */
0094:            protected RelationalLocator m_OutputRelAtts = null;
0095:
0096:            /** Indices of relational attributes in the input format */
0097:            protected RelationalLocator m_InputRelAtts = null;
0098:
0099:            /** The input format for instances */
0100:            private Instances m_InputFormat = null;
0101:
0102:            /** Record whether the filter is at the start of a batch */
0103:            protected boolean m_NewBatch = true;
0104:
0105:            /** True if the first batch has been done */
0106:            protected boolean m_FirstBatchDone = false;
0107:
0108:            /**
0109:             * Returns true if a new batch was started: either a new instance of the
0110:             * filter was created, or setInputFormat() or batchFinished() got called.
0111:             * 
0112:             * @return true if a new batch has been initiated
0113:             * @see #m_NewBatch
0114:             * @see #batchFinished()
0115:             */
0116:            public boolean isNewBatch() {
0117:                return m_NewBatch;
0118:            }
0119:
0120:            /**
0121:             * Returns true if the first batch of instances got processed. Necessary for
0122:             * supervised filters, which "learn" from the first batch and then shouldn't
0123:             * get updated with subsequent calls of batchFinished(). The flag is set by
0124:             * batchFinished() and cleared again by setInputFormat(Instances).
0125:             * @return true if the first batch has been processed
0126:             * @see #m_FirstBatchDone
0127:             * @see #batchFinished()
0128:             */
0129:            public boolean isFirstBatchDone() {
0130:                return m_FirstBatchDone;
0131:            }
0132:
0133:            /** 
0134:             * Returns the Capabilities of this filter. This base implementation only
0135:             * creates the object; derived filters have to override it to enable capabilities.
0136:             *
0137:             * @return            the capabilities of this object
0138:             * @see               Capabilities
0139:             */
0140:            public Capabilities getCapabilities() {
0141:                Capabilities result;
0142:
0143:                result = new Capabilities(this );
0144:                result.setMinimumNumberInstances(0); // no minimum number of instances required
0145:
0146:                return result;
0147:            }
0148:
0149:            /** 
0150:             * Returns the Capabilities of this filter, customized based on the data.
0151:             * I.e., it removes all class capabilities (apart from NO_CLASS) in case
0152:             * there's no class attribute present, or removes the NO_CLASS capability
0153:             * in case there's a class present.
0154:             *
0155:             * @param data	the data to use for customization
0156:             * @return            the capabilities of this object, based on the data
0157:             * @see               #getCapabilities()
0158:             */
0159:            public Capabilities getCapabilities(Instances data) {
0160:                Capabilities result;
0161:                Capabilities classes;
0162:                Iterator iter;
0163:                Capability cap;
0164:
0165:                result = getCapabilities();
0166:
0167:                // no class? -> remove all class capabilities apart from NO_CLASS
0168:                if (data.classIndex() == -1) {
0169:                    classes = result.getClassCapabilities();
0170:                    iter = classes.capabilities();
0171:                    while (iter.hasNext()) {
0172:                        cap = (Capability) iter.next();
0173:                        if (cap != Capability.NO_CLASS) {
0174:                            result.disable(cap);
0175:                            result.disableDependency(cap);
0176:                        }
0177:                    }
0178:                }
0179:                // class? -> remove NO_CLASS
0180:                else {
0181:                    result.disable(Capability.NO_CLASS);
0182:                    result.disableDependency(Capability.NO_CLASS);
0183:                }
0184:
0185:                return result;
0186:            }
0187:
0188:            /**
0189:             * Sets the format of output instances. The derived class should use this
0190:             * method once it has determined the output format. The relation is renamed to
0191:             * reflect the filter (class name and options) and the output queue is cleared.
0192:             *
0193:             * @param outputFormat the new output format; null clears the output format
0194:             */
0195:            protected void setOutputFormat(Instances outputFormat) {
0196:
0197:                if (outputFormat != null) {
0198:                    m_OutputFormat = outputFormat.stringFreeStructure();
0199:                    initOutputLocators(m_OutputFormat, null);
0200:
0201:                    // Rename the relation: <old name>-<filter class name><trimmed options, concatenated>
0202:                    String relationName = outputFormat.relationName() + "-"
0203:                            + this .getClass().getName();
0204:                    if (this  instanceof  OptionHandler) {
0205:                        String[] options = ((OptionHandler) this ).getOptions();
0206:                        for (int i = 0; i < options.length; i++) {
0207:                            relationName += options[i].trim();
0208:                        }
0209:                    }
0210:                    m_OutputFormat.setRelationName(relationName);
0211:                } else {
0212:                    m_OutputFormat = null;
0213:                }
0214:                m_OutputQueue = new Queue();
0215:            }
0216:
0217:            /**
0218:             * Gets the currently set input format. The internal dataset itself is
0219:             * returned (not a copy), so it may contain buffered instances.
0220:             *
0221:             * @return the input Instances.
0222:             */
0223:            protected Instances getInputFormat() {
0224:
0225:                return m_InputFormat;
0226:            }
0227:
0228:            /**
0229:             * Returns a reference to the current input format without
0230:             * copying it.
0231:             *
0232:             * @return a reference to the current input format (null if none has been set)
0233:             */
0234:            protected Instances inputFormatPeek() {
0235:
0236:                return m_InputFormat;
0237:            }
0238:
0239:            /**
0240:             * Returns a reference to the current output format without
0241:             * copying it.
0242:             *
0243:             * @return a reference to the current output format (null if not yet defined)
0244:             */
0245:            protected Instances outputFormatPeek() {
0246:
0247:                return m_OutputFormat;
0248:            }
0249:
0250:            /**
0251:             * Adds an output instance to the queue and assigns it to the output format. The
0252:             * derived class should use this method for each output instance it makes available.
0253:             *
0254:             * @param instance the instance to be added to the queue (ignored if null).
0255:             */
0256:            protected void push(Instance instance) {
0257:
0258:                if (instance != null) {
0259:                    if (instance.dataset() != null) // carry over string/relational values first
0260:                        copyValues(instance, false);
0261:                    instance.setDataset(m_OutputFormat);
0262:                    m_OutputQueue.push(instance);
0263:                }
0264:            }
0265:
0266:            /**
0267:             * Clears the output queue by replacing it with a new, empty queue.
0268:             */
0269:            protected void resetQueue() {
0270:
0271:                m_OutputQueue = new Queue();
0272:            }
0273:
0274:            /**
0275:             * Adds the supplied input instance to the input format dataset for
0276:             * later processing. Use this method rather than
0277:             * getInputFormat().add(instance), because it first copies the instance's
0278:             * string/relational values to the input format. Null instances are ignored.
0279:             *
0280:             * @param instance the <code>Instance</code> to buffer (it gets copied when buffered).
0281:             */
0282:            protected void bufferInput(Instance instance) {
0283:
0284:                if (instance != null) {
0285:                    copyValues(instance, true);
0286:                    m_InputFormat.add(instance);
0287:                }
0288:            }
0289:
0290:            /**
0291:             * Initializes the input string and relational attribute locators. If indices
0292:             * is null then all attributes of the data will be considered, otherwise only
0293:             * the ones that were provided.
0294:             * 
0295:             * @param data		the data to initialize the locators with
0296:             * @param indices		if not null, the indices to which to restrict
0297:             * 				the locating
0298:             */
0299:            protected void initInputLocators(Instances data, int[] indices) {
0300:                if (indices == null) {
0301:                    m_InputStringAtts = new StringLocator(data);
0302:                    m_InputRelAtts = new RelationalLocator(data);
0303:                } else {
0304:                    m_InputStringAtts = new StringLocator(data, indices);
0305:                    m_InputRelAtts = new RelationalLocator(data, indices);
0306:                }
0307:            }
0308:
0309:            /**
0310:             * Initializes the output string and relational attribute locators. If indices
0311:             * is null then all attributes of the data will be considered, otherwise only
0312:             * the ones that were provided.
0313:             * 
0314:             * @param data		the data to initialize the locators with
0315:             * @param indices		if not null, the indices to which to restrict
0316:             * 				the locating
0317:             */
0318:            protected void initOutputLocators(Instances data, int[] indices) {
0319:                if (indices == null) {
0320:                    m_OutputStringAtts = new StringLocator(data);
0321:                    m_OutputRelAtts = new RelationalLocator(data);
0322:                } else {
0323:                    m_OutputStringAtts = new StringLocator(data, indices);
0324:                    m_OutputRelAtts = new RelationalLocator(data, indices);
0325:                }
0326:            }
0327:
0328:            /**
0329:             * Copies the string/relational values contained in the instance to the input
0330:             * or output format. The Instance must already be assigned to a dataset. This
0331:             * dataset and the destination dataset must have the same structure.
0332:             *
0333:             * @param instance		the Instance containing the string/relational 
0334:             * 				values to copy.
0335:             * @param isInput		if true the input format and input attribute 
0336:             * 				locators are used otherwise the output format 
0337:             * 				and output locators
0338:             */
0339:            protected void copyValues(Instance instance, boolean isInput) {
0340:
0341:                RelationalLocator.copyRelationalValues(instance,
0342:                        (isInput) ? m_InputFormat : m_OutputFormat,
0343:                        (isInput) ? m_InputRelAtts : m_OutputRelAtts);
0344:
0345:                StringLocator.copyStringValues(instance,
0346:                        (isInput) ? m_InputFormat : m_OutputFormat,
0347:                        (isInput) ? m_InputStringAtts : m_OutputStringAtts);
0348:            }
0349:
0350:            /**
0351:             * Takes string/relational values referenced by an Instance and copies them 
0352:             * from a source dataset to a destination dataset. The instance references are
0353:             * updated to be valid for the destination dataset. The instance may have the 
0354:             * structure (i.e. number and attribute position) of either dataset (this
0355:             * affects where references are obtained from). Only works if the number
0356:             * of string/relational attributes is the same in both indices (implicitly 
0357:             * these string/relational attributes should be semantically same but just 
0358:             * with shifted positions).
0359:             *
0360:             * @param instance 		the instance containing references to strings/
0361:             * 				relational values in the source dataset that 
0362:             * 				will have references updated to be valid for 
0363:             * 				the destination dataset.
0364:             * @param instSrcCompat 	true if the instance structure is the same as 
0365:             * 				the source, or false if it is the same as the 
0366:             * 				destination (i.e. which of the string/relational 
0367:             * 				attribute indices contains the correct locations 
0368:             * 				for this instance).
0369:             * @param srcDataset 		the dataset for which the current instance 
0370:             * 				string/relational value references are valid 
0371:             * 				(after any position mapping if needed)
0372:             * @param destDataset 	the dataset for which the current instance 
0373:             * 				string/relational value references need to be 
0374:             * 				inserted (after any position mapping if needed)
0375:             */
0376:            protected void copyValues(Instance instance, boolean instSrcCompat,
0377:                    Instances srcDataset, Instances destDataset) {
0378:
0379:                RelationalLocator.copyRelationalValues(instance, instSrcCompat,
0380:                        srcDataset, m_InputRelAtts, destDataset,
0381:                        m_OutputRelAtts);
0382:                // string values are copied to the same destination as the relational values
0383:                StringLocator.copyStringValues(instance, instSrcCompat,
0384:                        srcDataset, m_InputStringAtts, destDataset,
0385:                        m_OutputStringAtts);
0386:            }
0387:
0388:            /**
0389:             * Removes all buffered instances from the input format dataset.
0390:             * Use this method rather than getInputFormat().delete();
0391:             */
0392:            protected void flushInput() {
0393:                // with string/relational attributes present, rebuild the header via stringFreeStructure()
0394:                if ((m_InputStringAtts.getAttributeIndices().length > 0)
0395:                        || (m_InputRelAtts.getAttributeIndices().length > 0)) {
0396:                    m_InputFormat = m_InputFormat.stringFreeStructure();
0397:                } else {
0398:                    // This is more efficient than new Instances(m_InputFormat, 0);
0399:                    m_InputFormat.delete();
0400:                }
0401:            }
0402:
0403:            /**
0404:             * Tests whether the filter can actually handle the data, using the
0405:             * capabilities customized for that data (see getCapabilities(Instances)).
0406:             * @param instanceInfo	the data to test
0407:             * @throws Exception		if the test fails
0408:             */
0409:            protected void testInputFormat(Instances instanceInfo)
0410:                    throws Exception {
0411:                getCapabilities(instanceInfo).testWithFail(instanceInfo);
0412:            }
0413:
0414:            /**
0415:             * Sets the format of the input instances. If the filter is able to
0416:             * determine the output format before seeing any input instances, it
0417:             * does so here. This default implementation clears the output format
0418:             * and output queue, sets the new batch flag and resets the first-batch-done
0419:             * flag. Overriders should call <code>super.setInputFormat(Instances)</code>.
0420:             *
0421:             * @param instanceInfo an Instances object containing the input instance
0422:             * structure (any instances contained in the object are ignored - only the
0423:             * structure is required).
0424:             * @return true if the outputFormat may be collected immediately (this default implementation always returns false)
0425:             * @throws Exception if the inputFormat can't be set successfully, e.g. testInputFormat() fails
0426:             */
0427:            public boolean setInputFormat(Instances instanceInfo)
0428:                    throws Exception {
0429:
0430:                testInputFormat(instanceInfo);
0431:
0432:                m_InputFormat = instanceInfo.stringFreeStructure();
0433:                m_OutputFormat = null;
0434:                m_OutputQueue = new Queue();
0435:                m_NewBatch = true;
0436:                m_FirstBatchDone = false;
0437:                initInputLocators(m_InputFormat, null);
0438:                return false;
0439:            }
0440:
0441:            /**
0442:             * Gets the format of the output instances. This should only be called
0443:             * after input() or batchFinished() has returned true. The relation
0444:             * name of the output instances should be changed to reflect the
0445:             * action of the filter (eg: add the filter name and options).
0446:             *
0447:             * @return a newly created Instances object containing the output instance
0448:             * structure only.
0449:             * @throws NullPointerException if no input structure has been
0450:             * defined (or the output format hasn't been determined yet) 
0451:             */
0452:            public Instances getOutputFormat() {
0453:
0454:                if (m_OutputFormat == null) {
0455:                    throw new NullPointerException("No output format defined.");
0456:                }
0457:                return new Instances(m_OutputFormat, 0);
0458:            }
0459:
0460:            /**
0461:             * Input an instance for filtering. Ordinarily the instance is
0462:             * processed and made available for output immediately. Some filters
0463:             * require all instances be read before producing output, in which
0464:             * case output instances should be collected after calling
0465:             * batchFinished(). If the input marks the start of a new batch, the
0466:             * output queue is cleared. This default implementation only buffers the
0467:             * instance and assumes all conversion will occur when batchFinished() is called.
0468:             *
0469:             * @param instance the input instance
0470:             * @return true if the filtered instance may now be
0471:             * collected with output() (this default implementation always returns false).
0472:             * @throws NullPointerException if the input format has not been
0473:             * defined.
0474:             * @throws Exception if the input instance was not of the correct 
0475:             * format or if there was a problem with the filtering.  
0476:             */
0477:            public boolean input(Instance instance) throws Exception {
0478:
0479:                if (m_InputFormat == null) {
0480:                    throw new NullPointerException(
0481:                            "No input instance format defined");
0482:                }
0483:                if (m_NewBatch) { // first instance of a batch: start with an empty output queue
0484:                    m_OutputQueue = new Queue();
0485:                    m_NewBatch = false;
0486:                }
0487:                bufferInput(instance);
0488:                return false;
0489:            }
0490:
0491:            /**
0492:             * Signify that this batch of input to the filter is finished. If
0493:             * the filter requires all instances prior to filtering, output()
0494:             * may now be called to retrieve the filtered instances. Any
0495:             * subsequent instances filtered should be filtered based on setting
0496:             * obtained from the first batch (unless the inputFormat has been
0497:             * re-assigned or new options have been set). This default
0498:             * implementation assumes all instance processing occurs during
0499:             * setInputFormat() and input().
0500:             *
0501:             * @return true if there are instances pending output
0502:             * @throws NullPointerException if no input structure or no output format has been defined.
0503:             * @throws Exception if there was a problem finishing the batch.
0504:             */
0505:            public boolean batchFinished() throws Exception {
0506:
0507:                if (m_InputFormat == null) {
0508:                    throw new NullPointerException(
0509:                            "No input instance format defined");
0510:                }
0511:                flushInput();
0512:                m_NewBatch = true;
0513:                m_FirstBatchDone = true;
0514:                return (numPendingOutput() != 0);
0515:            }
0516:
0517:            /**
0518:             * Output an instance after filtering and remove from the output queue.
0519:             *
0520:             * @return the instance that has most recently been filtered (or null if
0521:             * the queue is empty).
0522:             * @throws NullPointerException if no output structure has been defined
0523:             */
0524:            public Instance output() {
0525:
0526:                if (m_OutputFormat == null) {
0527:                    throw new NullPointerException(
0528:                            "No output instance format defined");
0529:                }
0530:                if (m_OutputQueue.empty()) {
0531:                    return null;
0532:                }
0533:                Instance result = (Instance) m_OutputQueue.pop();
0534:                // Queue drained between batches: clear out references to old strings/relationals
0535:                if (m_OutputQueue.empty() && m_NewBatch) {
0536:                    if ((m_OutputStringAtts.getAttributeIndices().length > 0)
0537:                            || (m_OutputRelAtts.getAttributeIndices().length > 0)) {
0538:                        m_OutputFormat = m_OutputFormat.stringFreeStructure();
0539:                    }
0540:                }
0541:                return result;
0542:            }
0543:
0544:            /**
0545:             * Output an instance after filtering but do not remove from the
0546:             * output queue.
0547:             *
0548:             * @return the instance that has most recently been filtered (or null if
0549:             * the queue is empty).
0550:             * @throws NullPointerException if no output structure has been defined 
0551:             */
0552:            public Instance outputPeek() {
0553:
0554:                if (m_OutputFormat == null) {
0555:                    throw new NullPointerException(
0556:                            "No output instance format defined");
0557:                }
0558:                if (m_OutputQueue.empty()) {
0559:                    return null;
0560:                }
0561:                Instance result = (Instance) m_OutputQueue.peek();
0562:                return result;
0563:            }
0564:
0565:            /**
0566:             * Returns the number of instances pending output
0567:             *
0568:             * @return the number of instances pending output
0569:             * @throws NullPointerException if no output structure has been defined
0570:             */
0571:            public int numPendingOutput() {
0572:
0573:                if (m_OutputFormat == null) {
0574:                    throw new NullPointerException(
0575:                            "No output instance format defined");
0576:                }
0577:                return m_OutputQueue.size();
0578:            }
0579:
0580:            /**
0581:             * Returns whether the output format is ready to be collected.
0582:             *
0583:             * @return true if the output format is set (i.e. not null)
0584:             */
0585:            public boolean isOutputFormatDefined() {
0586:
0587:                return (m_OutputFormat != null);
0588:            }
0589:
0590:            /**
0591:             * Creates a deep copy of the given filter using serialization.
0592:             *
0593:             * @param model 	the filter to copy (not checked for null here, unlike makeCopies)
0594:             * @return 		a deep copy of the filter
0595:             * @throws Exception 	if an error occurs
0596:             */
0597:            public static Filter makeCopy(Filter model) throws Exception {
0598:                return (Filter) new SerializedObject(model).getObject();
0599:            }
0600:
0601:            /**
0602:             * Creates a given number of deep copies of the given filter using 
0603:             * serialization.
0604:             * 
0605:             * @param model 	the filter to copy
0606:             * @param num 	the number of filter copies to create.
0607:             * @return 		an array of filters.
0608:             * @throws Exception 	if the model is null or an error occurs
0609:             */
0610:            public static Filter[] makeCopies(Filter model, int num)
0611:                    throws Exception {
0612:
0613:                if (model == null) {
0614:                    throw new Exception("No model filter set");
0615:                }
0616:                Filter[] filters = new Filter[num];
0617:                SerializedObject so = new SerializedObject(model); // created once, reused for every copy
0618:                for (int i = 0; i < filters.length; i++) {
0619:                    filters[i] = (Filter) so.getObject();
0620:                }
0621:                return filters;
0622:            }
0623:
0624:            /**
0625:             * Filters an entire set of instances through a filter and returns
0626:             * the new set. 
0627:             *
0628:             * @param data the data to be filtered
0629:             * @param filter the filter to be used (its input format must already have been set)
0630:             * @return the filtered set of data
0631:             * @throws Exception if the filter can't be used successfully
0632:             */
0633:            public static Instances useFilter(Instances data, Filter filter)
0634:                    throws Exception {
0635:                /*
0636:                System.err.println(filter.getClass().getName() 
0637:                                   + " in:" + data.numInstances());
0638:                 */
0639:                for (int i = 0; i < data.numInstances(); i++) {
0640:                    filter.input(data.instance(i));
0641:                }
0642:                filter.batchFinished();
0643:                Instances newData = filter.getOutputFormat();
0644:                Instance processed;
0645:                while ((processed = filter.output()) != null) { // drain the output queue
0646:                    newData.add(processed);
0647:                }
0648:
0649:                /*
0650:                System.err.println(filter.getClass().getName() 
0651:                                   + " out:" + newData.numInstances());
0652:                 */
0653:                return newData;
0654:            }
0655:
0656:            /**
0657:             * Method for testing filters.
0658:             *
0659:             * @param filter the filter to use
0660:             * @param options should contain the following arguments: <br>
0661:             * -i input_file <br>
0662:             * -o output_file <br>
0663:             * -c class_index <br>
0664:             * or -h for help on options
0665:             * @throws Exception if something goes wrong or the user requests help on
0666:             * command options
0667:             */
0668:            public static void filterFile(Filter filter, String[] options)
0669:                    throws Exception {
0670:
0671:                boolean debug = false;
0672:                Instances data = null;
0673:                DataSource input = null;
0674:                PrintWriter output = null;
0675:                boolean helpRequest;
0676:
0677:                try {
0678:                    helpRequest = Utils.getFlag('h', options);
0679:
0680:                    if (Utils.getFlag('d', options)) {
0681:                        debug = true;
0682:                    }
0683:                    String infileName = Utils.getOption('i', options);
0684:                    String outfileName = Utils.getOption('o', options);
0685:                    String classIndex = Utils.getOption('c', options);
0686:
0687:                    if (filter instanceof  OptionHandler) {
0688:                        ((OptionHandler) filter).setOptions(options);
0689:                    }
0690:
0691:                    Utils.checkForRemainingOptions(options);
0692:                    if (helpRequest) {
0693:                        throw new Exception("Help requested.\n");
0694:                    }
0695:                    if (infileName.length() != 0) {
0696:                        input = new DataSource(infileName);
0697:                    } else {
0698:                        input = new DataSource(System.in);
0699:                    }
0700:                    if (outfileName.length() != 0) {
0701:                        output = new PrintWriter(new FileOutputStream(
0702:                                outfileName));
0703:                    } else {
0704:                        output = new PrintWriter(System.out);
0705:                    }
0706:
0707:                    data = input.getStructure();
0708:                    if (classIndex.length() != 0) {
0709:                        if (classIndex.equals("first")) {
0710:                            data.setClassIndex(0);
0711:                        } else if (classIndex.equals("last")) {
0712:                            data.setClassIndex(data.numAttributes() - 1);
0713:                        } else {
0714:                            data
0715:                                    .setClassIndex(Integer.parseInt(classIndex) - 1);
0716:                        }
0717:                    }
0718:                } catch (Exception ex) {
0719:                    String filterOptions = "";
0720:                    // Output the error and also the valid options
0721:                    if (filter instanceof  OptionHandler) {
0722:                        filterOptions += "\nFilter options:\n\n";
0723:                        Enumeration enu = ((OptionHandler) filter)
0724:                                .listOptions();
0725:                        while (enu.hasMoreElements()) {
0726:                            Option option = (Option) enu.nextElement();
0727:                            filterOptions += option.synopsis() + '\n'
0728:                                    + option.description() + "\n";
0729:                        }
0730:                    }
0731:
0732:                    String genericOptions = "\nGeneral options:\n\n"
0733:                            + "-h\n"
0734:                            + "\tGet help on available options.\n"
0735:                            + "\t(use -b -h for help on batch mode.)\n"
0736:                            + "-i <file>\n"
0737:                            + "\tThe name of the file containing input instances.\n"
0738:                            + "\tIf not supplied then instances will be read from stdin.\n"
0739:                            + "-o <file>\n"
0740:                            + "\tThe name of the file output instances will be written to.\n"
0741:                            + "\tIf not supplied then instances will be written to stdout.\n"
0742:                            + "-c <class index>\n"
0743:                            + "\tThe number of the attribute to use as the class.\n"
0744:                            + "\t\"first\" and \"last\" are also valid entries.\n"
0745:                            + "\tIf not supplied then no class is assigned.\n";
0746:
0747:                    throw new Exception('\n' + ex.getMessage() + filterOptions
0748:                            + genericOptions);
0749:                }
0750:
0751:                if (debug) {
0752:                    System.err.println("Setting input format");
0753:                }
0754:                boolean printedHeader = false;
0755:                if (filter.setInputFormat(data)) {
0756:                    if (debug) {
0757:                        System.err.println("Getting output format");
0758:                    }
0759:                    output.println(filter.getOutputFormat().toString());
0760:                    printedHeader = true;
0761:                }
0762:
0763:                // Pass all the instances to the filter
0764:                Instance inst;
0765:                while (input.hasMoreElements(data)) {
0766:                    inst = input.nextElement(data);
0767:                    if (debug) {
0768:                        System.err.println("Input instance to filter");
0769:                    }
0770:                    if (filter.input(inst)) {
0771:                        if (debug) {
0772:                            System.err
0773:                                    .println("Filter said collect immediately");
0774:                        }
0775:                        if (!printedHeader) {
0776:                            throw new Error(
0777:                                    "Filter didn't return true from setInputFormat() "
0778:                                            + "earlier!");
0779:                        }
0780:                        if (debug) {
0781:                            System.err.println("Getting output instance");
0782:                        }
0783:                        output.println(filter.output().toString());
0784:                    }
0785:                }
0786:
0787:                // Say that input has finished, and print any pending output instances
0788:                if (debug) {
0789:                    System.err.println("Setting end of batch");
0790:                }
0791:                if (filter.batchFinished()) {
0792:                    if (debug) {
0793:                        System.err.println("Filter said collect output");
0794:                    }
0795:                    if (!printedHeader) {
0796:                        if (debug) {
0797:                            System.err.println("Getting output format");
0798:                        }
0799:                        output.println(filter.getOutputFormat().toString());
0800:                    }
0801:                    if (debug) {
0802:                        System.err.println("Getting output instance");
0803:                    }
0804:                    while (filter.numPendingOutput() > 0) {
0805:                        output.println(filter.output().toString());
0806:                        if (debug) {
0807:                            System.err.println("Getting output instance");
0808:                        }
0809:                    }
0810:                }
0811:                if (debug) {
0812:                    System.err.println("Done");
0813:                }
0814:
0815:                if (output != null) {
0816:                    output.close();
0817:                }
0818:            }
0819:
0820:            /**
0821:             * Method for testing filters ability to process multiple batches.
0822:             *
0823:             * @param filter the filter to use
0824:             * @param options should contain the following arguments:<br>
0825:             * -i (first) input file <br>
0826:             * -o (first) output file <br>
0827:             * -r (second) input file <br>
0828:             * -s (second) output file <br>
0829:             * -c class_index <br>
0830:             * or -h for help on options
0831:             * @throws Exception if something goes wrong or the user requests help on
0832:             * command options
0833:             */
0834:            public static void batchFilterFile(Filter filter, String[] options)
0835:                    throws Exception {
0836:
0837:                Instances firstData = null;
0838:                Instances secondData = null;
0839:                DataSource firstInput = null;
0840:                DataSource secondInput = null;
0841:                PrintWriter firstOutput = null;
0842:                PrintWriter secondOutput = null;
0843:                boolean helpRequest;
0844:                try {
0845:                    helpRequest = Utils.getFlag('h', options);
0846:
0847:                    String fileName = Utils.getOption('i', options);
0848:                    if (fileName.length() != 0) {
0849:                        firstInput = new DataSource(fileName);
0850:                    } else {
0851:                        throw new Exception("No first input file given.\n");
0852:                    }
0853:
0854:                    fileName = Utils.getOption('r', options);
0855:                    if (fileName.length() != 0) {
0856:                        secondInput = new DataSource(fileName);
0857:                    } else {
0858:                        throw new Exception("No second input file given.\n");
0859:                    }
0860:
0861:                    fileName = Utils.getOption('o', options);
0862:                    if (fileName.length() != 0) {
0863:                        firstOutput = new PrintWriter(new FileOutputStream(
0864:                                fileName));
0865:                    } else {
0866:                        firstOutput = new PrintWriter(System.out);
0867:                    }
0868:
0869:                    fileName = Utils.getOption('s', options);
0870:                    if (fileName.length() != 0) {
0871:                        secondOutput = new PrintWriter(new FileOutputStream(
0872:                                fileName));
0873:                    } else {
0874:                        secondOutput = new PrintWriter(System.out);
0875:                    }
0876:                    String classIndex = Utils.getOption('c', options);
0877:
0878:                    if (filter instanceof  OptionHandler) {
0879:                        ((OptionHandler) filter).setOptions(options);
0880:                    }
0881:                    Utils.checkForRemainingOptions(options);
0882:
0883:                    if (helpRequest) {
0884:                        throw new Exception("Help requested.\n");
0885:                    }
0886:                    firstData = firstInput.getStructure();
0887:                    secondData = secondInput.getStructure();
0888:                    if (!secondData.equalHeaders(firstData)) {
0889:                        throw new Exception("Input file formats differ.\n");
0890:                    }
0891:                    if (classIndex.length() != 0) {
0892:                        if (classIndex.equals("first")) {
0893:                            firstData.setClassIndex(0);
0894:                            secondData.setClassIndex(0);
0895:                        } else if (classIndex.equals("last")) {
0896:                            firstData
0897:                                    .setClassIndex(firstData.numAttributes() - 1);
0898:                            secondData
0899:                                    .setClassIndex(secondData.numAttributes() - 1);
0900:                        } else {
0901:                            firstData.setClassIndex(Integer
0902:                                    .parseInt(classIndex) - 1);
0903:                            secondData.setClassIndex(Integer
0904:                                    .parseInt(classIndex) - 1);
0905:                        }
0906:                    }
0907:                } catch (Exception ex) {
0908:                    String filterOptions = "";
0909:                    // Output the error and also the valid options
0910:                    if (filter instanceof  OptionHandler) {
0911:                        filterOptions += "\nFilter options:\n\n";
0912:                        Enumeration enu = ((OptionHandler) filter)
0913:                                .listOptions();
0914:                        while (enu.hasMoreElements()) {
0915:                            Option option = (Option) enu.nextElement();
0916:                            filterOptions += option.synopsis() + '\n'
0917:                                    + option.description() + "\n";
0918:                        }
0919:                    }
0920:
0921:                    String genericOptions = "\nGeneral options:\n\n"
0922:                            + "-h\n"
0923:                            + "\tGet help on available options.\n"
0924:                            + "-i <filename>\n"
0925:                            + "\tThe file containing first input instances.\n"
0926:                            + "-o <filename>\n"
0927:                            + "\tThe file first output instances will be written to.\n"
0928:                            + "-r <filename>\n"
0929:                            + "\tThe file containing second input instances.\n"
0930:                            + "-s <filename>\n"
0931:                            + "\tThe file second output instances will be written to.\n"
0932:                            + "-c <class index>\n"
0933:                            + "\tThe number of the attribute to use as the class.\n"
0934:                            + "\t\"first\" and \"last\" are also valid entries.\n"
0935:                            + "\tIf not supplied then no class is assigned.\n";
0936:
0937:                    throw new Exception('\n' + ex.getMessage() + filterOptions
0938:                            + genericOptions);
0939:                }
0940:                boolean printedHeader = false;
0941:                if (filter.setInputFormat(firstData)) {
0942:                    firstOutput.println(filter.getOutputFormat().toString());
0943:                    printedHeader = true;
0944:                }
0945:
0946:                // Pass all the instances to the filter
0947:                Instance inst;
0948:                while (firstInput.hasMoreElements(firstData)) {
0949:                    inst = firstInput.nextElement(firstData);
0950:                    if (filter.input(inst)) {
0951:                        if (!printedHeader) {
0952:                            throw new Error(
0953:                                    "Filter didn't return true from setInputFormat() "
0954:                                            + "earlier!");
0955:                        }
0956:                        firstOutput.println(filter.output().toString());
0957:                    }
0958:                }
0959:
0960:                // Say that input has finished, and print any pending output instances
0961:                if (filter.batchFinished()) {
0962:                    if (!printedHeader) {
0963:                        firstOutput
0964:                                .println(filter.getOutputFormat().toString());
0965:                    }
0966:                    while (filter.numPendingOutput() > 0) {
0967:                        firstOutput.println(filter.output().toString());
0968:                    }
0969:                }
0970:
0971:                if (firstOutput != null) {
0972:                    firstOutput.close();
0973:                }
0974:                printedHeader = false;
0975:                if (filter.isOutputFormatDefined()) {
0976:                    secondOutput.println(filter.getOutputFormat().toString());
0977:                    printedHeader = true;
0978:                }
0979:                // Pass all the second instances to the filter
0980:                while (secondInput.hasMoreElements(secondData)) {
0981:                    inst = secondInput.nextElement(secondData);
0982:                    if (filter.input(inst)) {
0983:                        if (!printedHeader) {
0984:                            throw new Error("Filter didn't return true from"
0985:                                    + " isOutputFormatDefined() earlier!");
0986:                        }
0987:                        secondOutput.println(filter.output().toString());
0988:                    }
0989:                }
0990:
0991:                // Say that input has finished, and print any pending output instances
0992:                if (filter.batchFinished()) {
0993:                    if (!printedHeader) {
0994:                        secondOutput.println(filter.getOutputFormat()
0995:                                .toString());
0996:                    }
0997:                    while (filter.numPendingOutput() > 0) {
0998:                        secondOutput.println(filter.output().toString());
0999:                    }
1000:                }
1001:                if (secondOutput != null) {
1002:                    secondOutput.close();
1003:                }
1004:            }
1005:
1006:            /**
1007:             * runs the filter instance with the given options.
1008:             * 
1009:             * @param filter	the filter to run
1010:             * @param options	the commandline options
1011:             */
1012:            protected static void runFilter(Filter filter, String[] options) {
1013:                try {
1014:                    if (Utils.getFlag('b', options)) {
1015:                        Filter.batchFilterFile(filter, options);
1016:                    } else {
1017:                        Filter.filterFile(filter, options);
1018:                    }
1019:                } catch (Exception e) {
1020:                    if ((e.toString().indexOf("Help requested") == -1)
1021:                            && (e.toString().indexOf("Filter options") == -1))
1022:                        e.printStackTrace();
1023:                    else
1024:                        System.err.println(e.getMessage());
1025:                }
1026:            }
1027:
1028:            /**
1029:             * Main method for testing this class.
1030:             *
1031:             * @param args should contain arguments to the filter: use -h for help
1032:             */
1033:            public static void main(String[] args) {
1034:
1035:                try {
1036:                    if (args.length == 0) {
1037:                        throw new Exception(
1038:                                "First argument must be the class name of a Filter");
1039:                    }
1040:                    String fname = args[0];
1041:                    Filter f = (Filter) Class.forName(fname).newInstance();
1042:                    args[0] = "";
1043:                    runFilter(f, args);
1044:                } catch (Exception ex) {
1045:                    ex.printStackTrace();
1046:                    System.err.println(ex.getMessage());
1047:                }
1048:            }
1049:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.