001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * SearchAlgorithm.java
019: * Copyright (C) 2003 University of Waikato, Hamilton, New Zealand
020: *
021: */
022: package weka.classifiers.bayes.net.search;
023:
024: import weka.classifiers.bayes.BayesNet;
025: import weka.classifiers.bayes.net.ParentSet;
026: import weka.core.Instances;
027: import weka.core.OptionHandler;
028:
029: import java.io.Serializable;
030: import java.util.Enumeration;
031: import java.util.Vector;
032:
033: /**
034: * This is the base class for all search algorithms for learning Bayes networks.
035: * It contains some common code, used by other network structure search algorithms,
036: * and should not be used by itself.
037: *
038: <!-- options-start -->
039: <!-- options-end -->
040: *
041: * @author Remco Bouckaert
042: * @version $Revision: 1.7 $
043: */
044: public class SearchAlgorithm implements OptionHandler, Serializable {
045:
046: /** for serialization */
047: static final long serialVersionUID = 6164792240778525312L;
048:
049: /**
050: * Holds upper bound on number of parents
051: */
052: protected int m_nMaxNrOfParents = 1;
053:
054: /**
055: * determines whether initial structure is an empty graph or a Naive Bayes network
056: */
057: protected boolean m_bInitAsNaiveBayes = true;
058:
059: /**
060: * Determines whether after structure is found a MarkovBlanketClassifier correction should be applied
061: * If this is true, m_bInitAsNaiveBayes is overridden and interpreted as false.
062: */
063: protected boolean m_bMarkovBlanketClassifier = false;
064:
065: /** c'tor **/
066: public SearchAlgorithm() {
067: } // SearchAlgorithm
068:
069: /**
070: * AddArcMakesSense checks whether adding the arc from iAttributeTail to iAttributeHead
071: * does not already exists and does not introduce a cycle
072: *
073: * @param bayesNet
074: * @param instances
075: * @param iAttributeHead index of the attribute that becomes head of the arrow
076: * @param iAttributeTail index of the attribute that becomes tail of the arrow
077: * @return true if adding arc is allowed, otherwise false
078: */
079: protected boolean addArcMakesSense(BayesNet bayesNet,
080: Instances instances, int iAttributeHead, int iAttributeTail) {
081: if (iAttributeHead == iAttributeTail) {
082: return false;
083: }
084:
085: // sanity check: arc should not be in parent set already
086: if (isArc(bayesNet, iAttributeHead, iAttributeTail)) {
087: return false;
088: }
089:
090: // sanity check: arc should not introduce a cycle
091: int nNodes = instances.numAttributes();
092: boolean[] bDone = new boolean[nNodes];
093:
094: for (int iNode = 0; iNode < nNodes; iNode++) {
095: bDone[iNode] = false;
096: }
097:
098: // check for cycles
099: bayesNet.getParentSet(iAttributeHead).addParent(iAttributeTail,
100: instances);
101:
102: for (int iNode = 0; iNode < nNodes; iNode++) {
103:
104: // find a node for which all parents are 'done'
105: boolean bFound = false;
106:
107: for (int iNode2 = 0; !bFound && iNode2 < nNodes; iNode2++) {
108: if (!bDone[iNode2]) {
109: boolean bHasNoParents = true;
110:
111: for (int iParent = 0; iParent < bayesNet
112: .getParentSet(iNode2).getNrOfParents(); iParent++) {
113: if (!bDone[bayesNet.getParentSet(iNode2)
114: .getParent(iParent)]) {
115: bHasNoParents = false;
116: }
117: }
118:
119: if (bHasNoParents) {
120: bDone[iNode2] = true;
121: bFound = true;
122: }
123: }
124: }
125:
126: if (!bFound) {
127: bayesNet.getParentSet(iAttributeHead).deleteLastParent(
128: instances);
129:
130: return false;
131: }
132: }
133:
134: bayesNet.getParentSet(iAttributeHead).deleteLastParent(
135: instances);
136:
137: return true;
138: } // AddArcMakesCycle
139:
140: /**
141: * reverseArcMakesSense checks whether the arc from iAttributeTail to
142: * iAttributeHead exists and reversing does not introduce a cycle
143: *
144: * @param bayesNet
145: * @param instances
146: * @param iAttributeHead index of the attribute that is head of the arrow
147: * @param iAttributeTail index of the attribute that is tail of the arrow
148: * @return true if the arc from iAttributeTail to iAttributeHead exists and reversing does not introduce a cycle
149: */
150: protected boolean reverseArcMakesSense(BayesNet bayesNet,
151: Instances instances, int iAttributeHead, int iAttributeTail) {
152:
153: if (iAttributeHead == iAttributeTail) {
154: return false;
155: }
156:
157: // sanity check: arc should be in parent set already
158: if (!isArc(bayesNet, iAttributeHead, iAttributeTail)) {
159: return false;
160: }
161:
162: // sanity check: arc should not introduce a cycle
163: int nNodes = instances.numAttributes();
164: boolean[] bDone = new boolean[nNodes];
165:
166: for (int iNode = 0; iNode < nNodes; iNode++) {
167: bDone[iNode] = false;
168: }
169:
170: // check for cycles
171: bayesNet.getParentSet(iAttributeTail).addParent(iAttributeHead,
172: instances);
173:
174: for (int iNode = 0; iNode < nNodes; iNode++) {
175:
176: // find a node for which all parents are 'done'
177: boolean bFound = false;
178:
179: for (int iNode2 = 0; !bFound && iNode2 < nNodes; iNode2++) {
180: if (!bDone[iNode2]) {
181: ParentSet parentSet = bayesNet.getParentSet(iNode2);
182: boolean bHasNoParents = true;
183: for (int iParent = 0; iParent < parentSet
184: .getNrOfParents(); iParent++) {
185: if (!bDone[parentSet.getParent(iParent)]) {
186:
187: // this one has a parent which is not 'done' UNLESS it is the arc to be reversed
188: if (!(iNode2 == iAttributeHead && parentSet
189: .getParent(iParent) == iAttributeTail)) {
190: bHasNoParents = false;
191: }
192: }
193: }
194:
195: if (bHasNoParents) {
196: bDone[iNode2] = true;
197: bFound = true;
198: }
199: }
200: }
201:
202: if (!bFound) {
203: bayesNet.getParentSet(iAttributeTail).deleteLastParent(
204: instances);
205: return false;
206: }
207: }
208:
209: bayesNet.getParentSet(iAttributeTail).deleteLastParent(
210: instances);
211: return true;
212: } // ReverseArcMakesCycle
213:
214: /**
215: * IsArc checks whether the arc from iAttributeTail to iAttributeHead already exists
216: *
217: * @param bayesNet
218: * @param iAttributeHead index of the attribute that becomes head of the arrow
219: * @param iAttributeTail index of the attribute that becomes tail of the arrow
220: * @return true if the arc from iAttributeTail to iAttributeHead already exists
221: */
222: protected boolean isArc(BayesNet bayesNet, int iAttributeHead,
223: int iAttributeTail) {
224: for (int iParent = 0; iParent < bayesNet.getParentSet(
225: iAttributeHead).getNrOfParents(); iParent++) {
226: if (bayesNet.getParentSet(iAttributeHead)
227: .getParent(iParent) == iAttributeTail) {
228: return true;
229: }
230: }
231:
232: return false;
233: } // IsArc
234:
235: /**
236: * Returns an enumeration describing the available options.
237: *
238: * @return an enumeration of all the available options.
239: */
240: public Enumeration listOptions() {
241: return new Vector(0).elements();
242: } // listOption
243:
244: /**
245: * Parses a given list of options. <p/>
246: *
247: * @param options the list of options as an array of strings
248: * @throws Exception if an option is not supported
249: */
250: public void setOptions(String[] options) throws Exception {
251: } // setOptions
252:
253: /**
254: * Gets the current settings of the Classifier.
255: *
256: * @return an array of strings suitable for passing to setOptions
257: */
258: public String[] getOptions() {
259: return new String[0];
260: } // getOptions
261:
262: /**
263: * a string representation of the algorithm
264: *
265: * @return a string representation
266: */
267: public String toString() {
268: return "SearchAlgorithm\n";
269: } // toString
270:
271: /**
272: * buildStructure determines the network structure/graph of the network.
273: * The default behavior is creating a network where all nodes have the first
274: * node as its parent (i.e., a BayesNet that behaves like a naive Bayes classifier).
275: * This method can be overridden by derived classes to restrict the class
276: * of network structures that are acceptable.
277: *
278: * @param bayesNet the network
279: * @param instances the data to use
280: * @throws Exception if something goes wrong
281: */
282: public void buildStructure(BayesNet bayesNet, Instances instances)
283: throws Exception {
284: if (m_bInitAsNaiveBayes) {
285: int iClass = instances.classIndex();
286: // initialize parent sets to have arrow from classifier node to
287: // each of the other nodes
288: for (int iAttribute = 0; iAttribute < instances
289: .numAttributes(); iAttribute++) {
290: if (iAttribute != iClass) {
291: bayesNet.getParentSet(iAttribute).addParent(iClass,
292: instances);
293: }
294: }
295: }
296: search(bayesNet, instances);
297: if (m_bMarkovBlanketClassifier) {
298: doMarkovBlanketCorrection(bayesNet, instances);
299: }
300: } // buildStructure
301:
302: /**
303: *
304: * @param bayesNet
305: * @param instances
306: */
307: protected void search(BayesNet bayesNet, Instances instances)
308: throws Exception {
309: // placeholder with implementation in derived classes
310: } // search
311:
312: /**
313: * for each node in the network make sure it is in the
314: * Markov blanket of the classifier node, and if not,
315: * add arrows so that it is. If the node is an ancestor
316: * of the classifier node, add arrow pointing to the classifier
317: * node, otherwise, add arrow pointing to attribute node.
318: *
319: * @param bayesNet
320: * @param instances
321: */
322: protected void doMarkovBlanketCorrection(BayesNet bayesNet,
323: Instances instances) {
324: // Add class node as parent if it is not in the Markov Boundary
325: int iClass = instances.classIndex();
326: ParentSet ancestors = new ParentSet();
327: int nOldSize = 0;
328: ancestors.addParent(iClass, instances);
329: while (nOldSize != ancestors.getNrOfParents()) {
330: nOldSize = ancestors.getNrOfParents();
331: for (int iNode = 0; iNode < nOldSize; iNode++) {
332: int iCurrent = ancestors.getParent(iNode);
333: ParentSet p = bayesNet.getParentSet(iCurrent);
334: for (int iParent = 0; iParent < p.getNrOfParents(); iParent++) {
335: if (!ancestors.contains(p.getParent(iParent))) {
336: ancestors.addParent(p.getParent(iParent),
337: instances);
338: }
339: }
340: }
341: }
342:
343: for (int iAttribute = 0; iAttribute < instances.numAttributes(); iAttribute++) {
344: boolean bIsInMarkovBoundary = (iAttribute == iClass);
345: bIsInMarkovBoundary = bayesNet.getParentSet(iAttribute)
346: .contains(iClass)
347: || bayesNet.getParentSet(iClass).contains(
348: iAttribute);
349: for (int iAttribute2 = 0; !bIsInMarkovBoundary
350: && iAttribute2 < instances.numAttributes(); iAttribute2++) {
351: bIsInMarkovBoundary = bayesNet
352: .getParentSet(iAttribute2).contains(iAttribute)
353: && bayesNet.getParentSet(iAttribute2).contains(
354: iClass);
355: }
356: if (!bIsInMarkovBoundary) {
357: if (ancestors.contains(iAttribute)
358: && bayesNet.getParentSet(iClass)
359: .getCardinalityOfParents() < 1024) {
360: bayesNet.getParentSet(iClass).addParent(iAttribute,
361: instances);
362: } else {
363: bayesNet.getParentSet(iAttribute).addParent(iClass,
364: instances);
365: }
366: }
367: }
368: } // doMarkovBlanketCorrection
369:
370: /**
371: *
372: * @param bMarkovBlanketClassifier
373: */
374: protected void setMarkovBlanketClassifier(
375: boolean bMarkovBlanketClassifier) {
376: m_bMarkovBlanketClassifier = bMarkovBlanketClassifier;
377: }
378:
379: /**
380: *
381: * @return
382: */
383: protected boolean getMarkovBlanketClassifier() {
384: return m_bMarkovBlanketClassifier;
385: }
386:
387: /**
388: * @return a string to describe the MaxNrOfParentsoption.
389: */
390: public String maxNrOfParentsTipText() {
391: return "Set the maximum number of parents a node in the Bayes net can have."
392: + " When initialized as Naive Bayes, setting this parameter to 1 results in"
393: + " a Naive Bayes classifier. When set to 2, a Tree Augmented Bayes Network (TAN)"
394: + " is learned, and when set >2, a Bayes Net Augmented Bayes Network (BAN)"
395: + " is learned. By setting it to a value much larger than the number of nodes"
396: + " in the network (the default of 100000 pretty much guarantees this), no"
397: + " restriction on the number of parents is enforced";
398: } // maxNrOfParentsTipText
399:
400: /**
401: * @return a string to describe the InitAsNaiveBayes option.
402: */
403: public String initAsNaiveBayesTipText() {
404: return "When set to true (default), the initial network used for structure learning"
405: + " is a Naive Bayes Network, that is, a network with an arrow from the classifier"
406: + " node to each other node. When set to false, an empty network is used as initial"
407: + " network structure";
408: } // initAsNaiveBayesTipText
409:
410: /**
411: * @return a string to describe the MarkovBlanketClassifier option.
412: */
413: protected String markovBlanketClassifierTipText() {
414: return "When set to true (default is false), after a network structure is learned"
415: + " a Markov Blanket correction is applied to the network structure. This ensures"
416: + " that all nodes in the network are part of the Markov blanket of the classifier"
417: + " node.";
418: } // markovBlanketClassifierTipText
419: } // class SearchAlgorithm
|