001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
015: */
016:
017: /*
018: * SlidingMidPointOfWidestSide.java
019: * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
020: */
021:
022: package weka.core.neighboursearch.kdtrees;
023:
024: import weka.core.TechnicalInformation;
025: import weka.core.TechnicalInformationHandler;
026: import weka.core.TechnicalInformation.Field;
027: import weka.core.TechnicalInformation.Type;
028:
029: /**
030: <!-- globalinfo-start -->
031: * The class that splits a node into two based on the midpoint value of the dimension in which the node's rectangle is widest. If after splitting one side is empty then it is slided towards the non-empty side until there is at least one point on the empty side.<br/>
032: * <br/>
033: * For more information see also:<br/>
034: * <br/>
035: * David M. Mount (2006). ANN Programming Manual. College Park, MD, USA.
036: * <p/>
037: <!-- globalinfo-end -->
038: *
039: <!-- technical-bibtex-start -->
040: * BibTeX:
041: * <pre>
042: * @manual{Mount2006,
043: * address = {College Park, MD, USA},
044: * author = {David M. Mount},
045: * organization = {Department of Computer Science, University of Maryland},
046: * title = {ANN Programming Manual},
047: * year = {2006},
048: * HTTP = {Available from http://www.cs.umd.edu/~mount/ANN/}
049: * }
050: * </pre>
051: * <p/>
052: <!-- technical-bibtex-end -->
053: *
054: <!-- options-start -->
055: <!-- options-end -->
056: *
057: * @author Ashraf M. Kibriya (amk14@waikato.ac.nz)
058: * @version $Revision: 1.1 $
059: */
060: public class SlidingMidPointOfWidestSide extends KDTreeNodeSplitter
061: implements TechnicalInformationHandler {
062:
063: /** for serialization. */
064: private static final long serialVersionUID = 852857628205680562L;
065:
066: /** The floating point error to tolerate in finding the widest
067: * rectangular side. */
068: protected static double ERR = 0.001;
069:
070: /**
071: * Returns a string describing this nearest neighbour search algorithm.
072: *
073: * @return a description of the algorithm for displaying in the
074: * explorer/experimenter gui
075: */
076: public String globalInfo() {
077: return "The class that splits a node into two based on the midpoint value of "
078: + "the dimension in which the node's rectangle is widest. If after "
079: + "splitting one side is empty then it is slided towards the non-empty "
080: + "side until there is at least one point on the empty side.\n\n"
081: + "For more information see also:\n\n"
082: + getTechnicalInformation().toString();
083: }
084:
085: /**
086: * Returns an instance of a TechnicalInformation object, containing detailed
087: * information about the technical background of this class, e.g., paper
088: * reference or book this class is based on.
089: *
090: * @return the technical information about this class
091: */
092: public TechnicalInformation getTechnicalInformation() {
093: TechnicalInformation result;
094:
095: result = new TechnicalInformation(Type.MANUAL);
096: result.setValue(Field.AUTHOR, "David M. Mount");
097: result.setValue(Field.YEAR, "2006");
098: result.setValue(Field.TITLE, "ANN Programming Manual");
099: result
100: .setValue(Field.ORGANIZATION,
101: "Department of Computer Science, University of Maryland");
102: result.setValue(Field.ADDRESS, "College Park, MD, USA");
103: result.setValue(Field.HTTP,
104: "Available from http://www.cs.umd.edu/~mount/ANN/");
105:
106: return result;
107: }
108:
109: /**
110: * Splits a node into two based on the midpoint value of the dimension
111: * in which the node's rectangle is widest. If after splitting one side
112: * is empty then it is slided towards the non-empty side until there is
113: * at least one point on the empty side. The two nodes created after the
114: * whole splitting are correctly initialised. And, node.left and
115: * node.right are set appropriately.
116: * @param node The node to split.
117: * @param numNodesCreated The number of nodes that so far have been
118: * created for the tree, so that the newly created nodes are
119: * assigned correct/meaningful node numbers/ids.
120: * @param nodeRanges The attributes' range for the points inside
121: * the node that is to be split.
122: * @param universe The attributes' range for the whole
123: * point-space.
124: * @throws Exception If there is some problem in splitting the
125: * given node.
126: */
127: public void splitNode(KDTreeNode node, int numNodesCreated,
128: double[][] nodeRanges, double[][] universe)
129: throws Exception {
130:
131: correctlyInitialized();
132:
133: if (node.m_NodesRectBounds == null) {
134: node.m_NodesRectBounds = new double[2][node.m_NodeRanges.length];
135: for (int i = 0; i < node.m_NodeRanges.length; i++) {
136: node.m_NodesRectBounds[MIN][i] = node.m_NodeRanges[i][MIN];
137: node.m_NodesRectBounds[MAX][i] = node.m_NodeRanges[i][MAX];
138: }
139: }
140:
141: // finding widest side of the hyper rectangle
142: double maxRectWidth = Double.NEGATIVE_INFINITY, maxPtWidth = Double.NEGATIVE_INFINITY, tempval;
143: int splitDim = -1, classIdx = m_Instances.classIndex();
144:
145: for (int i = 0; i < node.m_NodesRectBounds[0].length; i++) {
146: if (i == classIdx)
147: continue;
148: tempval = node.m_NodesRectBounds[MAX][i]
149: - node.m_NodesRectBounds[MIN][i];
150: if (m_NormalizeNodeWidth) {
151: tempval = tempval / universe[i][WIDTH];
152: }
153: if (tempval > maxRectWidth
154: && node.m_NodeRanges[i][WIDTH] > 0.0)
155: maxRectWidth = tempval;
156: }
157:
158: for (int i = 0; i < node.m_NodesRectBounds[0].length; i++) {
159: if (i == classIdx)
160: continue;
161: tempval = node.m_NodesRectBounds[MAX][i]
162: - node.m_NodesRectBounds[MIN][i];
163: if (m_NormalizeNodeWidth) {
164: tempval = tempval / universe[i][WIDTH];
165: }
166: if (tempval >= maxRectWidth * (1 - ERR)
167: && node.m_NodeRanges[i][WIDTH] > 0.0) {
168: if (node.m_NodeRanges[i][WIDTH] > maxPtWidth) {
169: maxPtWidth = node.m_NodeRanges[i][WIDTH];
170: if (m_NormalizeNodeWidth)
171: maxPtWidth = maxPtWidth / universe[i][WIDTH];
172: splitDim = i;
173: }
174: }
175: }
176:
177: double splitVal = node.m_NodesRectBounds[MIN][splitDim]
178: + (node.m_NodesRectBounds[MAX][splitDim] - node.m_NodesRectBounds[MIN][splitDim])
179: * 0.5;
180: // might want to try to slide it further to contain more than one point on
181: // the
182: // side that is resulting empty
183: if (splitVal < node.m_NodeRanges[splitDim][MIN])
184: splitVal = node.m_NodeRanges[splitDim][MIN];
185: else if (splitVal >= node.m_NodeRanges[splitDim][MAX])
186: splitVal = node.m_NodeRanges[splitDim][MAX]
187: - node.m_NodeRanges[splitDim][WIDTH] * 0.001;
188:
189: int rightStart = rearrangePoints(m_InstList, node.m_Start,
190: node.m_End, splitDim, splitVal);
191:
192: if (rightStart == node.m_Start || rightStart > node.m_End) {
193: if (rightStart == node.m_Start)
194: throw new Exception(
195: "Left child is empty in node "
196: + node.m_NodeNumber
197: + ". Not possible with "
198: + "SlidingMidPointofWidestSide splitting method. Please "
199: + "check code.");
200: else
201: throw new Exception(
202: "Right child is empty in node "
203: + node.m_NodeNumber
204: + ". Not possible with "
205: + "SlidingMidPointofWidestSide splitting method. Please "
206: + "check code.");
207: }
208:
209: node.m_SplitDim = splitDim;
210: node.m_SplitValue = splitVal;
211:
212: double[][] widths = new double[2][node.m_NodesRectBounds[0].length];
213:
214: System.arraycopy(node.m_NodesRectBounds[MIN], 0, widths[MIN],
215: 0, node.m_NodesRectBounds[MIN].length);
216: System.arraycopy(node.m_NodesRectBounds[MAX], 0, widths[MAX],
217: 0, node.m_NodesRectBounds[MAX].length);
218: widths[MAX][splitDim] = splitVal;
219:
220: node.m_Left = new KDTreeNode(numNodesCreated + 1, node.m_Start,
221: rightStart - 1, m_EuclideanDistance.initializeRanges(
222: m_InstList, node.m_Start, rightStart - 1),
223: widths);
224:
225: widths = new double[2][node.m_NodesRectBounds[0].length];
226: System.arraycopy(node.m_NodesRectBounds[MIN], 0, widths[MIN],
227: 0, node.m_NodesRectBounds[MIN].length);
228: System.arraycopy(node.m_NodesRectBounds[MAX], 0, widths[MAX],
229: 0, node.m_NodesRectBounds[MAX].length);
230: widths[MIN][splitDim] = splitVal;
231:
232: node.m_Right = new KDTreeNode(numNodesCreated + 2, rightStart,
233: node.m_End, m_EuclideanDistance.initializeRanges(
234: m_InstList, rightStart, node.m_End), widths);
235: }
236:
237: /**
238: * Re-arranges the indices array such that the points <= to the splitVal
239: * are on the left of the array and those > the splitVal are on the right.
240: *
241: * @param indices The master index array.
242: * @param startidx The begining index of portion of indices that needs
243: * re-arranging.
244: * @param endidx The end index of portion of indices that needs
245: * re-arranging.
246: * @param splitDim The split dimension/attribute.
247: * @param splitVal The split value.
248: * @return The startIdx of the points > the splitVal (the points
249: * belonging to the right child of the node).
250: */
251: protected int rearrangePoints(int[] indices, final int startidx,
252: final int endidx, final int splitDim, final double splitVal) {
253:
254: int tmp, left = startidx - 1;
255: for (int i = startidx; i <= endidx; i++) {
256: if (m_EuclideanDistance.valueIsSmallerEqual(m_Instances
257: .instance(indices[i]), splitDim, splitVal)) {
258: left++;
259: tmp = indices[left];
260: indices[left] = indices[i];
261: indices[i] = tmp;
262: }// end valueIsSmallerEqual
263: }// endfor
264: return left + 1;
265: }
266: }
|