0001: /*
0002: * This program is free software; you can redistribute it and/or modify
0003: * it under the terms of the GNU General Public License as published by
0004: * the Free Software Foundation; either version 2 of the License, or
0005: * (at your option) any later version.
0006: *
0007: * This program is distributed in the hope that it will be useful,
0008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0010: * GNU General Public License for more details.
0011: *
0012: * You should have received a copy of the GNU General Public License
0013: * along with this program; if not, write to the Free Software
0014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015: */
0016:
0017: /*
0018: * Utils.java
0019: * Copyright (C) 1999-2004 University of Waikato, Hamilton, New Zealand
0020: *
0021: */
0022:
0023: package weka.core;
0024:
0025: import java.lang.Math;
0026: import java.lang.reflect.Array;
0027: import java.util.Properties;
0028: import java.io.File;
0029: import java.io.FileInputStream;
0030: import java.util.Random;
0031:
0032: /**
0033: * Class implementing some simple utility methods.
0034: *
0035: * @author Eibe Frank
0036: * @author Yong Wang
0037: * @author Len Trigg
0038: * @author Julien Prados
0039: * @version $Revision: 1.57 $
0040: */
0041: public final class Utils {
0042:
0043: /** The natural logarithm of 2. */
0044: public static double log2 = Math.log(2);
0045:
0046: /** The small deviation allowed in double comparisons */
0047: public static double SMALL = 1e-6;
0048:
0049: /**
0050: * Reads properties that inherit from three locations. Properties
0051: * are first defined in the system resource location (i.e. in the
0052: * CLASSPATH). These default properties must exist. Properties
0053: * defined in the users home directory (optional) override default
0054: * settings. Properties defined in the current directory (optional)
0055: * override all these settings.
0056: *
0057: * @param resourceName the location of the resource that should be
0058: * loaded. e.g.: "weka/core/Utils.props". (The use of hardcoded
0059: * forward slashes here is OK - see
0060: * jdk1.1/docs/guide/misc/resources.html) This routine will also
0061: * look for the file (in this case) "Utils.props" in the users home
0062: * directory and the current directory.
0063: * @return the Properties
0064: * @exception Exception if no default properties are defined, or if
0065: * an error occurs reading the properties files.
0066: */
0067: public static Properties readProperties(String resourceName)
0068: throws Exception {
0069:
0070: Properties defaultProps = new Properties();
0071: try {
0072: // Apparently hardcoded slashes are OK here
0073: // jdk1.1/docs/guide/misc/resources.html
0074: defaultProps.load(ClassLoader
0075: .getSystemResourceAsStream(resourceName));
0076: } catch (Exception ex) {
0077: /* throw new Exception("Problem reading default properties: "
0078: + ex.getMessage()); */
0079: System.err
0080: .println("Warning, unable to load properties file from "
0081: + "system resource (Utils.java)");
0082: }
0083:
0084: // Hardcoded slash is OK here
0085: // eg: see jdk1.1/docs/guide/misc/resources.html
0086: int slInd = resourceName.lastIndexOf('/');
0087: if (slInd != -1) {
0088: resourceName = resourceName.substring(slInd + 1);
0089: }
0090:
0091: // Allow a properties file in the home directory to override
0092: Properties userProps = new Properties(defaultProps);
0093: File propFile = new File(System.getProperties().getProperty(
0094: "user.home")
0095: + File.separatorChar + resourceName);
0096: if (propFile.exists()) {
0097: try {
0098: userProps.load(new FileInputStream(propFile));
0099: } catch (Exception ex) {
0100: throw new Exception("Problem reading user properties: "
0101: + propFile);
0102: }
0103: }
0104:
0105: // Allow a properties file in the current directory to override
0106: Properties localProps = new Properties(userProps);
0107: propFile = new File(resourceName);
0108: if (propFile.exists()) {
0109: try {
0110: localProps.load(new FileInputStream(propFile));
0111: } catch (Exception ex) {
0112: throw new Exception(
0113: "Problem reading local properties: " + propFile);
0114: }
0115: }
0116:
0117: return localProps;
0118: }
0119:
0120: /**
0121: * Returns the correlation coefficient of two double vectors.
0122: *
0123: * @param y1 double vector 1
0124: * @param y2 double vector 2
0125: * @param n the length of two double vectors
0126: * @return the correlation coefficient
0127: */
0128: public static final double correlation(double y1[], double y2[],
0129: int n) {
0130:
0131: int i;
0132: double av1 = 0.0, av2 = 0.0, y11 = 0.0, y22 = 0.0, y12 = 0.0, c;
0133:
0134: if (n <= 1) {
0135: return 1.0;
0136: }
0137: for (i = 0; i < n; i++) {
0138: av1 += y1[i];
0139: av2 += y2[i];
0140: }
0141: av1 /= (double) n;
0142: av2 /= (double) n;
0143: for (i = 0; i < n; i++) {
0144: y11 += (y1[i] - av1) * (y1[i] - av1);
0145: y22 += (y2[i] - av2) * (y2[i] - av2);
0146: y12 += (y1[i] - av1) * (y2[i] - av2);
0147: }
0148: if (y11 * y22 == 0.0) {
0149: c = 1.0;
0150: } else {
0151: c = y12 / Math.sqrt(Math.abs(y11 * y22));
0152: }
0153:
0154: return c;
0155: }
0156:
0157: /**
0158: * Removes all occurrences of a string from another string.
0159: *
0160: * @param inString the string to remove substrings from.
0161: * @param substring the substring to remove.
0162: * @return the input string with occurrences of substring removed.
0163: */
0164: public static String removeSubstring(String inString,
0165: String substring) {
0166:
0167: StringBuffer result = new StringBuffer();
0168: int oldLoc = 0, loc = 0;
0169: while ((loc = inString.indexOf(substring, oldLoc)) != -1) {
0170: result.append(inString.substring(oldLoc, loc));
0171: oldLoc = loc + substring.length();
0172: }
0173: result.append(inString.substring(oldLoc));
0174: return result.toString();
0175: }
0176:
0177: /**
0178: * Replaces with a new string, all occurrences of a string from
0179: * another string.
0180: *
0181: * @param inString the string to replace substrings in.
0182: * @param subString the substring to replace.
0183: * @param replaceString the replacement substring
0184: * @return the input string with occurrences of substring replaced.
0185: */
0186: public static String replaceSubstring(String inString,
0187: String subString, String replaceString) {
0188:
0189: StringBuffer result = new StringBuffer();
0190: int oldLoc = 0, loc = 0;
0191: while ((loc = inString.indexOf(subString, oldLoc)) != -1) {
0192: result.append(inString.substring(oldLoc, loc));
0193: result.append(replaceString);
0194: oldLoc = loc + subString.length();
0195: }
0196: result.append(inString.substring(oldLoc));
0197: return result.toString();
0198: }
0199:
0200: /**
0201: * Pads a string to a specified length, inserting spaces on the left
0202: * as required. If the string is too long, characters are removed (from
0203: * the right).
0204: *
0205: * @param inString the input string
0206: * @param length the desired length of the output string
0207: * @return the output string
0208: */
0209: public static String padLeft(String inString, int length) {
0210:
0211: return fixStringLength(inString, length, false);
0212: }
0213:
0214: /**
0215: * Pads a string to a specified length, inserting spaces on the right
0216: * as required. If the string is too long, characters are removed (from
0217: * the right).
0218: *
0219: * @param inString the input string
0220: * @param length the desired length of the output string
0221: * @return the output string
0222: */
0223: public static String padRight(String inString, int length) {
0224:
0225: return fixStringLength(inString, length, true);
0226: }
0227:
0228: /**
0229: * Pads a string to a specified length, inserting spaces as
0230: * required. If the string is too long, characters are removed (from
0231: * the right).
0232: *
0233: * @param inString the input string
0234: * @param length the desired length of the output string
0235: * @param right true if inserted spaces should be added to the right
0236: * @return the output string
0237: */
0238: private static/*@pure@*/String fixStringLength(String inString,
0239: int length, boolean right) {
0240:
0241: if (inString.length() < length) {
0242: while (inString.length() < length) {
0243: inString = (right ? inString.concat(" ") : " "
0244: .concat(inString));
0245: }
0246: } else if (inString.length() > length) {
0247: inString = inString.substring(0, length);
0248: }
0249: return inString;
0250: }
0251:
0252: /**
0253: * Rounds a double and converts it into String.
0254: *
0255: * @param value the double value
0256: * @param afterDecimalPoint the (maximum) number of digits permitted
0257: * after the decimal point
0258: * @return the double as a formatted string
0259: */
0260: public static/*@pure@*/String doubleToString(double value,
0261: int afterDecimalPoint) {
0262:
0263: StringBuffer stringBuffer;
0264: double temp;
0265: int dotPosition;
0266: long precisionValue;
0267:
0268: temp = value * Math.pow(10.0, afterDecimalPoint);
0269: if (Math.abs(temp) < Long.MAX_VALUE) {
0270: precisionValue = (temp > 0) ? (long) (temp + 0.5)
0271: : -(long) (Math.abs(temp) + 0.5);
0272: if (precisionValue == 0) {
0273: stringBuffer = new StringBuffer(String.valueOf(0));
0274: } else {
0275: stringBuffer = new StringBuffer(String
0276: .valueOf(precisionValue));
0277: }
0278: if (afterDecimalPoint == 0) {
0279: return stringBuffer.toString();
0280: }
0281: dotPosition = stringBuffer.length() - afterDecimalPoint;
0282: while (((precisionValue < 0) && (dotPosition < 1))
0283: || (dotPosition < 0)) {
0284: if (precisionValue < 0) {
0285: stringBuffer.insert(1, '0');
0286: } else {
0287: stringBuffer.insert(0, '0');
0288: }
0289: dotPosition++;
0290: }
0291: stringBuffer.insert(dotPosition, '.');
0292: if ((precisionValue < 0) && (stringBuffer.charAt(1) == '.')) {
0293: stringBuffer.insert(1, '0');
0294: } else if (stringBuffer.charAt(0) == '.') {
0295: stringBuffer.insert(0, '0');
0296: }
0297: int currentPos = stringBuffer.length() - 1;
0298: while ((currentPos > dotPosition)
0299: && (stringBuffer.charAt(currentPos) == '0')) {
0300: stringBuffer.setCharAt(currentPos--, ' ');
0301: }
0302: if (stringBuffer.charAt(currentPos) == '.') {
0303: stringBuffer.setCharAt(currentPos, ' ');
0304: }
0305:
0306: return stringBuffer.toString().trim();
0307: }
0308: return new String("" + value);
0309: }
0310:
0311: /**
0312: * Rounds a double and converts it into a formatted decimal-justified String.
0313: * Trailing 0's are replaced with spaces.
0314: *
0315: * @param value the double value
0316: * @param width the width of the string
0317: * @param afterDecimalPoint the number of digits after the decimal point
0318: * @return the double as a formatted string
0319: */
0320: public static/*@pure@*/String doubleToString(double value,
0321: int width, int afterDecimalPoint) {
0322:
0323: String tempString = doubleToString(value, afterDecimalPoint);
0324: char[] result;
0325: int dotPosition;
0326:
0327: if ((afterDecimalPoint >= width)
0328: || (tempString.indexOf('E') != -1)) { // Protects sci notation
0329: return tempString;
0330: }
0331:
0332: // Initialize result
0333: result = new char[width];
0334: for (int i = 0; i < result.length; i++) {
0335: result[i] = ' ';
0336: }
0337:
0338: if (afterDecimalPoint > 0) {
0339: // Get position of decimal point and insert decimal point
0340: dotPosition = tempString.indexOf('.');
0341: if (dotPosition == -1) {
0342: dotPosition = tempString.length();
0343: } else {
0344: result[width - afterDecimalPoint - 1] = '.';
0345: }
0346: } else {
0347: dotPosition = tempString.length();
0348: }
0349:
0350: int offset = width - afterDecimalPoint - dotPosition;
0351: if (afterDecimalPoint > 0) {
0352: offset--;
0353: }
0354:
0355: // Not enough room to decimal align within the supplied width
0356: if (offset < 0) {
0357: return tempString;
0358: }
0359:
0360: // Copy characters before decimal point
0361: for (int i = 0; i < dotPosition; i++) {
0362: result[offset + i] = tempString.charAt(i);
0363: }
0364:
0365: // Copy characters after decimal point
0366: for (int i = dotPosition + 1; i < tempString.length(); i++) {
0367: result[offset + i] = tempString.charAt(i);
0368: }
0369:
0370: return new String(result);
0371: }
0372:
0373: /**
0374: * Returns the basic class of an array class (handles multi-dimensional
0375: * arrays).
0376: * @param c the array to inspect
0377: * @return the class of the innermost elements
0378: */
0379: public static Class getArrayClass(Class c) {
0380: if (c.getComponentType().isArray())
0381: return getArrayClass(c.getComponentType());
0382: else
0383: return c.getComponentType();
0384: }
0385:
0386: /**
0387: * Returns the dimensions of the given array. Even though the
0388: * parameter is of type "Object" one can hand over primitve arrays, e.g.
0389: * int[3] or double[2][4].
0390: *
0391: * @param array the array to determine the dimensions for
0392: * @return the dimensions of the array
0393: */
0394: public static int getArrayDimensions(Class array) {
0395: if (array.getComponentType().isArray())
0396: return 1 + getArrayDimensions(array.getComponentType());
0397: else
0398: return 1;
0399: }
0400:
0401: /**
0402: * Returns the dimensions of the given array. Even though the
0403: * parameter is of type "Object" one can hand over primitve arrays, e.g.
0404: * int[3] or double[2][4].
0405: *
0406: * @param array the array to determine the dimensions for
0407: * @return the dimensions of the array
0408: */
0409: public static int getArrayDimensions(Object array) {
0410: return getArrayDimensions(array.getClass());
0411: }
0412:
0413: /**
0414: * Returns the given Array in a string representation. Even though the
0415: * parameter is of type "Object" one can hand over primitve arrays, e.g.
0416: * int[3] or double[2][4].
0417: *
0418: * @param array the array to return in a string representation
0419: * @return the array as string
0420: */
0421: public static String arrayToString(Object array) {
0422: String result;
0423: int dimensions;
0424: int i;
0425:
0426: result = "";
0427: dimensions = getArrayDimensions(array);
0428:
0429: if (dimensions == 0) {
0430: result = "null";
0431: } else if (dimensions == 1) {
0432: for (i = 0; i < Array.getLength(array); i++) {
0433: if (i > 0)
0434: result += ",";
0435: if (Array.get(array, i) == null)
0436: result += "null";
0437: else
0438: result += Array.get(array, i).toString();
0439: }
0440: } else {
0441: for (i = 0; i < Array.getLength(array); i++) {
0442: if (i > 0)
0443: result += ",";
0444: result += "[" + arrayToString(Array.get(array, i))
0445: + "]";
0446: }
0447: }
0448:
0449: return result;
0450: }
0451:
0452: /**
0453: * Tests if a is equal to b.
0454: *
0455: * @param a a double
0456: * @param b a double
0457: */
0458: public static/*@pure@*/boolean eq(double a, double b) {
0459:
0460: return (a - b < SMALL) && (b - a < SMALL);
0461: }
0462:
0463: /**
0464: * Checks if the given array contains any non-empty options.
0465: *
0466: * @param options an array of strings
0467: * @exception Exception if there are any non-empty options
0468: */
0469: public static void checkForRemainingOptions(String[] options)
0470: throws Exception {
0471:
0472: int illegalOptionsFound = 0;
0473: StringBuffer text = new StringBuffer();
0474:
0475: if (options == null) {
0476: return;
0477: }
0478: for (int i = 0; i < options.length; i++) {
0479: if (options[i].length() > 0) {
0480: illegalOptionsFound++;
0481: text.append(options[i] + ' ');
0482: }
0483: }
0484: if (illegalOptionsFound > 0) {
0485: throw new Exception("Illegal options: " + text);
0486: }
0487: }
0488:
0489: /**
0490: * Checks if the given array contains the flag "-Char". Stops
0491: * searching at the first marker "--". If the flag is found,
0492: * it is replaced with the empty string.
0493: *
0494: * @param flag the character indicating the flag.
0495: * @param options the array of strings containing all the options.
0496: * @return true if the flag was found
0497: * @exception Exception if an illegal option was found
0498: */
0499: public static boolean getFlag(char flag, String[] options)
0500: throws Exception {
0501:
0502: return getFlag("" + flag, options);
0503: }
0504:
0505: /**
0506: * Checks if the given array contains the flag "-String". Stops
0507: * searching at the first marker "--". If the flag is found,
0508: * it is replaced with the empty string.
0509: *
0510: * @param flag the String indicating the flag.
0511: * @param options the array of strings containing all the options.
0512: * @return true if the flag was found
0513: * @exception Exception if an illegal option was found
0514: */
0515: public static boolean getFlag(String flag, String[] options)
0516: throws Exception {
0517:
0518: int pos = getOptionPos(flag, options);
0519:
0520: if (pos > -1)
0521: options[pos] = "";
0522:
0523: return (pos > -1);
0524: }
0525:
0526: /**
0527: * Gets an option indicated by a flag "-Char" from the given array
0528: * of strings. Stops searching at the first marker "--". Replaces
0529: * flag and option with empty strings.
0530: *
0531: * @param flag the character indicating the option.
0532: * @param options the array of strings containing all the options.
0533: * @return the indicated option or an empty string
0534: * @exception Exception if the option indicated by the flag can't be found
0535: */
0536: public static/*@non_null@*/String getOption(char flag,
0537: String[] options) throws Exception {
0538:
0539: return getOption("" + flag, options);
0540: }
0541:
0542: /**
0543: * Gets an option indicated by a flag "-String" from the given array
0544: * of strings. Stops searching at the first marker "--". Replaces
0545: * flag and option with empty strings.
0546: *
0547: * @param flag the String indicating the option.
0548: * @param options the array of strings containing all the options.
0549: * @return the indicated option or an empty string
0550: * @exception Exception if the option indicated by the flag can't be found
0551: */
0552: public static/*@non_null@*/String getOption(String flag,
0553: String[] options) throws Exception {
0554:
0555: String newString;
0556: int i = getOptionPos(flag, options);
0557:
0558: if (i > -1) {
0559: if (options[i].equals("-" + flag)) {
0560: if (i + 1 == options.length) {
0561: throw new Exception("No value given for -" + flag
0562: + " option.");
0563: }
0564: options[i] = "";
0565: newString = new String(options[i + 1]);
0566: options[i + 1] = "";
0567: return newString;
0568: }
0569: if (options[i].charAt(1) == '-') {
0570: return "";
0571: }
0572: }
0573:
0574: return "";
0575: }
0576:
0577: /**
0578: * Gets the index of an option or flag indicated by a flag "-Char" from
0579: * the given array of strings. Stops searching at the first marker "--".
0580: *
0581: * @param flag the character indicating the option.
0582: * @param options the array of strings containing all the options.
0583: * @return the position if found, or -1 otherwise
0584: */
0585: public static int getOptionPos(char flag, String[] options) {
0586: return getOptionPos("" + flag, options);
0587: }
0588:
0589: /**
0590: * Gets the index of an option or flag indicated by a flag "-String" from
0591: * the given array of strings. Stops searching at the first marker "--".
0592: *
0593: * @param flag the String indicating the option.
0594: * @param options the array of strings containing all the options.
0595: * @return the position if found, or -1 otherwise
0596: */
0597: public static int getOptionPos(String flag, String[] options) {
0598: if (options == null)
0599: return -1;
0600:
0601: for (int i = 0; i < options.length; i++) {
0602: if ((options[i].length() > 0)
0603: && (options[i].charAt(0) == '-')) {
0604: // Check if it is a negative number
0605: try {
0606: Double.valueOf(options[i]);
0607: } catch (NumberFormatException e) {
0608: // found?
0609: if (options[i].equals("-" + flag))
0610: return i;
0611: // did we reach "--"?
0612: if (options[i].charAt(1) == '-')
0613: return -1;
0614: }
0615: }
0616: }
0617:
0618: return -1;
0619: }
0620:
0621: /**
0622: * Quotes a string if it contains special characters.
0623: *
0624: * The following rules are applied:
0625: *
0626: * A character is backquoted version of it is one
0627: * of <tt>" ' % \ \n \r \t</tt>.
0628: *
0629: * A string is enclosed within single quotes if a character has been
0630: * backquoted using the previous rule above or contains
0631: * <tt>{ }</tt> or is exactly equal to the strings
0632: * <tt>, ? space or ""</tt> (empty string).
0633: *
0634: * A quoted question mark distinguishes it from the missing value which
0635: * is represented as an unquoted question mark in arff files.
0636: *
0637: * @param string the string to be quoted
0638: * @return the string (possibly quoted)
0639: * @see #unquote(String)
0640: */
0641: public static/*@pure@*/String quote(String string) {
0642: boolean quote = false;
0643:
0644: // backquote the following characters
0645: if ((string.indexOf('\n') != -1)
0646: || (string.indexOf('\r') != -1)
0647: || (string.indexOf('\'') != -1)
0648: || (string.indexOf('"') != -1)
0649: || (string.indexOf('\\') != -1)
0650: || (string.indexOf('\t') != -1)
0651: || (string.indexOf('%') != -1)) {
0652: string = backQuoteChars(string);
0653: quote = true;
0654: }
0655:
0656: // Enclose the string in 's if the string contains a recently added
0657: // backquote or contains one of the following characters.
0658: if ((quote == true) || (string.indexOf('{') != -1)
0659: || (string.indexOf('}') != -1)
0660: || (string.indexOf(',') != -1) || (string.equals("?"))
0661: || (string.indexOf(' ') != -1) || (string.equals(""))) {
0662: string = ("'".concat(string)).concat("'");
0663: }
0664:
0665: return string;
0666: }
0667:
0668: /**
0669: * unquotes are previously quoted string (but only if necessary), i.e., it
0670: * removes the single quotes around it. Inverse to quote(String).
0671: *
0672: * @param string the string to process
0673: * @return the unquoted string
0674: * @see #quote(String)
0675: */
0676: public static String unquote(String string) {
0677: if (string.startsWith("'") && string.endsWith("'")) {
0678: string = string.substring(1, string.length() - 1);
0679:
0680: if ((string.indexOf("\\n") != -1)
0681: || (string.indexOf("\\r") != -1)
0682: || (string.indexOf("\\'") != -1)
0683: || (string.indexOf("\\\"") != -1)
0684: || (string.indexOf("\\\\") != -1)
0685: || (string.indexOf("\\t") != -1)
0686: || (string.indexOf("\\%") != -1)) {
0687: string = unbackQuoteChars(string);
0688: }
0689: }
0690:
0691: return string;
0692: }
0693:
0694: /**
0695: * Converts carriage returns and new lines in a string into \r and \n.
0696: * Backquotes the following characters: ` " \ \t and %
0697: *
0698: * @param string the string
0699: * @return the converted string
0700: * @see #unbackQuoteChars(String)
0701: */
0702: public static/*@pure@*/String backQuoteChars(String string) {
0703:
0704: int index;
0705: StringBuffer newStringBuffer;
0706:
0707: // replace each of the following characters with the backquoted version
0708: char charsFind[] = { '\\', '\'', '\t', '\n', '\r', '"', '%' };
0709: String charsReplace[] = { "\\\\", "\\'", "\\t", "\\n", "\\r",
0710: "\\\"", "\\%" };
0711: for (int i = 0; i < charsFind.length; i++) {
0712: if (string.indexOf(charsFind[i]) != -1) {
0713: newStringBuffer = new StringBuffer();
0714: while ((index = string.indexOf(charsFind[i])) != -1) {
0715: if (index > 0) {
0716: newStringBuffer.append(string.substring(0,
0717: index));
0718: }
0719: newStringBuffer.append(charsReplace[i]);
0720: if ((index + 1) < string.length()) {
0721: string = string.substring(index + 1);
0722: } else {
0723: string = "";
0724: }
0725: }
0726: newStringBuffer.append(string);
0727: string = newStringBuffer.toString();
0728: }
0729: }
0730:
0731: return string;
0732: }
0733:
0734: /**
0735: * Converts carriage returns and new lines in a string into \r and \n.
0736: *
0737: * @param string the string
0738: * @return the converted string
0739: */
0740: public static String convertNewLines(String string) {
0741: int index;
0742:
0743: // Replace with \n
0744: StringBuffer newStringBuffer = new StringBuffer();
0745: while ((index = string.indexOf('\n')) != -1) {
0746: if (index > 0) {
0747: newStringBuffer.append(string.substring(0, index));
0748: }
0749: newStringBuffer.append('\\');
0750: newStringBuffer.append('n');
0751: if ((index + 1) < string.length()) {
0752: string = string.substring(index + 1);
0753: } else {
0754: string = "";
0755: }
0756: }
0757: newStringBuffer.append(string);
0758: string = newStringBuffer.toString();
0759:
0760: // Replace with \r
0761: newStringBuffer = new StringBuffer();
0762: while ((index = string.indexOf('\r')) != -1) {
0763: if (index > 0) {
0764: newStringBuffer.append(string.substring(0, index));
0765: }
0766: newStringBuffer.append('\\');
0767: newStringBuffer.append('r');
0768: if ((index + 1) < string.length()) {
0769: string = string.substring(index + 1);
0770: } else {
0771: string = "";
0772: }
0773: }
0774: newStringBuffer.append(string);
0775: return newStringBuffer.toString();
0776: }
0777:
0778: /**
0779: * Reverts \r and \n in a string into carriage returns and new lines.
0780: *
0781: * @param string the string
0782: * @return the converted string
0783: */
0784: public static String revertNewLines(String string) {
0785: int index;
0786:
0787: // Replace with \n
0788: StringBuffer newStringBuffer = new StringBuffer();
0789: while ((index = string.indexOf("\\n")) != -1) {
0790: if (index > 0) {
0791: newStringBuffer.append(string.substring(0, index));
0792: }
0793: newStringBuffer.append('\n');
0794: if ((index + 2) < string.length()) {
0795: string = string.substring(index + 2);
0796: } else {
0797: string = "";
0798: }
0799: }
0800: newStringBuffer.append(string);
0801: string = newStringBuffer.toString();
0802:
0803: // Replace with \r
0804: newStringBuffer = new StringBuffer();
0805: while ((index = string.indexOf("\\r")) != -1) {
0806: if (index > 0) {
0807: newStringBuffer.append(string.substring(0, index));
0808: }
0809: newStringBuffer.append('\r');
0810: if ((index + 2) < string.length()) {
0811: string = string.substring(index + 2);
0812: } else {
0813: string = "";
0814: }
0815: }
0816: newStringBuffer.append(string);
0817:
0818: return newStringBuffer.toString();
0819: }
0820:
0821: /**
0822: * Returns the secondary set of options (if any) contained in
0823: * the supplied options array. The secondary set is defined to
0824: * be any options after the first "--". These options are removed from
0825: * the original options array.
0826: *
0827: * @param options the input array of options
0828: * @return the array of secondary options
0829: */
0830: public static String[] partitionOptions(String[] options) {
0831:
0832: for (int i = 0; i < options.length; i++) {
0833: if (options[i].equals("--")) {
0834: options[i++] = "";
0835: String[] result = new String[options.length - i];
0836: for (int j = i; j < options.length; j++) {
0837: result[j - i] = options[j];
0838: options[j] = "";
0839: }
0840: return result;
0841: }
0842: }
0843: return new String[0];
0844: }
0845:
0846: /**
0847: * The inverse operation of backQuoteChars().
0848: * Converts back-quoted carriage returns and new lines in a string
0849: * to the corresponding character ('\r' and '\n').
0850: * Also "un"-back-quotes the following characters: ` " \ \t and %
0851: *
0852: * @param string the string
0853: * @return the converted string
0854: * @see #backQuoteChars(String)
0855: */
0856: public static String unbackQuoteChars(String string) {
0857:
0858: int index;
0859: StringBuffer newStringBuffer;
0860:
0861: // replace each of the following characters with the backquoted version
0862: String charsFind[] = { "\\\\", "\\'", "\\t", "\\n", "\\r",
0863: "\\\"", "\\%" };
0864: char charsReplace[] = { '\\', '\'', '\t', '\n', '\r', '"', '%' };
0865: int pos[] = new int[charsFind.length];
0866: int curPos;
0867:
0868: String str = new String(string);
0869: newStringBuffer = new StringBuffer();
0870: while (str.length() > 0) {
0871: // get positions and closest character to replace
0872: curPos = str.length();
0873: index = -1;
0874: for (int i = 0; i < pos.length; i++) {
0875: pos[i] = str.indexOf(charsFind[i]);
0876: if ((pos[i] > -1) && (pos[i] < curPos)) {
0877: index = i;
0878: curPos = pos[i];
0879: }
0880: }
0881:
0882: // replace character if found, otherwise finished
0883: if (index == -1) {
0884: newStringBuffer.append(str);
0885: str = "";
0886: } else {
0887: newStringBuffer.append(str.substring(0, pos[index]));
0888: newStringBuffer.append(charsReplace[index]);
0889: str = str.substring(pos[index]
0890: + charsFind[index].length());
0891: }
0892: }
0893:
0894: return newStringBuffer.toString();
0895: }
0896:
0897: /**
0898: * Split up a string containing options into an array of strings,
0899: * one for each option.
0900: *
0901: * @param quotedOptionString the string containing the options
0902: * @return the array of options
0903: * @throws Exception in case of an unterminated string, unknown character or
0904: * a parse error
0905: */
0906: public static String[] splitOptions(String quotedOptionString)
0907: throws Exception {
0908:
0909: FastVector optionsVec = new FastVector();
0910: String str = new String(quotedOptionString);
0911: int i;
0912:
0913: while (true) {
0914:
0915: //trimLeft
0916: i = 0;
0917: while ((i < str.length())
0918: && (Character.isWhitespace(str.charAt(i))))
0919: i++;
0920: str = str.substring(i);
0921:
0922: //stop when str is empty
0923: if (str.length() == 0)
0924: break;
0925:
0926: //if str start with a double quote
0927: if (str.charAt(0) == '"') {
0928:
0929: //find the first not anti-slached double quote
0930: i = 1;
0931: while (i < str.length()) {
0932: if (str.charAt(i) == str.charAt(0))
0933: break;
0934: if (str.charAt(i) == '\\') {
0935: i += 1;
0936: if (i >= str.length())
0937: throw new Exception(
0938: "String should not finish with \\");
0939: }
0940: i += 1;
0941: }
0942: if (i >= str.length())
0943: throw new Exception("Quote parse error.");
0944:
0945: //add the founded string to the option vector (without quotes)
0946: String optStr = str.substring(1, i);
0947: optStr = unbackQuoteChars(optStr);
0948: optionsVec.addElement(optStr);
0949: str = str.substring(i + 1);
0950: } else {
0951: //find first whiteSpace
0952: i = 0;
0953: while ((i < str.length())
0954: && (!Character.isWhitespace(str.charAt(i))))
0955: i++;
0956:
0957: //add the founded string to the option vector
0958: String optStr = str.substring(0, i);
0959: optionsVec.addElement(optStr);
0960: str = str.substring(i);
0961: }
0962: }
0963:
0964: //convert optionsVec to an array of String
0965: String[] options = new String[optionsVec.size()];
0966: for (i = 0; i < optionsVec.size(); i++) {
0967: options[i] = (String) optionsVec.elementAt(i);
0968: }
0969: return options;
0970: }
0971:
0972: /**
0973: * Joins all the options in an option array into a single string,
0974: * as might be used on the command line.
0975: *
0976: * @param optionArray the array of options
0977: * @return the string containing all options.
0978: */
0979: public static String joinOptions(String[] optionArray) {
0980:
0981: String optionString = "";
0982: for (int i = 0; i < optionArray.length; i++) {
0983: if (optionArray[i].equals("")) {
0984: continue;
0985: }
0986: boolean escape = false;
0987: for (int n = 0; n < optionArray[i].length(); n++) {
0988: if (Character.isWhitespace(optionArray[i].charAt(n))) {
0989: escape = true;
0990: break;
0991: }
0992: }
0993: if (escape) {
0994: optionString += '"' + backQuoteChars(optionArray[i]) + '"';
0995: } else {
0996: optionString += optionArray[i];
0997: }
0998: optionString += " ";
0999: }
1000: return optionString.trim();
1001: }
1002:
1003: /**
1004: * Creates a new instance of an object given it's class name and
1005: * (optional) arguments to pass to it's setOptions method. If the
1006: * object implements OptionHandler and the options parameter is
1007: * non-null, the object will have it's options set. Example use:<p>
1008: *
1009: * <code> <pre>
1010: * String classifierName = Utils.getOption('W', options);
1011: * Classifier c = (Classifier)Utils.forName(Classifier.class,
1012: * classifierName,
1013: * options);
1014: * setClassifier(c);
1015: * </pre></code>
1016: *
1017: * @param classType the class that the instantiated object should
1018: * be assignable to -- an exception is thrown if this is not the case
1019: * @param className the fully qualified class name of the object
1020: * @param options an array of options suitable for passing to setOptions. May
1021: * be null. Any options accepted by the object will be removed from the
1022: * array.
1023: * @return the newly created object, ready for use.
1024: * @exception Exception if the class name is invalid, or if the
1025: * class is not assignable to the desired class type, or the options
1026: * supplied are not acceptable to the object
1027: */
1028: public static Object forName(Class classType, String className,
1029: String[] options) throws Exception {
1030:
1031: Class c = null;
1032: try {
1033: c = Class.forName(className);
1034: } catch (Exception ex) {
1035: throw new Exception("Can't find class called: " + className);
1036: }
1037: if (!classType.isAssignableFrom(c)) {
1038: throw new Exception(classType.getName()
1039: + " is not assignable from " + className);
1040: }
1041: Object o = c.newInstance();
1042: if ((o instanceof OptionHandler) && (options != null)) {
1043: ((OptionHandler) o).setOptions(options);
1044: Utils.checkForRemainingOptions(options);
1045: }
1046: return o;
1047: }
1048:
1049: /**
1050: * Computes entropy for an array of integers.
1051: *
1052: * @param counts array of counts
1053: * @return - a log2 a - b log2 b - c log2 c + (a+b+c) log2 (a+b+c)
1054: * when given array [a b c]
1055: */
1056: public static/*@pure@*/double info(int counts[]) {
1057:
1058: int total = 0;
1059: double x = 0;
1060: for (int j = 0; j < counts.length; j++) {
1061: x -= xlogx(counts[j]);
1062: total += counts[j];
1063: }
1064: return x + xlogx(total);
1065: }
1066:
1067: /**
1068: * Tests if a is smaller or equal to b.
1069: *
1070: * @param a a double
1071: * @param b a double
1072: */
1073: public static/*@pure@*/boolean smOrEq(double a, double b) {
1074:
1075: return (a - b < SMALL);
1076: }
1077:
1078: /**
1079: * Tests if a is greater or equal to b.
1080: *
1081: * @param a a double
1082: * @param b a double
1083: */
1084: public static/*@pure@*/boolean grOrEq(double a, double b) {
1085:
1086: return (b - a < SMALL);
1087: }
1088:
1089: /**
1090: * Tests if a is smaller than b.
1091: *
1092: * @param a a double
1093: * @param b a double
1094: */
1095: public static/*@pure@*/boolean sm(double a, double b) {
1096:
1097: return (b - a > SMALL);
1098: }
1099:
1100: /**
1101: * Tests if a is greater than b.
1102: *
1103: * @param a a double
1104: * @param b a double
1105: */
1106: public static/*@pure@*/boolean gr(double a, double b) {
1107:
1108: return (a - b > SMALL);
1109: }
1110:
1111: /**
1112: * Returns the kth-smallest value in the array.
1113: *
1114: * @param array the array of integers
1115: * @param k the value of k
1116: * @return the kth-smallest value
1117: */
1118: public static double kthSmallestValue(int[] array, int k) {
1119:
1120: int[] index = new int[array.length];
1121:
1122: for (int i = 0; i < index.length; i++) {
1123: index[i] = i;
1124: }
1125:
1126: return array[index[select(array, index, 0, array.length - 1, k)]];
1127: }
1128:
1129: /**
1130: * Returns the kth-smallest value in the array
1131: *
1132: * @param array the array of double
1133: * @param k the value of k
1134: * @return the kth-smallest value
1135: */
1136: public static double kthSmallestValue(double[] array, int k) {
1137:
1138: int[] index = new int[array.length];
1139:
1140: for (int i = 0; i < index.length; i++) {
1141: index[i] = i;
1142: }
1143:
1144: return array[index[select(array, index, 0, array.length - 1, k)]];
1145: }
1146:
1147: /**
1148: * Returns the logarithm of a for base 2.
1149: *
1150: * @param a a double
1151: * @return the logarithm for base 2
1152: */
1153: public static/*@pure@*/double log2(double a) {
1154:
1155: return Math.log(a) / log2;
1156: }
1157:
1158: /**
1159: * Returns index of maximum element in a given
1160: * array of doubles. First maximum is returned.
1161: *
1162: * @param doubles the array of doubles
1163: * @return the index of the maximum element
1164: */
1165: public static/*@pure@*/int maxIndex(double[] doubles) {
1166:
1167: double maximum = 0;
1168: int maxIndex = 0;
1169:
1170: for (int i = 0; i < doubles.length; i++) {
1171: if ((i == 0) || (doubles[i] > maximum)) {
1172: maxIndex = i;
1173: maximum = doubles[i];
1174: }
1175: }
1176:
1177: return maxIndex;
1178: }
1179:
1180: /**
1181: * Returns index of maximum element in a given
1182: * array of integers. First maximum is returned.
1183: *
1184: * @param ints the array of integers
1185: * @return the index of the maximum element
1186: */
1187: public static/*@pure@*/int maxIndex(int[] ints) {
1188:
1189: int maximum = 0;
1190: int maxIndex = 0;
1191:
1192: for (int i = 0; i < ints.length; i++) {
1193: if ((i == 0) || (ints[i] > maximum)) {
1194: maxIndex = i;
1195: maximum = ints[i];
1196: }
1197: }
1198:
1199: return maxIndex;
1200: }
1201:
1202: /**
1203: * Computes the mean for an array of doubles.
1204: *
1205: * @param vector the array
1206: * @return the mean
1207: */
1208: public static/*@pure@*/double mean(double[] vector) {
1209:
1210: double sum = 0;
1211:
1212: if (vector.length == 0) {
1213: return 0;
1214: }
1215: for (int i = 0; i < vector.length; i++) {
1216: sum += vector[i];
1217: }
1218: return sum / (double) vector.length;
1219: }
1220:
1221: /**
1222: * Returns index of minimum element in a given
1223: * array of integers. First minimum is returned.
1224: *
1225: * @param ints the array of integers
1226: * @return the index of the minimum element
1227: */
1228: public static/*@pure@*/int minIndex(int[] ints) {
1229:
1230: int minimum = 0;
1231: int minIndex = 0;
1232:
1233: for (int i = 0; i < ints.length; i++) {
1234: if ((i == 0) || (ints[i] < minimum)) {
1235: minIndex = i;
1236: minimum = ints[i];
1237: }
1238: }
1239:
1240: return minIndex;
1241: }
1242:
1243: /**
1244: * Returns index of minimum element in a given
1245: * array of doubles. First minimum is returned.
1246: *
1247: * @param doubles the array of doubles
1248: * @return the index of the minimum element
1249: */
1250: public static/*@pure@*/int minIndex(double[] doubles) {
1251:
1252: double minimum = 0;
1253: int minIndex = 0;
1254:
1255: for (int i = 0; i < doubles.length; i++) {
1256: if ((i == 0) || (doubles[i] < minimum)) {
1257: minIndex = i;
1258: minimum = doubles[i];
1259: }
1260: }
1261:
1262: return minIndex;
1263: }
1264:
1265: /**
1266: * Normalizes the doubles in the array by their sum.
1267: *
1268: * @param doubles the array of double
1269: * @exception IllegalArgumentException if sum is Zero or NaN
1270: */
1271: public static void normalize(double[] doubles) {
1272:
1273: double sum = 0;
1274: for (int i = 0; i < doubles.length; i++) {
1275: sum += doubles[i];
1276: }
1277: normalize(doubles, sum);
1278: }
1279:
1280: /**
1281: * Normalizes the doubles in the array using the given value.
1282: *
1283: * @param doubles the array of double
1284: * @param sum the value by which the doubles are to be normalized
1285: * @exception IllegalArgumentException if sum is zero or NaN
1286: */
1287: public static void normalize(double[] doubles, double sum) {
1288:
1289: if (Double.isNaN(sum)) {
1290: throw new IllegalArgumentException(
1291: "Can't normalize array. Sum is NaN.");
1292: }
1293: if (sum == 0) {
1294: // Maybe this should just be a return.
1295: throw new IllegalArgumentException(
1296: "Can't normalize array. Sum is zero.");
1297: }
1298: for (int i = 0; i < doubles.length; i++) {
1299: doubles[i] /= sum;
1300: }
1301: }
1302:
1303: /**
1304: * Converts an array containing the natural logarithms of
1305: * probabilities stored in a vector back into probabilities.
1306: * The probabilities are assumed to sum to one.
1307: *
1308: * @param a an array holding the natural logarithms of the probabilities
1309: * @return the converted array
1310: */
1311: public static double[] logs2probs(double[] a) {
1312:
1313: double max = a[maxIndex(a)];
1314: double sum = 0.0;
1315:
1316: double[] result = new double[a.length];
1317: for (int i = 0; i < a.length; i++) {
1318: result[i] = Math.exp(a[i] - max);
1319: sum += result[i];
1320: }
1321:
1322: normalize(result, sum);
1323:
1324: return result;
1325: }
1326:
1327: /**
1328: * Returns the log-odds for a given probabilitiy.
1329: *
1330: * @param prob the probabilitiy
1331: *
1332: * @return the log-odds after the probability has been mapped to
1333: * [Utils.SMALL, 1-Utils.SMALL]
1334: */
1335: public static/*@pure@*/double probToLogOdds(double prob) {
1336:
1337: if (gr(prob, 1) || (sm(prob, 0))) {
1338: throw new IllegalArgumentException(
1339: "probToLogOdds: probability must " + "be in [0,1] "
1340: + prob);
1341: }
1342: double p = SMALL + (1.0 - 2 * SMALL) * prob;
1343: return Math.log(p / (1 - p));
1344: }
1345:
1346: /**
1347: * Rounds a double to the next nearest integer value. The JDK version
1348: * of it doesn't work properly.
1349: *
1350: * @param value the double value
1351: * @return the resulting integer value
1352: */
1353: public static/*@pure@*/int round(double value) {
1354:
1355: int roundedValue = value > 0 ? (int) (value + 0.5)
1356: : -(int) (Math.abs(value) + 0.5);
1357:
1358: return roundedValue;
1359: }
1360:
1361: /**
1362: * Rounds a double to the next nearest integer value in a probabilistic
1363: * fashion (e.g. 0.8 has a 20% chance of being rounded down to 0 and a
1364: * 80% chance of being rounded up to 1). In the limit, the average of
1365: * the rounded numbers generated by this procedure should converge to
1366: * the original double.
1367: *
1368: * @param value the double value
1369: * @param rand the random number generator
1370: * @return the resulting integer value
1371: */
1372: public static int probRound(double value, Random rand) {
1373:
1374: if (value >= 0) {
1375: double lower = Math.floor(value);
1376: double prob = value - lower;
1377: if (rand.nextDouble() < prob) {
1378: return (int) lower + 1;
1379: } else {
1380: return (int) lower;
1381: }
1382: } else {
1383: double lower = Math.floor(Math.abs(value));
1384: double prob = Math.abs(value) - lower;
1385: if (rand.nextDouble() < prob) {
1386: return -((int) lower + 1);
1387: } else {
1388: return -(int) lower;
1389: }
1390: }
1391: }
1392:
1393: /**
1394: * Rounds a double to the given number of decimal places.
1395: *
1396: * @param value the double value
1397: * @param afterDecimalPoint the number of digits after the decimal point
1398: * @return the double rounded to the given precision
1399: */
1400: public static/*@pure@*/double roundDouble(double value,
1401: int afterDecimalPoint) {
1402:
1403: double mask = Math.pow(10.0, (double) afterDecimalPoint);
1404:
1405: return (double) (Math.round(value * mask)) / mask;
1406: }
1407:
1408: /**
1409: * Sorts a given array of integers in ascending order and returns an
1410: * array of integers with the positions of the elements of the original
1411: * array in the sorted array. The sort is stable. (Equal elements remain
1412: * in their original order.)
1413: *
1414: * @param array this array is not changed by the method!
1415: * @return an array of integers with the positions in the sorted
1416: * array.
1417: */
1418: public static/*@pure@*/int[] sort(int[] array) {
1419:
1420: int[] index = new int[array.length];
1421: int[] newIndex = new int[array.length];
1422: int[] helpIndex;
1423: int numEqual;
1424:
1425: for (int i = 0; i < index.length; i++) {
1426: index[i] = i;
1427: }
1428: quickSort(array, index, 0, array.length - 1);
1429:
1430: // Make sort stable
1431: int i = 0;
1432: while (i < index.length) {
1433: numEqual = 1;
1434: for (int j = i + 1; ((j < index.length) && (array[index[i]] == array[index[j]])); j++) {
1435: numEqual++;
1436: }
1437: if (numEqual > 1) {
1438: helpIndex = new int[numEqual];
1439: for (int j = 0; j < numEqual; j++) {
1440: helpIndex[j] = i + j;
1441: }
1442: quickSort(index, helpIndex, 0, numEqual - 1);
1443: for (int j = 0; j < numEqual; j++) {
1444: newIndex[i + j] = index[helpIndex[j]];
1445: }
1446: i += numEqual;
1447: } else {
1448: newIndex[i] = index[i];
1449: i++;
1450: }
1451: }
1452: return newIndex;
1453: }
1454:
1455: /**
1456: * Sorts a given array of doubles in ascending order and returns an
1457: * array of integers with the positions of the elements of the
1458: * original array in the sorted array. NOTE THESE CHANGES: the sort
1459: * is no longer stable and it doesn't use safe floating-point
1460: * comparisons anymore. Occurrences of Double.NaN are treated as
1461: * Double.MAX_VALUE
1462: *
1463: * @param array this array is not changed by the method!
1464: * @return an array of integers with the positions in the sorted
1465: * array.
1466: */
1467: public static/*@pure@*/int[] sort(/*@non_null@*/double[] array) {
1468:
1469: int[] index = new int[array.length];
1470: array = (double[]) array.clone();
1471: for (int i = 0; i < index.length; i++) {
1472: index[i] = i;
1473: if (Double.isNaN(array[i])) {
1474: array[i] = Double.MAX_VALUE;
1475: }
1476: }
1477: quickSort(array, index, 0, array.length - 1);
1478: return index;
1479: }
1480:
1481: /**
1482: * Sorts a given array of doubles in ascending order and returns an
1483: * array of integers with the positions of the elements of the original
1484: * array in the sorted array. The sort is stable (Equal elements remain
1485: * in their original order.) Occurrences of Double.NaN are treated as
1486: * Double.MAX_VALUE
1487: *
1488: * @param array this array is not changed by the method!
1489: * @return an array of integers with the positions in the sorted
1490: * array.
1491: */
1492: public static/*@pure@*/int[] stableSort(double[] array) {
1493:
1494: int[] index = new int[array.length];
1495: int[] newIndex = new int[array.length];
1496: int[] helpIndex;
1497: int numEqual;
1498:
1499: array = (double[]) array.clone();
1500: for (int i = 0; i < index.length; i++) {
1501: index[i] = i;
1502: if (Double.isNaN(array[i])) {
1503: array[i] = Double.MAX_VALUE;
1504: }
1505: }
1506: quickSort(array, index, 0, array.length - 1);
1507:
1508: // Make sort stable
1509:
1510: int i = 0;
1511: while (i < index.length) {
1512: numEqual = 1;
1513: for (int j = i + 1; ((j < index.length) && Utils.eq(
1514: array[index[i]], array[index[j]])); j++)
1515: numEqual++;
1516: if (numEqual > 1) {
1517: helpIndex = new int[numEqual];
1518: for (int j = 0; j < numEqual; j++)
1519: helpIndex[j] = i + j;
1520: quickSort(index, helpIndex, 0, numEqual - 1);
1521: for (int j = 0; j < numEqual; j++)
1522: newIndex[i + j] = index[helpIndex[j]];
1523: i += numEqual;
1524: } else {
1525: newIndex[i] = index[i];
1526: i++;
1527: }
1528: }
1529:
1530: return newIndex;
1531: }
1532:
1533: /**
1534: * Computes the variance for an array of doubles.
1535: *
1536: * @param vector the array
1537: * @return the variance
1538: */
1539: public static/*@pure@*/double variance(double[] vector) {
1540:
1541: double sum = 0, sumSquared = 0;
1542:
1543: if (vector.length <= 1) {
1544: return 0;
1545: }
1546: for (int i = 0; i < vector.length; i++) {
1547: sum += vector[i];
1548: sumSquared += (vector[i] * vector[i]);
1549: }
1550: double result = (sumSquared - (sum * sum / (double) vector.length))
1551: / (double) (vector.length - 1);
1552:
1553: // We don't like negative variance
1554: if (result < 0) {
1555: return 0;
1556: } else {
1557: return result;
1558: }
1559: }
1560:
1561: /**
1562: * Computes the sum of the elements of an array of doubles.
1563: *
1564: * @param doubles the array of double
1565: * @return the sum of the elements
1566: */
1567: public static/*@pure@*/double sum(double[] doubles) {
1568:
1569: double sum = 0;
1570:
1571: for (int i = 0; i < doubles.length; i++) {
1572: sum += doubles[i];
1573: }
1574: return sum;
1575: }
1576:
1577: /**
1578: * Computes the sum of the elements of an array of integers.
1579: *
1580: * @param ints the array of integers
1581: * @return the sum of the elements
1582: */
1583: public static/*@pure@*/int sum(int[] ints) {
1584:
1585: int sum = 0;
1586:
1587: for (int i = 0; i < ints.length; i++) {
1588: sum += ints[i];
1589: }
1590: return sum;
1591: }
1592:
1593: /**
1594: * Returns c*log2(c) for a given integer value c.
1595: *
1596: * @param c an integer value
1597: * @return c*log2(c) (but is careful to return 0 if c is 0)
1598: */
1599: public static/*@pure@*/double xlogx(int c) {
1600:
1601: if (c == 0) {
1602: return 0.0;
1603: }
1604: return c * Utils.log2((double) c);
1605: }
1606:
1607: /**
1608: * Partitions the instances around a pivot. Used by quicksort and
1609: * kthSmallestValue.
1610: *
1611: * @param array the array of doubles to be sorted
1612: * @param index the index into the array of doubles
1613: * @param l the first index of the subset
1614: * @param r the last index of the subset
1615: *
1616: * @return the index of the middle element
1617: */
1618: private static int partition(double[] array, int[] index, int l,
1619: int r) {
1620:
1621: double pivot = array[index[(l + r) / 2]];
1622: int help;
1623:
1624: while (l < r) {
1625: while ((array[index[l]] < pivot) && (l < r)) {
1626: l++;
1627: }
1628: while ((array[index[r]] > pivot) && (l < r)) {
1629: r--;
1630: }
1631: if (l < r) {
1632: help = index[l];
1633: index[l] = index[r];
1634: index[r] = help;
1635: l++;
1636: r--;
1637: }
1638: }
1639: if ((l == r) && (array[index[r]] > pivot)) {
1640: r--;
1641: }
1642:
1643: return r;
1644: }
1645:
1646: /**
1647: * Partitions the instances around a pivot. Used by quicksort and
1648: * kthSmallestValue.
1649: *
1650: * @param array the array of integers to be sorted
1651: * @param index the index into the array of integers
1652: * @param l the first index of the subset
1653: * @param r the last index of the subset
1654: *
1655: * @return the index of the middle element
1656: */
1657: private static int partition(int[] array, int[] index, int l, int r) {
1658:
1659: double pivot = array[index[(l + r) / 2]];
1660: int help;
1661:
1662: while (l < r) {
1663: while ((array[index[l]] < pivot) && (l < r)) {
1664: l++;
1665: }
1666: while ((array[index[r]] > pivot) && (l < r)) {
1667: r--;
1668: }
1669: if (l < r) {
1670: help = index[l];
1671: index[l] = index[r];
1672: index[r] = help;
1673: l++;
1674: r--;
1675: }
1676: }
1677: if ((l == r) && (array[index[r]] > pivot)) {
1678: r--;
1679: }
1680:
1681: return r;
1682: }
1683:
1684: /**
1685: * Implements quicksort according to Manber's "Introduction to
1686: * Algorithms".
1687: *
1688: * @param array the array of doubles to be sorted
1689: * @param index the index into the array of doubles
1690: * @param left the first index of the subset to be sorted
1691: * @param right the last index of the subset to be sorted
1692: */
1693: //@ requires 0 <= first && first <= right && right < array.length;
1694: //@ requires (\forall int i; 0 <= i && i < index.length; 0 <= index[i] && index[i] < array.length);
1695: //@ requires array != index;
1696: // assignable index;
1697: private static void quickSort(/*@non_null@*/double[] array, /*@non_null@*/
1698: int[] index, int left, int right) {
1699:
1700: if (left < right) {
1701: int middle = partition(array, index, left, right);
1702: quickSort(array, index, left, middle);
1703: quickSort(array, index, middle + 1, right);
1704: }
1705: }
1706:
1707: /**
1708: * Implements quicksort according to Manber's "Introduction to
1709: * Algorithms".
1710: *
1711: * @param array the array of integers to be sorted
1712: * @param index the index into the array of integers
1713: * @param left the first index of the subset to be sorted
1714: * @param right the last index of the subset to be sorted
1715: */
1716: //@ requires 0 <= first && first <= right && right < array.length;
1717: //@ requires (\forall int i; 0 <= i && i < index.length; 0 <= index[i] && index[i] < array.length);
1718: //@ requires array != index;
1719: // assignable index;
1720: private static void quickSort(/*@non_null@*/int[] array, /*@non_null@*/
1721: int[] index, int left, int right) {
1722:
1723: if (left < right) {
1724: int middle = partition(array, index, left, right);
1725: quickSort(array, index, left, middle);
1726: quickSort(array, index, middle + 1, right);
1727: }
1728: }
1729:
1730: /**
1731: * Implements computation of the kth-smallest element according
1732: * to Manber's "Introduction to Algorithms".
1733: *
1734: * @param array the array of double
1735: * @param index the index into the array of doubles
1736: * @param left the first index of the subset
1737: * @param right the last index of the subset
1738: * @param k the value of k
1739: *
1740: * @return the index of the kth-smallest element
1741: */
1742: //@ requires 0 <= first && first <= right && right < array.length;
1743: private static int select(/*@non_null@*/double[] array, /*@non_null@*/
1744: int[] index, int left, int right, int k) {
1745:
1746: if (left == right) {
1747: return left;
1748: } else {
1749: int middle = partition(array, index, left, right);
1750: if ((middle - left + 1) >= k) {
1751: return select(array, index, left, middle, k);
1752: } else {
1753: return select(array, index, middle + 1, right, k
1754: - (middle - left + 1));
1755: }
1756: }
1757: }
1758:
1759: /**
1760: * Implements computation of the kth-smallest element according
1761: * to Manber's "Introduction to Algorithms".
1762: *
1763: * @param array the array of integers
1764: * @param index the index into the array of integers
1765: * @param left the first index of the subset
1766: * @param right the last index of the subset
1767: * @param k the value of k
1768: *
1769: * @return the index of the kth-smallest element
1770: */
1771: //@ requires 0 <= first && first <= right && right < array.length;
1772: private static int select(/*@non_null@*/int[] array, /*@non_null@*/
1773: int[] index, int left, int right, int k) {
1774:
1775: if (left == right) {
1776: return left;
1777: } else {
1778: int middle = partition(array, index, left, right);
1779: if ((middle - left + 1) >= k) {
1780: return select(array, index, left, middle, k);
1781: } else {
1782: return select(array, index, middle + 1, right, k
1783: - (middle - left + 1));
1784: }
1785: }
1786: }
1787:
1788: /**
1789: * Main method for testing this class.
1790: *
1791: * @param ops some dummy options
1792: */
1793: public static void main(String[] ops) {
1794:
1795: double[] doublesWithNaN = { 4.5, 6.7, Double.NaN, 3.4, 4.8,
1796: 1.2, 3.4 };
1797: double[] doubles = { 4.5, 6.7, 6.7, 3.4, 4.8, 1.2, 3.4, 6.7,
1798: 6.7, 3.4 };
1799: int[] ints = { 12, 6, 2, 18, 16, 6, 7, 5, 18, 18, 17 };
1800:
1801: try {
1802:
1803: // Option handling
1804: System.out.println("First option split up:");
1805: if (ops.length > 0) {
1806: String[] firstOptionSplitUp = Utils
1807: .splitOptions(ops[0]);
1808: for (int i = 0; i < firstOptionSplitUp.length; i++) {
1809: System.out.println(firstOptionSplitUp[i]);
1810: }
1811: }
1812: System.out.println("Partitioned options: ");
1813: String[] partitionedOptions = Utils.partitionOptions(ops);
1814: for (int i = 0; i < partitionedOptions.length; i++) {
1815: System.out.println(partitionedOptions[i]);
1816: }
1817: System.out.println("Get position of flag -f: "
1818: + Utils.getOptionPos('f', ops));
1819: System.out.println("Get flag -f: "
1820: + Utils.getFlag('f', ops));
1821: System.out.println("Get position of option -o: "
1822: + Utils.getOptionPos('o', ops));
1823: System.out.println("Get option -o: "
1824: + Utils.getOption('o', ops));
1825: System.out.println("Checking for remaining options... ");
1826: Utils.checkForRemainingOptions(ops);
1827:
1828: // Statistics
1829: System.out.println("Original array with NaN (doubles): ");
1830: for (int i = 0; i < doublesWithNaN.length; i++) {
1831: System.out.print(doublesWithNaN[i] + " ");
1832: }
1833: System.out.println();
1834: System.out.println("Original array (doubles): ");
1835: for (int i = 0; i < doubles.length; i++) {
1836: System.out.print(doubles[i] + " ");
1837: }
1838: System.out.println();
1839: System.out.println("Original array (ints): ");
1840: for (int i = 0; i < ints.length; i++) {
1841: System.out.print(ints[i] + " ");
1842: }
1843: System.out.println();
1844: System.out.println("Correlation: "
1845: + Utils.correlation(doubles, doubles,
1846: doubles.length));
1847: System.out.println("Mean: " + Utils.mean(doubles));
1848: System.out.println("Variance: " + Utils.variance(doubles));
1849: System.out.println("Sum (doubles): " + Utils.sum(doubles));
1850: System.out.println("Sum (ints): " + Utils.sum(ints));
1851: System.out.println("Max index (doubles): "
1852: + Utils.maxIndex(doubles));
1853: System.out.println("Max index (ints): "
1854: + Utils.maxIndex(ints));
1855: System.out.println("Min index (doubles): "
1856: + Utils.minIndex(doubles));
1857: System.out.println("Min index (ints): "
1858: + Utils.minIndex(ints));
1859: System.out.println("Median (doubles): "
1860: + Utils.kthSmallestValue(doubles,
1861: doubles.length / 2));
1862: System.out.println("Median (ints): "
1863: + Utils.kthSmallestValue(ints, ints.length / 2));
1864:
1865: // Sorting and normalizing
1866: System.out.println("Sorted array with NaN (doubles): ");
1867: int[] sorted = Utils.sort(doublesWithNaN);
1868: for (int i = 0; i < doublesWithNaN.length; i++) {
1869: System.out.print(doublesWithNaN[sorted[i]] + " ");
1870: }
1871: System.out.println();
1872: System.out.println("Sorted array (doubles): ");
1873: sorted = Utils.sort(doubles);
1874: for (int i = 0; i < doubles.length; i++) {
1875: System.out.print(doubles[sorted[i]] + " ");
1876: }
1877: System.out.println();
1878: System.out.println("Sorted array (ints): ");
1879: sorted = Utils.sort(ints);
1880: for (int i = 0; i < ints.length; i++) {
1881: System.out.print(ints[sorted[i]] + " ");
1882: }
1883: System.out.println();
1884: System.out.println("Indices from stable sort (doubles): ");
1885: sorted = Utils.stableSort(doubles);
1886: for (int i = 0; i < doubles.length; i++) {
1887: System.out.print(sorted[i] + " ");
1888: }
1889: System.out.println();
1890: System.out.println("Indices from sort (ints): ");
1891: sorted = Utils.sort(ints);
1892: for (int i = 0; i < ints.length; i++) {
1893: System.out.print(sorted[i] + " ");
1894: }
1895: System.out.println();
1896: System.out.println("Normalized array (doubles): ");
1897: Utils.normalize(doubles);
1898: for (int i = 0; i < doubles.length; i++) {
1899: System.out.print(doubles[i] + " ");
1900: }
1901: System.out.println();
1902: System.out.println("Normalized again (doubles): ");
1903: Utils.normalize(doubles, Utils.sum(doubles));
1904: for (int i = 0; i < doubles.length; i++) {
1905: System.out.print(doubles[i] + " ");
1906: }
1907: System.out.println();
1908:
1909: // Pretty-printing
1910: System.out.println("-4.58: "
1911: + Utils.doubleToString(-4.57826535, 2));
1912: System.out.println("-6.78: "
1913: + Utils.doubleToString(-6.78214234, 6, 2));
1914:
1915: // Comparisons
1916: System.out.println("5.70001 == 5.7 ? "
1917: + Utils.eq(5.70001, 5.7));
1918: System.out.println("5.70001 > 5.7 ? "
1919: + Utils.gr(5.70001, 5.7));
1920: System.out.println("5.70001 >= 5.7 ? "
1921: + Utils.grOrEq(5.70001, 5.7));
1922: System.out.println("5.7 < 5.70001 ? "
1923: + Utils.sm(5.7, 5.70001));
1924: System.out.println("5.7 <= 5.70001 ? "
1925: + Utils.smOrEq(5.7, 5.70001));
1926:
1927: // Math
1928: System.out.println("Info (ints): " + Utils.info(ints));
1929: System.out.println("log2(4.6): " + Utils.log2(4.6));
1930: System.out.println("5 * log(5): " + Utils.xlogx(5));
1931: System.out.println("5.5 rounded: " + Utils.round(5.5));
1932: System.out.println("5.55555 rounded to 2 decimal places: "
1933: + Utils.roundDouble(5.55555, 2));
1934:
1935: // Arrays
1936: System.out.println("Array-Dimensions of 'new int[][]': "
1937: + Utils.getArrayDimensions(new int[][] {}));
1938: System.out
1939: .println("Array-Dimensions of 'new int[][]{{1,2,3},{4,5,6}}': "
1940: + Utils.getArrayDimensions(new int[][] {
1941: { 1, 2, 3 }, { 4, 5, 6 } }));
1942: String[][][] s = new String[3][4][];
1943: System.out
1944: .println("Array-Dimensions of 'new String[3][4][]': "
1945: + Utils.getArrayDimensions(s));
1946: } catch (Exception e) {
1947: e.printStackTrace();
1948: }
1949: }
1950: }
|