0001: /**
0002: *******************************************************************************
0003: * Copyright (C) 2002-2004, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: */package com.ibm.icu.dev.test.perf;
0007:
0008: import com.ibm.icu.text.*;
0009: import java.util.*;
0010: import java.io.*;
0011: import com.ibm.icu.impl.LocaleUtility;
0012:
0013: public class CollationPerformanceTest {
0014: static final String usageString = "usage: collperf options...\n"
0015: + "-help Display this message.\n"
0016: + "-file file_name utf-16 format file of names.\n"
0017: + "-locale name ICU locale to use. Default is en_US\n"
0018: + "-rules file_name Collation rules file (overrides locale)\n"
0019: //+ "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
0020: //+ " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
0021: //+ "-win Run test using Windows native services. (ICU is default)\n"
0022: //+ "-unix Run test using Unix strxfrm, strcoll services.\n"
0023: //+ "-uselen Use API with string lengths. Default is null-terminated strings\n"
0024: + "-usekeys Run tests using sortkeys rather than strcoll\n"
0025: + "-strcmp Run tests using u_strcmp rather than strcoll\n"
0026: + "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n"
0027: + "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
0028: + "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n"
0029: + " under test at each call point. For measuring test overhead.\n"
0030: + "-terse Terse numbers-only output. Intended for use by scripts.\n"
0031: + "-french French accent ordering\n"
0032: + "-frenchoff No French accent ordering (for use with French locales.)\n"
0033: + "-norm Normalizing mode on\n"
0034: + "-shifted Shifted mode\n"
0035: + "-lower Lower case first\n"
0036: + "-upper Upper case first\n"
0037: + "-case Enable separate case level\n"
0038: + "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
0039: + "-keyhist Produce a table sort key size vs. string length\n"
0040: + "-binsearch Binary Search timing test\n"
0041: + "-keygen Sort Key Generation timing test\n"
0042: + "-qsort Quicksort timing test\n"
0043: + "-iter Iteration Performance Test\n"
0044: + "-dump Display strings, sort keys and CEs.\n"
0045: + "-java Run test using java.text.Collator.\n";
0046:
0047: //enum {FLAG, NUM, STRING} type;
0048: static StringBuffer temp_opt_fName = new StringBuffer("");
0049: static StringBuffer temp_opt_locale = new StringBuffer("en_US");
0050: //static StringBuffer temp_opt_langid = new StringBuffer("0"); // Defaults to value corresponding to opt_locale.
0051: static StringBuffer temp_opt_rules = new StringBuffer("");
0052: static StringBuffer temp_opt_help = new StringBuffer("");
0053: static StringBuffer temp_opt_loopCount = new StringBuffer("1");
0054: static StringBuffer temp_opt_iLoopCount = new StringBuffer("1");
0055: static StringBuffer temp_opt_terse = new StringBuffer("false");
0056: static StringBuffer temp_opt_qsort = new StringBuffer("");
0057: static StringBuffer temp_opt_binsearch = new StringBuffer("");
0058: static StringBuffer temp_opt_icu = new StringBuffer("true");
0059: //static StringBuffer opt_win = new StringBuffer(""); // Run with Windows native functions.
0060: //static StringBuffer opt_unix = new StringBuffer(""); // Run with UNIX strcoll, strxfrm functions.
0061: //static StringBuffer opt_uselen = new StringBuffer("");
0062: static StringBuffer temp_opt_usekeys = new StringBuffer("");
0063: static StringBuffer temp_opt_strcmp = new StringBuffer("");
0064: static StringBuffer temp_opt_strcmpCPO = new StringBuffer("");
0065: static StringBuffer temp_opt_norm = new StringBuffer("");
0066: static StringBuffer temp_opt_keygen = new StringBuffer("");
0067: static StringBuffer temp_opt_french = new StringBuffer("");
0068: static StringBuffer temp_opt_frenchoff = new StringBuffer("");
0069: static StringBuffer temp_opt_shifted = new StringBuffer("");
0070: static StringBuffer temp_opt_lower = new StringBuffer("");
0071: static StringBuffer temp_opt_upper = new StringBuffer("");
0072: static StringBuffer temp_opt_case = new StringBuffer("");
0073: static StringBuffer temp_opt_level = new StringBuffer("0");
0074: static StringBuffer temp_opt_keyhist = new StringBuffer("");
0075: static StringBuffer temp_opt_itertest = new StringBuffer("");
0076: static StringBuffer temp_opt_dump = new StringBuffer("");
0077: static StringBuffer temp_opt_java = new StringBuffer("");
0078:
0079: static String opt_fName = "";
0080: static String opt_locale = "en_US";
0081: //static int opt_langid = 0; // Defaults to value corresponding to opt_locale.
0082: static String opt_rules = "";
0083: static boolean opt_help = false;
0084: static int opt_loopCount = 1;
0085: static int opt_iLoopCount = 1;
0086: static boolean opt_terse = false;
0087: static boolean opt_qsort = false;
0088: static boolean opt_binsearch = false;
0089: static boolean opt_icu = true;
0090: //static boolean opt_win = false; // Run with Windows native functions.
0091: //static boolean opt_unix = false; // Run with UNIX strcoll, strxfrm functions.
0092: //static boolean opt_uselen = false;
0093: static boolean opt_usekeys = false;
0094: static boolean opt_strcmp = false;
0095: static boolean opt_strcmpCPO = false;
0096: static boolean opt_norm = false;
0097: static boolean opt_keygen = false;
0098: static boolean opt_french = false;
0099: static boolean opt_frenchoff = false;
0100: static boolean opt_shifted = false;
0101: static boolean opt_lower = false;
0102: static boolean opt_upper = false;
0103: static boolean opt_case = false;
0104: static int opt_level = 0;
0105: static boolean opt_keyhist = false;
0106: static boolean opt_itertest = false;
0107: static boolean opt_dump = false;
0108: static boolean opt_java = false;
0109:
0110: static OptionSpec[] options = {
0111: new OptionSpec("-file", 2, temp_opt_fName),
0112: new OptionSpec("-locale", 2, temp_opt_locale),
0113: //new OptionSpec("-langid", 1, temp_opt_langid),
0114: new OptionSpec("-rules", 2, temp_opt_rules),
0115: new OptionSpec("-qsort", 0, temp_opt_qsort),
0116: new OptionSpec("-binsearch", 0, temp_opt_binsearch),
0117: new OptionSpec("-iter", 0, temp_opt_itertest),
0118: //new OptionSpec("-win", 0, temp_opt_win),
0119: //new OptionSpec("-unix", 0, temp_opt_unix),
0120: //new OptionSpec("-uselen", 0, temp_opt_uselen),
0121: new OptionSpec("-usekeys", 0, temp_opt_usekeys),
0122: new OptionSpec("-strcmp", 0, temp_opt_strcmp),
0123: new OptionSpec("-strcmpCPO", 0, temp_opt_strcmpCPO),
0124: new OptionSpec("-norm", 0, temp_opt_norm),
0125: new OptionSpec("-french", 0, temp_opt_french),
0126: new OptionSpec("-frenchoff", 0, temp_opt_frenchoff),
0127: new OptionSpec("-shifted", 0, temp_opt_shifted),
0128: new OptionSpec("-lower", 0, temp_opt_lower),
0129: new OptionSpec("-upper", 0, temp_opt_upper),
0130: new OptionSpec("-case", 0, temp_opt_case),
0131: new OptionSpec("-level", 1, temp_opt_level),
0132: new OptionSpec("-keyhist", 0, temp_opt_keyhist),
0133: new OptionSpec("-keygen", 0, temp_opt_keygen),
0134: new OptionSpec("-loop", 1, temp_opt_loopCount),
0135: new OptionSpec("-iloop", 1, temp_opt_iLoopCount),
0136: new OptionSpec("-terse", 0, temp_opt_terse),
0137: new OptionSpec("-dump", 0, temp_opt_dump),
0138: new OptionSpec("-help", 0, temp_opt_help),
0139: new OptionSpec("-?", 0, temp_opt_help),
0140: new OptionSpec("-java", 0, temp_opt_java), };
0141:
0142: static java.text.Collator javaCol = null;
0143: static com.ibm.icu.text.Collator icuCol = null;
0144: static NumberFormat nf = null;
0145: static NumberFormat percent = null;
0146: ArrayList list = null;
0147: String[] tests = null;
0148: int globalCount = 0;
0149:
0150: public static void main(String[] args) {
0151: CollationPerformanceTest collPerf = new CollationPerformanceTest();
0152: if (!CollationPerformanceTest.processOptions(args) || opt_help
0153: || opt_fName.length() == 0) {
0154: System.out.println(usageString);
0155: System.exit(1);
0156: }
0157:
0158: nf = NumberFormat.getInstance();
0159: nf.setMaximumFractionDigits(2);
0160: percent = NumberFormat.getPercentInstance();
0161:
0162: collPerf.setOptions();
0163: collPerf.readDataLines();
0164:
0165: if (opt_dump) {
0166: collPerf.doDump();
0167: }
0168:
0169: if (opt_qsort) {
0170: collPerf.doQSort();
0171: }
0172:
0173: if (opt_binsearch) {
0174: collPerf.doBinarySearch();
0175: }
0176:
0177: if (opt_keygen) {
0178: collPerf.doKeyGen();
0179: }
0180:
0181: if (opt_keyhist) {
0182: collPerf.doKeyHist();
0183: }
0184:
0185: if (opt_itertest) {
0186: collPerf.doIterTest();
0187: }
0188:
0189: }
0190:
0191: //Dump file lines, CEs, Sort Keys if requested
0192: void doDump() {
0193: for (int i = 0; i < list.size(); i++) {
0194: //print the line
0195: String line = com.ibm.icu.impl.Utility.escape((String) list
0196: .get(i));
0197: System.out.println(line);
0198:
0199: System.out.print(" CEs: ");
0200: CollationElementIterator CEiter = ((com.ibm.icu.text.RuleBasedCollator) icuCol)
0201: .getCollationElementIterator(line);
0202: int ce;
0203: int j = 0;
0204: for (;;) {
0205: ce = CEiter.next();
0206: if (ce == CollationElementIterator.NULLORDER) {
0207: break;
0208: }
0209: //System.out.print();
0210: String outStr = Integer.toHexString(ce);
0211: for (int len = 0; len < 8 - outStr.length(); len++) {
0212: outStr = '0' + outStr;
0213: }
0214: System.out.print(outStr + " ");
0215: if (++j > 8) {
0216: System.out.print("\n ");
0217: j = 0;
0218: }
0219: }
0220:
0221: System.out.print("\n ICU Sort Key: ");
0222: CollationKey ck = ((com.ibm.icu.text.RuleBasedCollator) icuCol)
0223: .getCollationKey(line);
0224: byte[] cks = ck.toByteArray();
0225: j = 0;
0226: for (int k = 0; k < cks.length; k++) {
0227: String outStr = Integer.toHexString(cks[k]);
0228: switch (outStr.length()) {
0229: case 1:
0230: outStr = '0' + outStr;
0231: break;
0232: case 8:
0233: outStr = outStr.substring(6);
0234: break;
0235: }
0236: System.out.print(outStr);
0237: System.out.print(" ");
0238: j++;
0239: if (j > 0 && j % 20 == 0) {
0240: System.out.print("\n ");
0241: }
0242: }
0243: System.out.println("\n");
0244: }
0245: }
0246:
0247: /**---------------------------------------------------------------------------------------
0248: *
0249: * doQSort() The quick sort timing test.
0250: *
0251: *---------------------------------------------------------------------------------------
0252: */
0253: void doQSort() {
0254: callGC();
0255: //String[] sortTests = (String[]) tests.clone();
0256: //Adjust loop count to compensate for file size. QSort should be nlog(n)
0257: double dLoopCount = opt_loopCount
0258: * 3000
0259: / ((Math.log(tests.length) / Math.log(10) * tests.length));
0260:
0261: if (opt_usekeys) {
0262: dLoopCount *= 5;
0263: }
0264:
0265: int adj_loopCount = (int) dLoopCount;
0266: if (adj_loopCount < 1) {
0267: adj_loopCount = 1;
0268: }
0269:
0270: globalCount = 0;
0271: long startTime = 0;
0272: long endTime = 0;
0273: if (opt_icu && opt_usekeys) {
0274: startTime = System.currentTimeMillis();
0275: qSortImpl_icu_usekeys(tests, 0, tests.length - 1, icuCol);
0276: endTime = System.currentTimeMillis();
0277: }
0278: if (opt_icu && !opt_usekeys) {
0279: startTime = System.currentTimeMillis();
0280: qSortImpl_nokeys(tests, 0, tests.length - 1, icuCol);
0281: endTime = System.currentTimeMillis();
0282: }
0283: if (opt_java && opt_usekeys) {
0284: startTime = System.currentTimeMillis();
0285: qSortImpl_java_usekeys(tests, 0, tests.length - 1, javaCol);
0286: endTime = System.currentTimeMillis();
0287: }
0288: if (opt_java && !opt_usekeys) {
0289: startTime = System.currentTimeMillis();
0290: qSortImpl_nokeys(tests, 0, tests.length - 1, javaCol);
0291: endTime = System.currentTimeMillis();
0292: }
0293: long elapsedTime = endTime - startTime;
0294: int ns = (int) (1000000 * elapsedTime / (globalCount + 0.0));
0295: if (!opt_terse) {
0296: System.out.println("qsort: total # of string compares = "
0297: + globalCount);
0298: System.out.println("qsort: time per compare = " + ns);
0299: } else {
0300: System.out.println(ns);
0301: }
0302: }
0303:
0304: /**---------------------------------------------------------------------------------------
0305: *
0306: * doBinarySearch() Binary Search timing test. Each name from the list
0307: * is looked up in the full sorted list of names.
0308: *
0309: *---------------------------------------------------------------------------------------
0310: */
0311: void doBinarySearch() {
0312: callGC();
0313: int gCount = 0;
0314: int loops = 0;
0315: double dLoopCount = opt_loopCount
0316: * 3000
0317: / (Math.log(tests.length) / Math.log(10) * tests.length);
0318: long startTime = 0;
0319: long elapsedTime = 0;
0320:
0321: if (opt_usekeys) {
0322: dLoopCount *= 5;
0323: }
0324: int adj_loopCount = (int) dLoopCount;
0325: if (adj_loopCount < 1) {
0326: adj_loopCount = 1;
0327: }
0328:
0329: //int opt2 = 0;
0330:
0331: for (;;) { //not really a loop, just allows "break" to work, to simplify
0332: //inadvertantly running more than one test through here
0333: if (opt_strcmp) {
0334: int r = 0;
0335: startTime = System.currentTimeMillis();
0336: for (loops = 0; loops < adj_loopCount; loops++) {
0337: for (int j = 0; j < tests.length; j++) {
0338: int hi = tests.length - 1;
0339: int lo = 0;
0340: int guess = -1;
0341: for (;;) {
0342: int newGuess = (hi + lo) / 2;
0343: if (newGuess == guess) {
0344: break;
0345: }
0346: guess = newGuess;
0347: r = tests[j].compareTo(tests[guess]);
0348: gCount++;
0349: if (r == 0) {
0350: break;
0351: }
0352: if (r < 0) {
0353: hi = guess;
0354: } else {
0355: lo = guess;
0356: }
0357: }
0358: }
0359: }
0360: elapsedTime = System.currentTimeMillis() - startTime;
0361: break;
0362: }
0363:
0364: if (opt_strcmpCPO) {
0365: int r = 0;
0366: startTime = System.currentTimeMillis();
0367: for (loops = 0; loops < adj_loopCount; loops++) {
0368: for (int j = 0; j < tests.length; j++) {
0369: int hi = tests.length - 1;
0370: int lo = 0;
0371: int guess = -1;
0372: for (;;) {
0373: int newGuess = (hi + lo) / 2;
0374: if (newGuess == guess) {
0375: break;
0376: }
0377: guess = newGuess;
0378: r = com.ibm.icu.text.Normalizer
0379: .compare(
0380: tests[j],
0381: tests[guess],
0382: Normalizer.COMPARE_CODE_POINT_ORDER);
0383: gCount++;
0384: if (r == 0) {
0385: break;
0386: }
0387: if (r < 0) {
0388: hi = guess;
0389: } else {
0390: lo = guess;
0391: }
0392: }
0393: }
0394: }
0395: elapsedTime = System.currentTimeMillis() - startTime;
0396: break;
0397: }
0398:
0399: if (opt_icu) {
0400:
0401: int r = 0;
0402: startTime = System.currentTimeMillis();
0403: for (loops = 0; loops < adj_loopCount; loops++) {
0404: for (int j = 0; j < tests.length; j++) {
0405: int hi = tests.length - 1;
0406: int lo = 0;
0407: int guess = -1;
0408: for (;;) {
0409: int newGuess = (hi + lo) / 2;
0410: if (newGuess == guess) {
0411: break;
0412: }
0413: guess = newGuess;
0414: if (opt_usekeys) {
0415: com.ibm.icu.text.CollationKey sortKey1 = icuCol
0416: .getCollationKey(tests[j]);
0417: com.ibm.icu.text.CollationKey sortKey2 = icuCol
0418: .getCollationKey(tests[guess]);
0419: r = sortKey1.compareTo(sortKey2);
0420: gCount++;
0421: } else {
0422: r = icuCol.compare(tests[j],
0423: tests[guess]);
0424: gCount++;
0425: }
0426: if (r == 0) {
0427: break;
0428: }
0429: if (r < 0) {
0430: hi = guess;
0431: } else {
0432: lo = guess;
0433: }
0434: }
0435: }
0436: }
0437: elapsedTime = System.currentTimeMillis() - startTime;
0438: break;
0439: }
0440: if (opt_java) {
0441:
0442: int r = 0;
0443: startTime = System.currentTimeMillis();
0444: for (loops = 0; loops < adj_loopCount; loops++) {
0445: for (int j = 0; j < tests.length; j++) {
0446: int hi = tests.length - 1;
0447: int lo = 0;
0448: int guess = -1;
0449: for (;;) {
0450: int newGuess = (hi + lo) / 2;
0451: if (newGuess == guess) {
0452: break;
0453: }
0454: guess = newGuess;
0455: if (opt_usekeys) {
0456: java.text.CollationKey sortKey1 = javaCol
0457: .getCollationKey(tests[j]);
0458: java.text.CollationKey sortKey2 = javaCol
0459: .getCollationKey(tests[guess]);
0460: r = sortKey1.compareTo(sortKey2);
0461: gCount++;
0462: } else {
0463: r = javaCol.compare(tests[j],
0464: tests[guess]);
0465: gCount++;
0466: }
0467: if (r == 0) {
0468: break;
0469: }
0470: if (r < 0) {
0471: hi = guess;
0472: } else {
0473: lo = guess;
0474: }
0475: }
0476: }
0477: }
0478: elapsedTime = System.currentTimeMillis() - startTime;
0479: break;
0480: }
0481: break;
0482: }
0483: int ns = (int) ((float) (1000000) * (float) elapsedTime / (float) gCount);
0484: if (!opt_terse) {
0485: System.out
0486: .println("binary search: total # of string compares = "
0487: + gCount);
0488: System.out.println("binary search: compares per loop = "
0489: + gCount / loops);
0490: System.out.println("binary search: time per compare = "
0491: + ns);
0492: } else {
0493: System.out.println(ns);
0494: }
0495: }
0496:
0497: /**---------------------------------------------------------------------------------------
0498: *
0499: * doKeyGen() Key Generation Timing Test
0500: *
0501: *---------------------------------------------------------------------------------------
0502: */
0503: void doKeyGen() {
0504: callGC();
0505:
0506: // Adjust loop count to compensate for file size. Should be order n
0507: double dLoopCount = opt_loopCount
0508: * (1000.0 / (double) list.size());
0509: int adj_loopCount = (int) dLoopCount;
0510: if (adj_loopCount < 1)
0511: adj_loopCount = 1;
0512:
0513: long startTime = 0;
0514: long totalKeyLen = 0;
0515: long totalChars = 0;
0516: if (opt_java) {
0517: startTime = System.currentTimeMillis();
0518: for (int loops = 0; loops < adj_loopCount; loops++) {
0519: for (int line = 0; line < tests.length; line++) {
0520: for (int iLoop = 0; iLoop < opt_iLoopCount; iLoop++) {
0521: totalChars += tests[line].length();
0522: byte[] sortKey = javaCol.getCollationKey(
0523: tests[line]).toByteArray();
0524: totalKeyLen += sortKey.length;
0525: }
0526: }
0527: }
0528: } else {
0529: startTime = System.currentTimeMillis();
0530: for (int loops = 0; loops < adj_loopCount; loops++) {
0531: for (int line = 0; line < tests.length; line++) {
0532: for (int iLoop = 0; iLoop < opt_iLoopCount; iLoop++) {
0533: totalChars += tests[line].length();
0534: byte[] sortKey = icuCol.getCollationKey(
0535: tests[line]).toByteArray();
0536: totalKeyLen += sortKey.length;
0537: }
0538: }
0539: }
0540: }
0541:
0542: long elapsedTime = System.currentTimeMillis() - startTime;
0543: long ns = (long) (1000000 * elapsedTime / (adj_loopCount
0544: * tests.length + 0.0));
0545: if (!opt_terse) {
0546: System.out
0547: .println("Sort Key Generation: total # of keys ="
0548: + adj_loopCount * tests.length);
0549: System.out.println("Sort Key Generation: time per key = "
0550: + ns + " ns");
0551: System.out.println("Key Length / character = "
0552: + nf.format(totalKeyLen / (totalChars + 0.0)));
0553: } else {
0554: System.out.print(ns + ", ");
0555: System.out.println(nf.format(totalKeyLen
0556: / (totalChars + 0.0))
0557: + ", ");
0558: }
0559: }
0560:
0561: /**---------------------------------------------------------------------------------------
0562: *
0563: * doKeyHist() Output a table of data for average sort key size vs. string length.
0564: *
0565: *---------------------------------------------------------------------------------------
0566: */
0567: void doKeyHist() {
0568: callGC();
0569: int maxLen = 0;
0570:
0571: // Find the maximum string length
0572: for (int i = 0; i < tests.length; i++) {
0573: if (tests[i].length() > maxLen)
0574: maxLen = tests[i].length();
0575: }
0576:
0577: int[] accumulatedLen = new int[maxLen + 1];
0578: int[] numKeysOfSize = new int[maxLen + 1];
0579:
0580: // Fill the arrays...
0581: for (int i = 0; i < tests.length; i++) {
0582: int len = tests[i].length();
0583: accumulatedLen[len] += icuCol.getCollationKey(tests[i])
0584: .toByteArray().length;
0585: numKeysOfSize[len] += 1;
0586: }
0587:
0588: // And write out averages
0589: System.out
0590: .println("String Length, Avg Key Length, Avg Key Len per char");
0591: for (int i = 1; i <= maxLen; i++) {
0592: if (numKeysOfSize[i] > 0) {
0593: System.out.println(i
0594: + ", "
0595: + nf.format(accumulatedLen[i]
0596: / (numKeysOfSize[i] + 0.0))
0597: + ", "
0598: + nf.format(accumulatedLen[i]
0599: / (numKeysOfSize[i] * i + 0.0)));
0600: }
0601: }
0602:
0603: }
0604:
0605: void doForwardIterTest() {
0606: callGC();
0607: System.out
0608: .print("\n\nPerforming forward iteration performance test with ");
0609: System.out
0610: .println("performance test on strings from file -----------");
0611:
0612: CollationElementIterator iter = ((RuleBasedCollator) icuCol)
0613: .getCollationElementIterator("");
0614:
0615: int gCount = 0;
0616: int count = 0;
0617: long startTime = System.currentTimeMillis();
0618: while (count < opt_loopCount) {
0619: int linecount = 0;
0620: while (linecount < tests.length) {
0621: String str = tests[linecount];
0622: iter.setText(str);
0623: while (iter.next() != CollationElementIterator.NULLORDER) {
0624: gCount++;
0625: }
0626: linecount++;
0627: }
0628: count++;
0629: }
0630:
0631: long elapsedTime = System.currentTimeMillis() - startTime;
0632: System.out.println("elapsedTime " + elapsedTime + " ms");
0633:
0634: // empty loop recalculation
0635: count = 0;
0636: startTime = System.currentTimeMillis();
0637: while (count < opt_loopCount) {
0638: int linecount = 0;
0639: while (linecount < tests.length) {
0640: String str = tests[linecount];
0641: iter.setText(str);
0642: linecount++;
0643: }
0644: count++;
0645: }
0646: elapsedTime -= (System.currentTimeMillis() - startTime);
0647: System.out.println("elapsedTime " + elapsedTime + " ms");
0648:
0649: int ns = (int) (1000000 * elapsedTime / (gCount + 0.0));
0650: System.out.println("Total number of strings compared "
0651: + tests.length + "in " + opt_loopCount + " loops");
0652: System.out
0653: .println("Average time per CollationElementIterator.next() nano seconds "
0654: + ns);
0655: System.out
0656: .println("performance test on skipped-5 concatenated strings from file -----------");
0657:
0658: String totalStr = "";
0659: int strlen = 0;
0660: // appending all the strings
0661: int linecount = 0;
0662: while (linecount < tests.length) {
0663: totalStr += tests[linecount];
0664: strlen += tests[linecount].length();
0665: linecount++;
0666: }
0667: System.out.println("Total size of strings " + strlen);
0668:
0669: gCount = 0;
0670: count = 0;
0671: iter = ((RuleBasedCollator) icuCol)
0672: .getCollationElementIterator(totalStr);
0673: strlen -= 5; // any left over characters are not iterated,
0674: // this is to ensure the backwards and forwards iterators
0675: // gets the same position
0676: int strindex = 0;
0677: startTime = System.currentTimeMillis();
0678: while (count < opt_loopCount) {
0679: int count5 = 5;
0680: strindex = 0;
0681: iter.setOffset(strindex);
0682: while (true) {
0683: if (iter.next() == CollationElementIterator.NULLORDER) {
0684: break;
0685: }
0686: gCount++;
0687: count5--;
0688: if (count5 == 0) {
0689: strindex += 10;
0690: if (strindex > strlen) {
0691: break;
0692: }
0693: iter.setOffset(strindex);
0694: count5 = 5;
0695: }
0696: }
0697: count++;
0698: }
0699:
0700: elapsedTime = System.currentTimeMillis() - startTime;
0701: System.out.println("elapsedTime " + elapsedTime);
0702:
0703: // empty loop recalculation
0704: int tempgCount = 0;
0705: count = 0;
0706: startTime = System.currentTimeMillis();
0707: while (count < opt_loopCount) {
0708: int count5 = 5;
0709: strindex = 0;
0710: iter.setOffset(strindex);
0711: while (true) {
0712: tempgCount++;
0713: count5--;
0714: if (count5 == 0) {
0715: strindex += 10;
0716: if (strindex > strlen) {
0717: break;
0718: }
0719: iter.setOffset(strindex);
0720: count5 = 5;
0721: }
0722: }
0723: count++;
0724: }
0725: elapsedTime -= (System.currentTimeMillis() - startTime);
0726: System.out.println("elapsedTime " + elapsedTime);
0727:
0728: System.out.println("gCount " + gCount);
0729: ns = (int) (1000000 * elapsedTime / (gCount + 0.0));
0730: System.out
0731: .println("Average time per CollationElementIterator.next() nano seconds "
0732: + ns);
0733: }
0734:
0735: void doBackwardIterTest() {
0736: System.out
0737: .print("\n\nPerforming backward iteration performance test with ");
0738: System.out
0739: .println("performance test on strings from file -----------\n");
0740:
0741: CollationElementIterator iter = ((RuleBasedCollator) icuCol)
0742: .getCollationElementIterator("");
0743:
0744: int gCount = 0;
0745: int count = 0;
0746: long startTime = System.currentTimeMillis();
0747: while (count < opt_loopCount) {
0748: int linecount = 0;
0749: while (linecount < tests.length) {
0750: String str = tests[linecount];
0751: iter.setText(str);
0752: while (iter.previous() != CollationElementIterator.NULLORDER) {
0753: gCount++;
0754: }
0755: linecount++;
0756: }
0757: count++;
0758: }
0759: long elapsedTime = System.currentTimeMillis() - startTime;
0760: System.out.println("elapsedTime " + elapsedTime + " ms");
0761:
0762: // empty loop recalculation
0763: count = 0;
0764: startTime = System.currentTimeMillis();
0765: while (count < opt_loopCount) {
0766: int linecount = 0;
0767: while (linecount < tests.length) {
0768: String str = tests[linecount];
0769: iter.setText(str);
0770: linecount++;
0771: }
0772: count++;
0773: }
0774: elapsedTime -= (System.currentTimeMillis() - startTime);
0775: System.out.println("elapsedTime " + elapsedTime + " ms");
0776:
0777: int ns = (int) (1000000 * elapsedTime / (gCount + 0.0));
0778: System.out.println("Total number of strings compared "
0779: + tests.length + "in " + opt_loopCount + " loops");
0780: System.out
0781: .println("Average time per CollationElementIterator.previous() nano seconds "
0782: + ns);
0783: System.out
0784: .println("performance test on skipped-5 concatenated strings from file -----------");
0785:
0786: String totalStr = "";
0787: int strlen = 0;
0788: // appending all the strings
0789: int linecount = 0;
0790: while (linecount < tests.length) {
0791: totalStr += tests[linecount];
0792: strlen += tests[linecount].length();
0793: linecount++;
0794: }
0795: System.out.println("Total size of strings " + strlen);
0796:
0797: gCount = 0;
0798: count = 0;
0799:
0800: iter = ((RuleBasedCollator) icuCol)
0801: .getCollationElementIterator(totalStr);
0802: int strindex = 0;
0803: startTime = System.currentTimeMillis();
0804: while (count < opt_loopCount) {
0805: int count5 = 5;
0806: strindex = 5;
0807: iter.setOffset(strindex);
0808: while (true) {
0809: if (iter.previous() == CollationElementIterator.NULLORDER) {
0810: break;
0811: }
0812: gCount++;
0813: count5--;
0814: if (count5 == 0) {
0815: strindex += 10;
0816: if (strindex > strlen) {
0817: break;
0818: }
0819: iter.setOffset(strindex);
0820: count5 = 5;
0821: }
0822: }
0823: count++;
0824: }
0825:
0826: elapsedTime = System.currentTimeMillis() - startTime;
0827: System.out.println("elapsedTime " + elapsedTime);
0828:
0829: // empty loop recalculation
0830: count = 0;
0831: int tempgCount = 0;
0832: startTime = System.currentTimeMillis();
0833: while (count < opt_loopCount) {
0834: int count5 = 5;
0835: strindex = 5;
0836: iter.setOffset(strindex);
0837: while (true) {
0838: tempgCount++;
0839: count5--;
0840: if (count5 == 0) {
0841: strindex += 10;
0842: if (strindex > strlen) {
0843: break;
0844: }
0845: iter.setOffset(strindex);
0846: count5 = 5;
0847: }
0848: }
0849: count++;
0850: }
0851: elapsedTime -= (System.currentTimeMillis() - startTime);
0852: System.out.println("elapsedTime " + elapsedTime);
0853:
0854: System.out.println("gCount " + gCount);
0855: ns = (int) (1000000 * elapsedTime / (gCount + 0.0));
0856: System.out
0857: .println("Average time per CollationElementIterator.previous() nano seconds "
0858: + ns);
0859: }
0860:
0861: /**---------------------------------------------------------------------------------------
0862: *
0863: * doIterTest() Iteration test
0864: *
0865: *---------------------------------------------------------------------------------------
0866: */
0867: void doIterTest() {
0868: doForwardIterTest();
0869: doBackwardIterTest();
0870: }
0871:
0872: void setOptions() {
0873:
0874: if (opt_java) {
0875: opt_icu = false;
0876: }
0877:
0878: if (opt_rules.length() != 0) {
0879: try {
0880: icuCol = new com.ibm.icu.text.RuleBasedCollator(
0881: getCollationRules(opt_rules));
0882: } catch (Exception e) {
0883: System.out.println("Cannot open rules:"
0884: + e.getMessage());
0885: System.exit(1);
0886: }
0887: } else {
0888: icuCol = com.ibm.icu.text.Collator
0889: .getInstance(LocaleUtility
0890: .getLocaleFromName(opt_locale));
0891: }
0892:
0893: javaCol = java.text.Collator.getInstance(LocaleUtility
0894: .getLocaleFromName(opt_locale));
0895:
0896: if (opt_norm) {
0897: javaCol
0898: .setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION);
0899: icuCol
0900: .setDecomposition(com.ibm.icu.text.Collator.CANONICAL_DECOMPOSITION);
0901: }
0902:
0903: if (opt_french && opt_frenchoff) {
0904: System.err
0905: .println("Error: specified both -french and -frenchoff options.");
0906: }
0907:
0908: if (opt_french) {
0909: ((com.ibm.icu.text.RuleBasedCollator) icuCol)
0910: .setFrenchCollation(true);
0911: }
0912: if (opt_frenchoff) {
0913: ((com.ibm.icu.text.RuleBasedCollator) icuCol)
0914: .setFrenchCollation(false);
0915: }
0916:
0917: if (opt_lower) {
0918: ((com.ibm.icu.text.RuleBasedCollator) icuCol)
0919: .setLowerCaseFirst(true);
0920: }
0921:
0922: if (opt_upper) {
0923: ((com.ibm.icu.text.RuleBasedCollator) icuCol)
0924: .setUpperCaseFirst(true);
0925: }
0926:
0927: if (opt_shifted) {
0928: ((com.ibm.icu.text.RuleBasedCollator) icuCol)
0929: .setAlternateHandlingShifted(true);
0930: }
0931:
0932: if (opt_level != 0) {
0933: switch (opt_level) {
0934: case 1:
0935: javaCol.setStrength(java.text.Collator.PRIMARY);
0936: icuCol.setStrength(com.ibm.icu.text.Collator.PRIMARY);
0937: break;
0938: case 2:
0939: javaCol.setStrength(java.text.Collator.SECONDARY);
0940: icuCol.setStrength(com.ibm.icu.text.Collator.SECONDARY);
0941: break;
0942: case 3:
0943: javaCol.setStrength(java.text.Collator.TERTIARY);
0944: icuCol.setStrength(com.ibm.icu.text.Collator.TERTIARY);
0945: break;
0946: case 4:
0947: icuCol
0948: .setStrength(com.ibm.icu.text.Collator.QUATERNARY);
0949: break;
0950: case 5:
0951: javaCol.setStrength(java.text.Collator.IDENTICAL);
0952: icuCol.setStrength(com.ibm.icu.text.Collator.IDENTICAL);
0953: break;
0954: default:
0955: System.err
0956: .println("-level param must be between 1 and 5\n");
0957: System.exit(1);
0958: }
0959: }
0960: // load classes at least once before starting
0961: javaCol.compare("a", "b");
0962: icuCol.compare("a", "b");
0963: }
0964:
0965: static boolean processOptions(String[] args) {
0966: int argNum;
0967: for (argNum = 0; argNum < args.length; argNum++) {
0968: for (int i = 0; i < options.length; i++) {
0969: if (args[argNum].equalsIgnoreCase(options[i].name)) {
0970: switch (options[i].type) {
0971: case 0:
0972: options[i].value.delete(0,
0973: options[i].value.capacity()).append(
0974: "true");
0975: break;
0976: case 1:
0977: argNum++;
0978: if ((argNum >= args.length)
0979: || (args[argNum].charAt(0) == '-')) {
0980: System.err.println("value expected for"
0981: + options[i].name + "option.\n");
0982: return false;
0983: }
0984: try {
0985: /* int value =*/Integer
0986: .parseInt(args[argNum]);
0987: options[i].value.delete(0,
0988: options[i].value.capacity())
0989: .append(args[argNum]);
0990: } catch (NumberFormatException e) {
0991: System.err
0992: .println("Expected: a number value");
0993: return false;
0994: }
0995: break;
0996: case 2:
0997: argNum++;
0998: if ((argNum >= args.length)
0999: || (args[argNum].charAt(0) == '-')) {
1000: System.err.println("value expected for"
1001: + options[i].name + "option.\n");
1002: return false;
1003: }
1004: options[i].value.delete(0,
1005: options[i].value.capacity()).append(
1006: args[argNum]);
1007: break;
1008: default:
1009: System.err
1010: .println("Option type error: {FLAG=0, NUM=1, STRING=2}");
1011: return false;
1012: }
1013: }
1014: }
1015: }
1016:
1017: opt_fName = temp_opt_fName.toString();
1018: opt_locale = temp_opt_locale.toString();
1019: opt_rules = temp_opt_rules.toString();
1020: if (temp_opt_help.toString().equalsIgnoreCase("true")) {
1021: opt_help = true;
1022: }
1023: opt_loopCount = Integer.parseInt(temp_opt_loopCount.toString());
1024: opt_iLoopCount = Integer.parseInt(temp_opt_iLoopCount
1025: .toString());
1026: if (temp_opt_terse.toString().equalsIgnoreCase("true")) {
1027: opt_terse = true;
1028: }
1029: if (temp_opt_qsort.toString().equalsIgnoreCase("true")) {
1030: opt_qsort = true;
1031: }
1032: if (temp_opt_binsearch.toString().equalsIgnoreCase("true")) {
1033: opt_binsearch = true;
1034: }
1035: if (temp_opt_icu.toString().equalsIgnoreCase("true")) {
1036: opt_icu = true;
1037: }
1038: if (temp_opt_usekeys.toString().equalsIgnoreCase("true")) {
1039: opt_usekeys = true;
1040: }
1041: if (temp_opt_strcmp.toString().equalsIgnoreCase("true")) {
1042: opt_strcmp = true;
1043: }
1044: if (temp_opt_strcmpCPO.toString().equalsIgnoreCase("true")) {
1045: opt_strcmpCPO = true;
1046: }
1047: if (temp_opt_keygen.toString().equalsIgnoreCase("true")) {
1048: opt_keygen = true;
1049: }
1050: if (temp_opt_norm.toString().equalsIgnoreCase("true")) {
1051: opt_norm = true;
1052: }
1053: if (temp_opt_french.toString().equalsIgnoreCase("true")) {
1054: opt_french = true;
1055: }
1056: if (temp_opt_frenchoff.toString().equalsIgnoreCase("true")) {
1057: opt_frenchoff = true;
1058: }
1059: if (temp_opt_shifted.toString().equalsIgnoreCase("true")) {
1060: opt_shifted = true;
1061: }
1062: if (temp_opt_lower.toString().equalsIgnoreCase("true")) {
1063: opt_lower = true;
1064: }
1065: if (temp_opt_upper.toString().equalsIgnoreCase("true")) {
1066: opt_upper = true;
1067: }
1068: if (temp_opt_case.toString().equalsIgnoreCase("true")) {
1069: opt_case = true;
1070: }
1071: opt_level = Integer.parseInt(temp_opt_level.toString());
1072: if (temp_opt_keyhist.toString().equalsIgnoreCase("true")) {
1073: opt_keyhist = true;
1074: }
1075: if (temp_opt_itertest.toString().equalsIgnoreCase("true")) {
1076: opt_itertest = true;
1077: }
1078: if (temp_opt_dump.toString().equalsIgnoreCase("true")) {
1079: opt_dump = true;
1080: }
1081: if (temp_opt_java.toString().equalsIgnoreCase("true")) {
1082: opt_java = true;
1083: }
1084:
1085: return true;
1086: }
1087:
1088: /**
1089: * Invoke the runtime's garbage collection procedure repeatedly
1090: * until the amount of free memory stabilizes to within 10%.
1091: */
1092: private void callGC() {
1093: // From "Java Platform Performance". This is the procedure
1094: // recommended by Javasoft.
1095: try {
1096: System.gc();
1097: Thread.sleep(100);
1098: System.runFinalization();
1099: Thread.sleep(100);
1100:
1101: System.gc();
1102: Thread.sleep(100);
1103: System.runFinalization();
1104: Thread.sleep(100);
1105: } catch (InterruptedException e) {
1106: }
1107: }
1108:
1109: private boolean needCRLF = false;
1110:
1111: public int DOTMASK = 0x7FF;
1112:
1113: void dot(int i) {
1114: if ((i % DOTMASK) == 0) {
1115: needCRLF = true;
1116: // I do not know why print the dot here
1117: //System.out.print('.');
1118: }
1119: }
1120:
1121: String readDataLine(BufferedReader br) throws Exception {
1122: String originalLine = "";
1123: String line = "";
1124:
1125: try {
1126: line = originalLine = br.readLine();
1127: if (line == null)
1128: return null;
1129: if (line.length() > 0 && line.charAt(0) == 0xFEFF)
1130: line = line.substring(1);
1131: int commentPos = line.indexOf('#');
1132: if (commentPos >= 0)
1133: line = line.substring(0, commentPos);
1134: line = line.trim();
1135: } catch (Exception e) {
1136: throw new Exception("Line \"{0}\", \"{1}\"" + originalLine
1137: + " " + line + " " + e.toString());
1138: }
1139: return line;
1140: }
1141:
1142: void readDataLines() {
1143: // Read in the input file.
1144: // File assumed to be utf-16.
1145: // Lines go onto heap buffers. Global index array to line starts is created.
1146: // Lines themselves are null terminated.
1147: //
1148: FileInputStream fis = null;
1149: InputStreamReader isr = null;
1150: BufferedReader br = null;
1151: try {
1152: fis = new FileInputStream(opt_fName);
1153: isr = new InputStreamReader(fis, "UTF-8");
1154: br = new BufferedReader(isr, 32 * 1024);
1155: } catch (Exception e) {
1156: System.err.println("Error: File access exception: "
1157: + e.getMessage() + "!");
1158: System.exit(2);
1159: }
1160:
1161: int counter = 0;
1162:
1163: list = new ArrayList();
1164: while (true) {
1165: String line = null;
1166: try {
1167: line = readDataLine(br);
1168: } catch (Exception e) {
1169: System.err.println("Read File Error" + e.getMessage()
1170: + "!");
1171: System.exit(1);
1172: }
1173:
1174: if (line == null)
1175: break;
1176: if (line.length() == 0)
1177: continue;
1178: dot(counter++);
1179: list.add(line);
1180: }
1181: if (!opt_terse) {
1182: System.out.println("Read " + counter + " lines in file");
1183: }
1184:
1185: int size = list.size();
1186: tests = new String[size];
1187:
1188: for (int i = 0; i < size; ++i) {
1189: tests[i] = (String) list.get(i);
1190: }
1191: }
1192:
1193: /**
1194: * Get the Collator Rules
1195: * The Rule File format:
1196: * 1. leading and trailing whitespaces will be omitted
1197: * 2. lines with the leading character '#' will be treated as comments
1198: * 3. File encoding is ISO-8859-1
1199: */
1200: String getCollationRules(String ruleFileName) {
1201: FileInputStream fis = null;
1202: InputStreamReader isr = null;
1203: BufferedReader br = null;
1204: try {
1205: fis = new FileInputStream(opt_rules);
1206: isr = new InputStreamReader(fis, "ISO-8859-1");
1207: br = new BufferedReader(isr);
1208: } catch (Exception e) {
1209: System.err.println("Error: File access exception: "
1210: + e.getMessage() + "!");
1211: System.exit(2);
1212: }
1213: String rules = "";
1214: String line = "";
1215: while (true) {
1216: try {
1217: line = br.readLine();
1218: } catch (IOException e) {
1219: System.err.println("Read File Error" + e.getMessage()
1220: + "!");
1221: System.exit(1);
1222: }
1223: if (line == null) {
1224: break;
1225: }
1226: int commentPos = line.indexOf('#');
1227: if (commentPos >= 0)
1228: line = line.substring(0, commentPos);
1229: line = line.trim();
1230: rules = rules + line;
1231: }
1232: return rules;
1233: }
1234:
1235: //Implementing qsort
1236: void qSortImpl_java_usekeys(String src[], int fromIndex,
1237: int toIndex, java.text.Collator c) {
1238: int low = fromIndex;
1239: int high = toIndex;
1240: String middle = "";
1241: if (high > low) {
1242: middle = src[(low + high) / 2];
1243: while (low <= high) {
1244: while ((low < toIndex)
1245: && (compare(c.getCollationKey(src[low]), c
1246: .getCollationKey(middle)) < 0)) {
1247: ++low;
1248: }
1249: while ((high > fromIndex)
1250: && (compare(c.getCollationKey(src[high]), c
1251: .getCollationKey(middle)) > 0)) {
1252: --high;
1253: }
1254: if (low <= high) {
1255: String swap = src[low];
1256: src[low] = src[high];
1257: src[high] = swap;
1258: ++low;
1259: --high;
1260: }
1261: }
1262: if (fromIndex < high) {
1263: qSortImpl_java_usekeys(src, fromIndex, high, c);
1264: }
1265:
1266: if (low < toIndex) {
1267: qSortImpl_java_usekeys(src, low, toIndex, c);
1268: }
1269: }
1270: }
1271:
1272: void qSortImpl_icu_usekeys(String src[], int fromIndex,
1273: int toIndex, com.ibm.icu.text.Collator c) {
1274: int low = fromIndex;
1275: int high = toIndex;
1276: String middle = "";
1277: if (high > low) {
1278: middle = src[(low + high) / 2];
1279: while (low <= high) {
1280: while ((low < toIndex)
1281: && (compare(c.getCollationKey(src[low]), c
1282: .getCollationKey(middle)) < 0)) {
1283: ++low;
1284: }
1285: while ((high > fromIndex)
1286: && (compare(c.getCollationKey(src[high]), c
1287: .getCollationKey(middle)) > 0)) {
1288: --high;
1289: }
1290: if (low <= high) {
1291: String swap = src[low];
1292: src[low] = src[high];
1293: src[high] = swap;
1294: ++low;
1295: --high;
1296: }
1297: }
1298: if (fromIndex < high) {
1299: qSortImpl_icu_usekeys(src, fromIndex, high, c);
1300: }
1301:
1302: if (low < toIndex) {
1303: qSortImpl_icu_usekeys(src, low, toIndex, c);
1304: }
1305: }
1306: }
1307:
1308: void qSortImpl_nokeys(String src[], int fromIndex, int toIndex,
1309: Comparator c) {
1310: int low = fromIndex;
1311: int high = toIndex;
1312: String middle = "";
1313: if (high > low) {
1314: middle = src[(low + high) / 2];
1315: while (low <= high) {
1316: while ((low < toIndex)
1317: && (compare(src[low], middle, c) < 0)) {
1318: ++low;
1319: }
1320: while ((high > fromIndex)
1321: && (compare(src[high], middle, c) > 0)) {
1322: --high;
1323: }
1324: if (low <= high) {
1325: String swap = src[low];
1326: src[low] = src[high];
1327: src[high] = swap;
1328: ++low;
1329: --high;
1330: }
1331: }
1332: if (fromIndex < high) {
1333: qSortImpl_nokeys(src, fromIndex, high, c);
1334: }
1335:
1336: if (low < toIndex) {
1337: qSortImpl_nokeys(src, low, toIndex, c);
1338: }
1339: }
1340: }
1341:
1342: int compare(String source, String target, Comparator c) {
1343: globalCount++;
1344: return c.compare(source, target);
1345: }
1346:
1347: int compare(java.text.CollationKey source,
1348: java.text.CollationKey target) {
1349: globalCount++;
1350: return source.compareTo(target);
1351: }
1352:
1353: int compare(com.ibm.icu.text.CollationKey source,
1354: com.ibm.icu.text.CollationKey target) {
1355: globalCount++;
1356: return source.compareTo(target);
1357: }
1358:
1359: //Class for command line option
1360: static class OptionSpec {
1361: String name;
1362: int type;
1363: StringBuffer value;
1364:
1365: public OptionSpec(String name, int type, StringBuffer value) {
1366: this.name = name;
1367: this.type = type;
1368: this.value = value;
1369: }
1370: }
1371: }
|