001: /*
002: * LsearchCmd.java
003: *
004: * Copyright (c) 1997 Cornell University.
005: * Copyright (c) 1997 Sun Microsystems, Inc.
006: * Copyright (c) 1998-1999 by Scriptics Corporation.
007: * Copyright (c) 2000 Christian Krone.
008: *
009: * See the file "license.terms" for information on usage and
010: * redistribution of this file, and for a DISCLAIMER OF ALL
011: * WARRANTIES.
012: *
013: * RCS: @(#) $Id: LsearchCmd.java,v 1.2 2000/08/21 04:12:51 mo Exp $
014: *
015: */
016:
017: package tcl.lang;
018:
019: /*
020: * This class implements the built-in "lsearch" command in Tcl.
021: */
022:
023: class LsearchCmd implements Command {
024:
025: static final private String[] options = { "-ascii", "-decreasing",
026: "-dictionary", "-exact", "-increasing", "-integer",
027: "-glob", "-real", "-regexp", "-sorted" };
028: static final int LSEARCH_ASCII = 0;
029: static final int LSEARCH_DECREASING = 1;
030: static final int LSEARCH_DICTIONARY = 2;
031: static final int LSEARCH_EXACT = 3;
032: static final int LSEARCH_INCREASING = 4;
033: static final int LSEARCH_INTEGER = 5;
034: static final int LSEARCH_GLOB = 6;
035: static final int LSEARCH_REAL = 7;
036: static final int LSEARCH_REGEXP = 8;
037: static final int LSEARCH_SORTED = 9;
038:
039: static final int ASCII = 0;
040: static final int DICTIONARY = 1;
041: static final int INTEGER = 2;
042: static final int REAL = 3;
043:
044: static final int EXACT = 0;
045: static final int GLOB = 1;
046: static final int REGEXP = 2;
047: static final int SORTED = 3;
048:
049: /*
050: *-----------------------------------------------------------------------------
051: *
052: * cmdProc --
053: *
054: * This procedure is invoked to process the "lsearch" Tcl command.
055: * See the user documentation for details on what it does.
056: *
057: * Results:
058: * None.
059: *
060: * Side effects:
061: * See the user documentation.
062: *
063: *-----------------------------------------------------------------------------
064: */
065:
066: public void cmdProc(Interp interp, // Current interpreter.
067: TclObject[] objv) // Arguments to "lsearch" command.
068: throws TclException {
069: int mode = GLOB;
070: int dataType = ASCII;
071: boolean isIncreasing = true;
072: TclObject pattern = null;
073: TclObject list = null;
074:
075: if (objv.length < 3) {
076: throw new TclNumArgsException(interp, 1, objv,
077: "?options? list pattern");
078: }
079:
080: for (int i = 1; i < objv.length - 2; i++) {
081: switch (TclIndex.get(interp, objv[i], options, "option", 0)) {
082: case LSEARCH_ASCII: // -ascii
083: dataType = ASCII;
084: break;
085: case LSEARCH_DECREASING: // -decreasing
086: isIncreasing = false;
087: break;
088: case LSEARCH_DICTIONARY: // -dictionary
089: dataType = DICTIONARY;
090: break;
091: case LSEARCH_EXACT: // -increasing
092: mode = EXACT;
093: break;
094: case LSEARCH_INCREASING: // -increasing
095: isIncreasing = true;
096: break;
097: case LSEARCH_INTEGER: // -integer
098: dataType = INTEGER;
099: break;
100: case LSEARCH_GLOB: // -glob
101: mode = GLOB;
102: break;
103: case LSEARCH_REAL: // -real
104: dataType = REAL;
105: break;
106: case LSEARCH_REGEXP: // -regexp
107: mode = REGEXP;
108: break;
109: case LSEARCH_SORTED: // -sorted
110: mode = SORTED;
111: break;
112: }
113: }
114:
115: // Make sure the list argument is a list object and get its length and
116: // a pointer to its array of element pointers.
117:
118: TclObject[] listv = TclList.getElements(interp,
119: objv[objv.length - 2]);
120:
121: TclObject patObj = objv[objv.length - 1];
122: String patternBytes = null;
123: int patInt = 0;
124: double patDouble = 0.0;
125: int length = 0;
126: if (mode == EXACT || mode == SORTED) {
127: switch (dataType) {
128: case ASCII:
129: case DICTIONARY:
130: patternBytes = patObj.toString();
131: length = patternBytes.length();
132: break;
133: case INTEGER:
134: patInt = TclInteger.get(interp, patObj);
135: break;
136: case REAL:
137: patDouble = TclDouble.get(interp, patObj);
138: break;
139: }
140: } else {
141: patternBytes = patObj.toString();
142: length = patternBytes.length();
143: }
144:
145: // Set default index value to -1, indicating failure; if we find the
146: // item in the course of our search, index will be set to the correct
147: // value.
148:
149: int index = -1;
150: if (mode == SORTED) {
151: // If the data is sorted, we can do a more intelligent search.
152: int match = 0;
153: int lower = -1;
154: int upper = listv.length;
155: while (lower + 1 != upper) {
156: int i = (lower + upper) / 2;
157: switch (dataType) {
158: case ASCII: {
159: String bytes = listv[i].toString();
160: match = patternBytes.compareTo(bytes);
161: break;
162: }
163: case DICTIONARY: {
164: String bytes = listv[i].toString();
165: match = DictionaryCompare(patternBytes, bytes);
166: break;
167: }
168: case INTEGER: {
169: int objInt = TclInteger.get(interp, listv[i]);
170: if (patInt == objInt) {
171: match = 0;
172: } else if (patInt < objInt) {
173: match = -1;
174: } else {
175: match = 1;
176: }
177: break;
178: }
179: case REAL: {
180: double objDouble = TclDouble.get(interp, listv[i]);
181: if (patDouble == objDouble) {
182: match = 0;
183: } else if (patDouble < objDouble) {
184: match = -1;
185: } else {
186: match = 1;
187: }
188: break;
189: }
190: }
191: if (match == 0) {
192:
193: // Normally, binary search is written to stop when it
194: // finds a match. If there are duplicates of an element in
195: // the list, our first match might not be the first occurance.
196: // Consider: 0 0 0 1 1 1 2 2 2
197: // To maintain consistancy with standard lsearch semantics,
198: // we must find the leftmost occurance of the pattern in the
199: // list. Thus we don't just stop searching here. This
200: // variation means that a search always makes log n
201: // comparisons (normal binary search might "get lucky" with
202: // an early comparison).
203:
204: index = i;
205: upper = i;
206: } else if (match > 0) {
207: if (isIncreasing) {
208: lower = i;
209: } else {
210: upper = i;
211: }
212: } else {
213: if (isIncreasing) {
214: upper = i;
215: } else {
216: lower = i;
217: }
218: }
219: }
220: } else {
221: for (int i = 0; i < listv.length; i++) {
222: boolean match = false;
223: switch (mode) {
224: case SORTED:
225: case EXACT: {
226: switch (dataType) {
227: case ASCII: {
228: String bytes = listv[i].toString();
229: int elemLen = bytes.length();
230: if (length == elemLen) {
231: match = bytes.equals(patternBytes);
232: }
233: break;
234: }
235: case DICTIONARY: {
236: String bytes = listv[i].toString();
237: match = (DictionaryCompare(bytes, patternBytes) == 0);
238: break;
239: }
240: case INTEGER: {
241: int objInt = TclInteger.get(interp, listv[i]);
242: match = (objInt == patInt);
243: break;
244: }
245: case REAL: {
246: double objDouble = TclDouble.get(interp,
247: listv[i]);
248: match = (objDouble == patDouble);
249: break;
250: }
251: }
252: break;
253: }
254: case GLOB: {
255: match = Util.stringMatch(listv[i].toString(),
256: patternBytes);
257: break;
258: }
259: case REGEXP: {
260: match = Util.regExpMatch(interp, listv[i]
261: .toString(), patObj);
262: break;
263: }
264: }
265: if (match) {
266: index = i;
267: break;
268: }
269: }
270: }
271: interp.setResult(index);
272: }
273:
274: /*
275: *----------------------------------------------------------------------
276: *
277: * DictionaryCompare -> dictionaryCompare
278: *
279: * This function compares two strings as if they were being used in
280: * an index or card catalog. The case of alphabetic characters is
281: * ignored, except to break ties. Thus "B" comes before "b" but
282: * after "a". Also, integers embedded in the strings compare in
283: * numerical order. In other words, "x10y" comes after "x9y", not
284: * before it as it would when using strcmp().
285: *
286: * Results:
287: * A negative result means that the first element comes before the
288: * second, and a positive result means that the second element
289: * should come first. A result of zero means the two elements
290: * are equal and it doesn't matter which comes first.
291: *
292: * Side effects:
293: * None.
294: *
295: *----------------------------------------------------------------------
296: */
297:
298: private static int DictionaryCompare(String left, String right) // The strings to compare
299: {
300: char[] leftArr = left.toCharArray();
301: char[] rightArr = right.toCharArray();
302: char leftChar, rightChar, leftLower, rightLower;
303: int lInd = 0;
304: int rInd = 0;
305: int diff;
306: int secondaryDiff = 0;
307:
308: while (true) {
309: if ((rInd < rightArr.length)
310: && (Character.isDigit(rightArr[rInd]))
311: && (lInd < leftArr.length)
312: && (Character.isDigit(leftArr[lInd]))) {
313: // There are decimal numbers embedded in the two
314: // strings. Compare them as numbers, rather than
315: // strings. If one number has more leading zeros than
316: // the other, the number with more leading zeros sorts
317: // later, but only as a secondary choice.
318:
319: int zeros = 0;
320: while ((rightArr[rInd] == '0')
321: && (rInd + 1 < rightArr.length)
322: && (Character.isDigit(rightArr[rInd + 1]))) {
323: rInd++;
324: zeros--;
325: }
326: while ((leftArr[lInd] == '0')
327: && (lInd + 1 < leftArr.length)
328: && (Character.isDigit(leftArr[lInd + 1]))) {
329: lInd++;
330: zeros++;
331: }
332: if (secondaryDiff == 0) {
333: secondaryDiff = zeros;
334: }
335:
336: // The code below compares the numbers in the two
337: // strings without ever converting them to integers. It
338: // does this by first comparing the lengths of the
339: // numbers and then comparing the digit values.
340:
341: diff = 0;
342: while (true) {
343: if ((diff == 0) && (lInd < leftArr.length)
344: && (rInd < rightArr.length)) {
345: diff = leftArr[lInd] - rightArr[rInd];
346: }
347: rInd++;
348: lInd++;
349: if (rInd >= rightArr.length
350: || !Character.isDigit(rightArr[rInd])) {
351: if (lInd < leftArr.length
352: && Character.isDigit(leftArr[lInd])) {
353: return 1;
354: } else {
355: // The two numbers have the same length. See
356: // if their values are different.
357:
358: if (diff != 0) {
359: return diff;
360: }
361: break;
362: }
363: } else if (lInd >= leftArr.length
364: || !Character.isDigit(leftArr[lInd])) {
365: return -1;
366: }
367: }
368: continue;
369: }
370:
371: // Convert character to Unicode for comparison purposes. If either
372: // string is at the terminating null, do a byte-wise comparison and
373: // bail out immediately.
374:
375: if ((lInd < leftArr.length) && (rInd < rightArr.length)) {
376:
377: // Convert both chars to lower for the comparison, because
378: // dictionary sorts are case insensitve. Covert to lower, not
379: // upper, so chars between Z and a will sort before A (where most
380: // other interesting punctuations occur)
381:
382: leftChar = leftArr[lInd++];
383: rightChar = rightArr[rInd++];
384: leftLower = Character.toLowerCase(leftChar);
385: rightLower = Character.toLowerCase(rightChar);
386: } else if (lInd < leftArr.length) {
387: diff = -rightArr[rInd];
388: break;
389: } else if (rInd < rightArr.length) {
390: diff = leftArr[lInd];
391: break;
392: } else {
393: diff = 0;
394: break;
395: }
396:
397: diff = leftLower - rightLower;
398: if (diff != 0) {
399: return diff;
400: } else if (secondaryDiff == 0) {
401: if (Character.isUpperCase(leftChar)
402: && Character.isLowerCase(rightChar)) {
403: secondaryDiff = -1;
404: } else if (Character.isUpperCase(rightChar)
405: && Character.isLowerCase(leftChar)) {
406: secondaryDiff = 1;
407: }
408: }
409: }
410: if (diff == 0) {
411: diff = secondaryDiff;
412: }
413: return diff;
414: }
415:
416: } // end LsearchCmd
|