0001: /*
0002: * StringCmd.java
0003: *
0004: * Copyright (c) 1997 Cornell University.
0005: * Copyright (c) 1997 Sun Microsystems, Inc.
0006: * Copyright (c) 1998-2000 Scriptics Corporation.
0007: * Copyright (c) 2000 Christian Krone.
0008: *
0009: * See the file "license.terms" for information on usage and
0010: * redistribution of this file, and for a DISCLAIMER OF ALL
0011: * WARRANTIES.
0012: *
0013: * RCS: @(#) $Id: StringCmd.java,v 1.16 2006/06/13 06:52:47 mdejong Exp $
0014: *
0015: */
0016:
0017: package tcl.lang;
0018:
0019: import java.util.*;
0020:
0021: /**
0022: * This class implements the built-in "string" command in Tcl.
0023: */
0024:
0025: class StringCmd implements Command {
0026:
0027: static final private String options[] = { "bytelength", "compare",
0028: "equal", "first", "index", "is", "last", "length", "map",
0029: "match", "range", "repeat", "replace", "tolower",
0030: "toupper", "totitle", "trim", "trimleft", "trimright",
0031: "wordend", "wordstart" };
0032: static final private int STR_BYTELENGTH = 0;
0033: static final private int STR_COMPARE = 1;
0034: static final private int STR_EQUAL = 2;
0035: static final private int STR_FIRST = 3;
0036: static final private int STR_INDEX = 4;
0037: static final private int STR_IS = 5;
0038: static final private int STR_LAST = 6;
0039: static final private int STR_LENGTH = 7;
0040: static final private int STR_MAP = 8;
0041: static final private int STR_MATCH = 9;
0042: static final private int STR_RANGE = 10;
0043: static final private int STR_REPEAT = 11;
0044: static final private int STR_REPLACE = 12;
0045: static final private int STR_TOLOWER = 13;
0046: static final private int STR_TOUPPER = 14;
0047: static final private int STR_TOTITLE = 15;
0048: static final private int STR_TRIM = 16;
0049: static final private int STR_TRIMLEFT = 17;
0050: static final private int STR_TRIMRIGHT = 18;
0051: static final private int STR_WORDEND = 19;
0052: static final private int STR_WORDSTART = 20;
0053:
0054: static final private String isOptions[] = { "alnum", "alpha",
0055: "ascii", "control", "boolean", "digit", "double", "false",
0056: "graph", "integer", "lower", "print", "punct", "space",
0057: "true", "upper", "wordchar", "xdigit" };
0058: static final private int STR_IS_ALNUM = 0;
0059: static final private int STR_IS_ALPHA = 1;
0060: static final private int STR_IS_ASCII = 2;
0061: static final private int STR_IS_CONTROL = 3;
0062: static final private int STR_IS_BOOL = 4;
0063: static final private int STR_IS_DIGIT = 5;
0064: static final private int STR_IS_DOUBLE = 6;
0065: static final private int STR_IS_FALSE = 7;
0066: static final private int STR_IS_GRAPH = 8;
0067: static final private int STR_IS_INT = 9;
0068: static final private int STR_IS_LOWER = 10;
0069: static final private int STR_IS_PRINT = 11;
0070: static final private int STR_IS_PUNCT = 12;
0071: static final private int STR_IS_SPACE = 13;
0072: static final private int STR_IS_TRUE = 14;
0073: static final private int STR_IS_UPPER = 15;
0074: static final private int STR_IS_WORD = 16;
0075: static final private int STR_IS_XDIGIT = 17;
0076:
0077: /**
0078: * Java's Character class has a many boolean test functions to check
0079: * the kind of a character (like isLowerCase() or isISOControl()).
0080: * Unfortunately some are missing (like isPunct() or isPrint()), so
0081: * here we define bitsets to compare the result of Character.getType().
0082: */
0083:
0084: static final private int ALPHA_BITS = ((1 << Character.UPPERCASE_LETTER)
0085: | (1 << Character.LOWERCASE_LETTER)
0086: | (1 << Character.TITLECASE_LETTER)
0087: | (1 << Character.MODIFIER_LETTER) | (1 << Character.OTHER_LETTER));
0088: static final private int PUNCT_BITS = ((1 << Character.CONNECTOR_PUNCTUATION)
0089: | (1 << Character.DASH_PUNCTUATION)
0090: | (1 << Character.START_PUNCTUATION)
0091: | (1 << Character.END_PUNCTUATION) | (1 << Character.OTHER_PUNCTUATION));
0092: static final private int PRINT_BITS = (ALPHA_BITS
0093: | (1 << Character.DECIMAL_DIGIT_NUMBER)
0094: | (1 << Character.SPACE_SEPARATOR)
0095: | (1 << Character.LINE_SEPARATOR)
0096: | (1 << Character.PARAGRAPH_SEPARATOR)
0097: | (1 << Character.NON_SPACING_MARK)
0098: | (1 << Character.ENCLOSING_MARK)
0099: | (1 << Character.COMBINING_SPACING_MARK)
0100: | (1 << Character.LETTER_NUMBER)
0101: | (1 << Character.OTHER_NUMBER) | PUNCT_BITS
0102: | (1 << Character.MATH_SYMBOL)
0103: | (1 << Character.CURRENCY_SYMBOL)
0104: | (1 << Character.MODIFIER_SYMBOL) | (1 << Character.OTHER_SYMBOL));
0105: static final private int WORD_BITS = (ALPHA_BITS
0106: | (1 << Character.DECIMAL_DIGIT_NUMBER) | (1 << Character.CONNECTOR_PUNCTUATION));
0107:
0108: /**
0109: *----------------------------------------------------------------------
0110: *
0111: * Tcl_StringObjCmd -> StringCmd.cmdProc
0112: *
0113: * This procedure is invoked to process the "string" Tcl command.
0114: * See the user documentation for details on what it does.
0115: *
0116: * Results:
0117: * None.
0118: *
0119: * Side effects:
0120: * See the user documentation.
0121: *
0122: *----------------------------------------------------------------------
0123: */
0124:
0125: public void cmdProc(Interp interp, // Current interpreter.
0126: TclObject[] objv) // Argument list.
0127: throws TclException // A standard Tcl exception.
0128: {
0129: if (objv.length < 2) {
0130: throw new TclNumArgsException(interp, 1, objv,
0131: "option arg ?arg ...?");
0132: }
0133: int index = TclIndex.get(interp, objv[1], options, "option", 0);
0134:
0135: switch (index) {
0136: case STR_EQUAL:
0137: case STR_COMPARE: {
0138:
0139: if (objv.length < 4 || objv.length > 7) {
0140: throw new TclNumArgsException(interp, 2, objv,
0141: "?-nocase? ?-length int? string1 string2");
0142: }
0143:
0144: boolean nocase = false;
0145: int reqlength = -1;
0146: for (int i = 2; i < objv.length - 2; i++) {
0147: String string2 = objv[i].toString();
0148: int length2 = string2.length();
0149: if ((length2 > 1) && "-nocase".startsWith(string2)) {
0150: nocase = true;
0151: } else if ((length2 > 1)
0152: && "-length".startsWith(string2)) {
0153: if (i + 1 >= objv.length - 2) {
0154: throw new TclNumArgsException(interp, 2, objv,
0155: "?-nocase? ?-length int? string1 string2");
0156: }
0157: reqlength = TclInteger.get(interp, objv[++i]);
0158: } else {
0159: throw new TclException(interp, "bad option \""
0160: + string2
0161: + "\": must be -nocase or -length");
0162: }
0163: }
0164:
0165: String string1 = objv[objv.length - 2].toString();
0166: String string2 = objv[objv.length - 1].toString();
0167: int length1 = string1.length();
0168: int length2 = string2.length();
0169:
0170: // This is the min length IN BYTES of the two strings
0171:
0172: int length = (length1 < length2) ? length1 : length2;
0173:
0174: int match;
0175:
0176: if (reqlength == 0) {
0177: // Anything matches at 0 chars, right?
0178:
0179: match = 0;
0180: } else if (nocase
0181: || ((reqlength > 0) && (reqlength <= length))) {
0182: // In Java, strings are always encoded in unicode, so we do
0183: // not need to worry about individual char lengths
0184:
0185: // Do the reqlength check again, against 0 as well for
0186: // the benfit of nocase
0187:
0188: if ((reqlength > 0) && (reqlength < length)) {
0189: length = reqlength;
0190: } else if (reqlength < 0) {
0191: // The requested length is negative, so we ignore it by
0192: // setting it to the longer of the two lengths.
0193:
0194: reqlength = (length1 > length2) ? length1 : length2;
0195: }
0196: if (nocase) {
0197: string1 = string1.toLowerCase();
0198: string2 = string2.toLowerCase();
0199: }
0200: match = string1.substring(0, length).compareTo(
0201: string2.substring(0, length));
0202:
0203: if ((match == 0) && (reqlength > length)) {
0204: match = length1 - length2;
0205: }
0206:
0207: } else {
0208: match = string1.substring(0, length).compareTo(
0209: string2.substring(0, length));
0210: if (match == 0) {
0211: match = length1 - length2;
0212: }
0213: }
0214:
0215: if (index == STR_EQUAL) {
0216: interp.setResult((match != 0) ? false : true);
0217: } else {
0218: interp.setResult(((match > 0) ? 1 : (match < 0) ? -1
0219: : 0));
0220: }
0221: break;
0222: }
0223:
0224: case STR_FIRST: {
0225: if (objv.length < 4 || objv.length > 5) {
0226: throw new TclNumArgsException(interp, 2, objv,
0227: "subString string ?startIndex?");
0228: }
0229: String string1 = objv[2].toString();
0230: String string2 = objv[3].toString();
0231: int length1 = string1.length();
0232: int length2 = string2.length();
0233:
0234: int start = 0;
0235:
0236: if (objv.length == 5) {
0237: // If a startIndex is specified, we will need to fast
0238: // forward to that point in the string before we think
0239: // about a match.
0240:
0241: start = Util.getIntForIndex(interp, objv[4],
0242: length2 - 1);
0243: if (start >= length2) {
0244: interp.setResult(-1);
0245: break;
0246: }
0247: }
0248:
0249: if (length1 == 0) {
0250: interp.setResult(-1);
0251: } else if (length1 == 1) {
0252: char c = string1.charAt(0);
0253: int result = string2.indexOf(c, start);
0254: interp.setResult(result);
0255: } else {
0256: int result = string2.indexOf(string1, start);
0257: interp.setResult(result);
0258: }
0259: break;
0260: }
0261:
0262: case STR_INDEX: {
0263: if (objv.length != 4) {
0264: throw new TclNumArgsException(interp, 2, objv,
0265: "string charIndex");
0266: }
0267:
0268: String string1 = objv[2].toString();
0269: int length1 = string1.length();
0270:
0271: int i = Util.getIntForIndex(interp, objv[3], length1 - 1);
0272:
0273: if ((i >= 0) && (i < length1)) {
0274: // Get char at the given index. Check for a
0275: // common TclObject that represents this
0276: // single character, and allocate a new
0277: // TclString if not found.
0278:
0279: TclObject obj = interp.checkCommonCharacter(string1
0280: .charAt(i));
0281: if (obj == null) {
0282: obj = TclString.newInstance(string1.substring(i,
0283: i + 1));
0284: }
0285: interp.setResult(obj);
0286: }
0287: break;
0288: }
0289:
0290: case STR_IS: {
0291: if (objv.length < 4 || objv.length > 7) {
0292: throw new TclNumArgsException(interp, 2, objv,
0293: "class ?-strict? ?-failindex var? str");
0294: }
0295: index = TclIndex
0296: .get(interp, objv[2], isOptions, "class", 0);
0297:
0298: boolean strict = false;
0299: TclObject failVarObj = null;
0300:
0301: if (objv.length != 4) {
0302: for (int i = 3; i < objv.length - 1; i++) {
0303: String string2 = objv[i].toString();
0304: int length2 = string2.length();
0305: if ((length2 > 1) && "-strict".startsWith(string2)) {
0306: strict = true;
0307: } else if ((length2 > 1)
0308: && "-failindex".startsWith(string2)) {
0309: if (i + 1 >= objv.length - 1) {
0310: throw new TclNumArgsException(interp, 3,
0311: objv,
0312: "?-strict? ?-failindex var? str");
0313: }
0314: failVarObj = objv[++i];
0315: } else {
0316: throw new TclException(interp, "bad option \""
0317: + string2
0318: + "\": must be -strict or -failindex");
0319: }
0320: }
0321: }
0322:
0323: boolean result = true;
0324: int failat = 0;
0325:
0326: // We get the objPtr so that we can short-cut for some classes
0327: // by checking the object type (int and double), but we need
0328: // the string otherwise, because we don't want any conversion
0329: // of type occuring (as, for example, Tcl_Get*FromObj would do
0330:
0331: TclObject obj = objv[objv.length - 1];
0332: String string1 = obj.toString();
0333: int length1 = string1.length();
0334: if (length1 == 0) {
0335: if (strict) {
0336: result = false;
0337: }
0338: interp.setResult(result);
0339: return;
0340: }
0341:
0342: switch (index) {
0343: case STR_IS_BOOL:
0344: case STR_IS_TRUE:
0345: case STR_IS_FALSE: {
0346: try {
0347: boolean i = Util.getBoolean(null, string1);
0348: if (((index == STR_IS_TRUE) && !i)
0349: || ((index == STR_IS_FALSE) && i)) {
0350: result = false;
0351: }
0352: } catch (TclException e) {
0353: result = false;
0354: }
0355: break;
0356: }
0357: case STR_IS_DOUBLE: {
0358: if (obj.isDoubleType() || obj.isIntType()) {
0359: break;
0360: }
0361:
0362: // This is adapted from Tcl_GetDouble
0363: //
0364: // The danger in this function is that
0365: // "12345678901234567890" is an acceptable 'double',
0366: // but will later be interp'd as an int by something
0367: // like [expr]. Therefore, we check to see if it looks
0368: // like an int, and if so we do a range check on it.
0369: // If strtoul gets to the end, we know we either
0370: // received an acceptable int, or over/underflow
0371:
0372: if (Expression.looksLikeInt(string1, length1, 0, false)) {
0373: StrtoulResult res = interp.strtoulResult;
0374: Util.strtoul(string1, 0, 0, res);
0375: if (res.index == length1) {
0376: if (res.errno == TCL.INTEGER_RANGE) {
0377: result = false;
0378: failat = -1;
0379: }
0380: break;
0381: }
0382: }
0383:
0384: StrtodResult res = interp.strtodResult;
0385: Util.strtod(string1, 0, -1, res);
0386: if (res.errno == TCL.DOUBLE_RANGE) {
0387: // if (errno == ERANGE), then it was an over/underflow
0388: // problem, but in this method, we only want to know
0389: // yes or no, so bad flow returns 0 (false) and sets
0390: // the failVarObj to the string length.
0391:
0392: result = false;
0393: failat = -1;
0394: } else if (res.index == 0) {
0395: // In this case, nothing like a number was found
0396:
0397: result = false;
0398: failat = 0;
0399: } else {
0400: // Go onto SPACE, since we are
0401: // allowed trailing whitespace
0402:
0403: failat = res.index;
0404: for (int i = res.index; i < length1; i++) {
0405: if (!Character.isWhitespace(string1.charAt(i))) {
0406: result = false;
0407: break;
0408: }
0409: }
0410: }
0411: break;
0412: }
0413: case STR_IS_INT: {
0414: if (obj.isIntType()) {
0415: break;
0416: }
0417: boolean isInteger = true;
0418: try {
0419: TclInteger.get(null, obj);
0420: } catch (TclException e) {
0421: isInteger = false;
0422: }
0423: if (isInteger) {
0424: break;
0425: }
0426:
0427: StrtoulResult res = interp.strtoulResult;
0428: Util.strtoul(string1, 0, 0, res);
0429: if (res.errno == TCL.INTEGER_RANGE) {
0430: // if (errno == ERANGE), then it was an over/underflow
0431: // problem, but in this method, we only want to know
0432: // yes or no, so bad flow returns false and sets
0433: // the failVarObj to the string length.
0434:
0435: result = false;
0436: failat = -1;
0437: } else if (res.index == 0) {
0438: // In this case, nothing like a number was found
0439:
0440: result = false;
0441: failat = 0;
0442: } else {
0443: // Go onto SPACE, since we are
0444: // allowed trailing whitespace
0445:
0446: failat = res.index;
0447: for (int i = res.index; i < length1; i++) {
0448: if (!Character.isWhitespace(string1.charAt(i))) {
0449: result = false;
0450: break;
0451: }
0452: }
0453: }
0454: break;
0455: }
0456: default: {
0457: for (failat = 0; failat < length1; failat++) {
0458: char c = string1.charAt(failat);
0459: switch (index) {
0460: case STR_IS_ASCII:
0461: // This is a valid check in unicode, because
0462: // all bytes < 0xC0 are single byte chars
0463: // (but isascii limits that def'n to 0x80).
0464:
0465: result = c < 0x80;
0466: break;
0467: case STR_IS_ALNUM:
0468: result = Character.isLetterOrDigit(c);
0469: break;
0470: case STR_IS_ALPHA:
0471: result = Character.isLetter(c);
0472: break;
0473: case STR_IS_DIGIT:
0474: result = Character.isDigit(c);
0475: break;
0476: case STR_IS_GRAPH:
0477: result = ((1 << Character.getType(c)) & PRINT_BITS) != 0
0478: && c != ' ';
0479: break;
0480: case STR_IS_PRINT:
0481: result = ((1 << Character.getType(c)) & PRINT_BITS) != 0;
0482: break;
0483: case STR_IS_PUNCT:
0484: result = ((1 << Character.getType(c)) & PUNCT_BITS) != 0;
0485: break;
0486: case STR_IS_UPPER:
0487: result = Character.isUpperCase(c);
0488: break;
0489: case STR_IS_SPACE:
0490: result = Character.isWhitespace(c);
0491: break;
0492: case STR_IS_CONTROL:
0493: result = Character.isISOControl(c);
0494: break;
0495: case STR_IS_LOWER:
0496: result = Character.isLowerCase(c);
0497: break;
0498: case STR_IS_WORD:
0499: result = ((1 << Character.getType(c)) & WORD_BITS) != 0;
0500: break;
0501: case STR_IS_XDIGIT:
0502: result = Character.digit(c, 16) >= 0;
0503: break;
0504: default:
0505: throw new TclRuntimeError("unimplemented");
0506: }
0507: if (!result) {
0508: break;
0509: }
0510: }
0511: }
0512: }
0513:
0514: // Only set the failVarObj when we will return 0
0515: // and we have indicated a valid fail index (>= 0)
0516:
0517: if ((!result) && (failVarObj != null)) {
0518: interp.setVar(failVarObj.toString(), null, failat, 0);
0519: }
0520: interp.setResult(result);
0521: break;
0522: }
0523:
0524: case STR_LAST: {
0525: if (objv.length < 4 || objv.length > 5) {
0526: throw new TclNumArgsException(interp, 2, objv,
0527: "subString string ?lastIndex?");
0528: }
0529: String string1 = objv[2].toString();
0530: String string2 = objv[3].toString();
0531: int length1 = string1.length();
0532: int length2 = string2.length();
0533:
0534: int last = 0;
0535: if (objv.length == 5) {
0536: // If a lastIndex is specified, we will need to restrict the
0537: // string range to that char index in the string.
0538:
0539: last = Util
0540: .getIntForIndex(interp, objv[4], length2 - 1);
0541: if (last < 0) {
0542: interp.setResult(-1);
0543: break;
0544: } else if (last < length2) {
0545: string2 = string2.substring(0, last + 1);
0546: }
0547: }
0548:
0549: if (length1 == 0) {
0550: interp.setResult(-1);
0551: } else if (length1 == 1) {
0552: char c = string1.charAt(0);
0553: int result = string2.lastIndexOf(c);
0554: interp.setResult(result);
0555: } else {
0556: int result = string2.lastIndexOf(string1);
0557: interp.setResult(result);
0558: }
0559: break;
0560: }
0561:
0562: case STR_BYTELENGTH:
0563: if (objv.length != 3) {
0564: throw new TclNumArgsException(interp, 2, objv, "string");
0565: }
0566: interp.setResult(Utf8Count(objv[2].toString()));
0567: break;
0568:
0569: case STR_LENGTH: {
0570: if (objv.length != 3) {
0571: throw new TclNumArgsException(interp, 2, objv, "string");
0572: }
0573: interp.setResult(objv[2].toString().length());
0574: break;
0575: }
0576:
0577: case STR_MAP: {
0578: if (objv.length < 4 || objv.length > 5) {
0579: throw new TclNumArgsException(interp, 2, objv,
0580: "?-nocase? charMap string");
0581: }
0582:
0583: boolean nocase = false;
0584: if (objv.length == 5) {
0585: String string2 = objv[2].toString();
0586: int length2 = string2.length();
0587: if ((length2 > 1) && "-nocase".startsWith(string2)) {
0588: nocase = true;
0589: } else {
0590: throw new TclException(interp, "bad option \""
0591: + string2 + "\": must be -nocase");
0592: }
0593: }
0594:
0595: TclObject mapElemv[] = TclList.getElements(interp,
0596: objv[objv.length - 2]);
0597: if (mapElemv.length == 0) {
0598: // empty charMap, just return whatever string was given
0599:
0600: interp.setResult(objv[objv.length - 1]);
0601: } else if ((mapElemv.length % 2) != 0) {
0602: // The charMap must be an even number of key/value items
0603:
0604: throw new TclException(interp,
0605: "char map list unbalanced");
0606: }
0607: String string1 = objv[objv.length - 1].toString();
0608: String cmpString1;
0609: if (nocase) {
0610: cmpString1 = string1.toLowerCase();
0611: } else {
0612: cmpString1 = string1;
0613: }
0614: int length1 = string1.length();
0615: if (length1 == 0) {
0616: // Empty input string, just stop now
0617:
0618: break;
0619: }
0620:
0621: // Precompute pointers to the unicode string and length.
0622: // This saves us repeated function calls later,
0623: // significantly speeding up the algorithm.
0624:
0625: String mapStrings[] = new String[mapElemv.length];
0626: int mapLens[] = new int[mapElemv.length];
0627: for (int ix = 0; ix < mapElemv.length; ix++) {
0628: mapStrings[ix] = mapElemv[ix].toString();
0629: mapLens[ix] = mapStrings[ix].length();
0630: }
0631: String cmpStrings[];
0632: if (nocase) {
0633: cmpStrings = new String[mapStrings.length];
0634: for (int ix = 0; ix < mapStrings.length; ix++) {
0635: cmpStrings[ix] = mapStrings[ix].toLowerCase();
0636: }
0637: } else {
0638: cmpStrings = mapStrings;
0639: }
0640:
0641: TclObject result = TclString.newInstance("");
0642: int p, str1;
0643: for (p = 0, str1 = 0; str1 < length1; str1++) {
0644: for (index = 0; index < mapStrings.length; index += 2) {
0645: // Get the key string to match on
0646:
0647: String string2 = mapStrings[index];
0648: int length2 = mapLens[index];
0649: if ((length2 > 0)
0650: && (cmpString1.substring(str1)
0651: .startsWith(cmpStrings[index]))) {
0652: if (p != str1) {
0653: // Put the skipped chars onto the result first
0654:
0655: TclString.append(result, string1.substring(
0656: p, str1));
0657: p = str1 + length2;
0658: } else {
0659: p += length2;
0660: }
0661:
0662: // Adjust len to be full length of matched string
0663:
0664: str1 = p - 1;
0665:
0666: // Append the map value to the unicode string
0667:
0668: TclString.append(result, mapStrings[index + 1]);
0669: break;
0670: }
0671: }
0672: }
0673:
0674: if (p != str1) {
0675: // Put the rest of the unmapped chars onto result
0676:
0677: TclString.append(result, string1.substring(p, str1));
0678: }
0679: interp.setResult(result);
0680: break;
0681: }
0682:
0683: case STR_MATCH: {
0684: if (objv.length < 4 || objv.length > 5) {
0685: throw new TclNumArgsException(interp, 2, objv,
0686: "?-nocase? pattern string");
0687: }
0688:
0689: String string1, string2;
0690: if (objv.length == 5) {
0691: String string = objv[2].toString();
0692: if (!((string.length() > 1) && "-nocase"
0693: .startsWith(string))) {
0694: throw new TclException(interp, "bad option \""
0695: + string + "\": must be -nocase");
0696: }
0697: string1 = objv[4].toString().toLowerCase();
0698: string2 = objv[3].toString().toLowerCase();
0699: } else {
0700: string1 = objv[3].toString();
0701: string2 = objv[2].toString();
0702: }
0703:
0704: interp.setResult(Util.stringMatch(string1, string2));
0705: break;
0706: }
0707:
0708: case STR_RANGE: {
0709: if (objv.length != 5) {
0710: throw new TclNumArgsException(interp, 2, objv,
0711: "string first last");
0712: }
0713:
0714: String string1 = objv[2].toString();
0715: int length1 = string1.length();
0716:
0717: int first = Util.getIntForIndex(interp, objv[3],
0718: length1 - 1);
0719: if (first < 0) {
0720: first = 0;
0721: }
0722: int last = Util
0723: .getIntForIndex(interp, objv[4], length1 - 1);
0724: if (last >= length1) {
0725: last = length1 - 1;
0726: }
0727:
0728: if (first > last) {
0729: interp.resetResult();
0730: } else {
0731: interp.setResult(string1.substring(first, last + 1));
0732: }
0733: break;
0734: }
0735:
0736: case STR_REPEAT: {
0737: if (objv.length != 4) {
0738: throw new TclNumArgsException(interp, 2, objv,
0739: "string count");
0740: }
0741:
0742: int count = TclInteger.get(interp, objv[3]);
0743:
0744: String string1 = objv[2].toString();
0745: if (string1.length() > 0) {
0746: TclObject tstr = TclString.newInstance("");
0747: for (index = 0; index < count; index++) {
0748: TclString.append(tstr, string1);
0749: }
0750: interp.setResult(tstr);
0751: }
0752: break;
0753: }
0754:
0755: case STR_REPLACE: {
0756: if (objv.length < 5 || objv.length > 6) {
0757: throw new TclNumArgsException(interp, 2, objv,
0758: "string first last ?string?");
0759: }
0760:
0761: String string1 = objv[2].toString();
0762: int length1 = string1.length() - 1;
0763:
0764: int first = Util.getIntForIndex(interp, objv[3], length1);
0765: int last = Util.getIntForIndex(interp, objv[4], length1);
0766:
0767: if ((last < first) || (first > length1) || (last < 0)) {
0768: interp.setResult(objv[2]);
0769: } else {
0770: if (first < 0) {
0771: first = 0;
0772: }
0773: String start = string1.substring(first);
0774: int ind = ((last > length1) ? length1 : last) - first
0775: + 1;
0776: String end;
0777: if (ind <= 0) {
0778: end = start;
0779: } else if (ind >= start.length()) {
0780: end = "";
0781: } else {
0782: end = start.substring(ind);
0783: }
0784:
0785: TclObject tstr = TclString.newInstance(string1
0786: .substring(0, first));
0787:
0788: if (objv.length == 6) {
0789: TclString.append(tstr, objv[5]);
0790: }
0791: if (last < length1) {
0792: TclString.append(tstr, end);
0793: }
0794:
0795: interp.setResult(tstr);
0796: }
0797: break;
0798: }
0799:
0800: case STR_TOLOWER:
0801: case STR_TOUPPER:
0802: case STR_TOTITLE: {
0803: if (objv.length < 3 || objv.length > 5) {
0804: throw new TclNumArgsException(interp, 2, objv,
0805: "string ?first? ?last?");
0806: }
0807: String string1 = objv[2].toString();
0808:
0809: if (objv.length == 3) {
0810: if (index == STR_TOLOWER) {
0811: interp.setResult(string1.toLowerCase());
0812: } else if (index == STR_TOUPPER) {
0813: interp.setResult(string1.toUpperCase());
0814: } else {
0815: interp.setResult(Util.toTitle(string1));
0816: }
0817: } else {
0818: int length1 = string1.length() - 1;
0819: int first = Util.getIntForIndex(interp, objv[3],
0820: length1);
0821: if (first < 0) {
0822: first = 0;
0823: }
0824: int last = first;
0825: if (objv.length == 5) {
0826: last = Util
0827: .getIntForIndex(interp, objv[4], length1);
0828: }
0829: if (last >= length1) {
0830: last = length1;
0831: }
0832: if (last < first) {
0833: interp.setResult(objv[2]);
0834: break;
0835: }
0836:
0837: String string2;
0838: StringBuffer buf = new StringBuffer();
0839: buf.append(string1.substring(0, first));
0840: if (last + 1 > length1) {
0841: string2 = string1.substring(first);
0842: } else {
0843: string2 = string1.substring(first, last + 1);
0844: }
0845: if (index == STR_TOLOWER) {
0846: buf.append(string2.toLowerCase());
0847: } else if (index == STR_TOUPPER) {
0848: buf.append(string2.toUpperCase());
0849: } else {
0850: buf.append(Util.toTitle(string2));
0851: }
0852: if (last + 1 <= length1) {
0853: buf.append(string1.substring(last + 1));
0854: }
0855:
0856: interp.setResult(buf.toString());
0857: }
0858: break;
0859: }
0860:
0861: case STR_TRIM: {
0862: if (objv.length == 3) {
0863: // Case 1: "string trim str" --
0864: // Remove leading and trailing white space
0865:
0866: interp.setResult(objv[2].toString().trim());
0867: } else if (objv.length == 4) {
0868:
0869: // Case 2: "string trim str chars" --
0870: // Remove leading and trailing chars in the chars set
0871:
0872: String tmp = Util.TrimLeft(objv[2].toString(), objv[3]
0873: .toString());
0874: interp.setResult(Util
0875: .TrimRight(tmp, objv[3].toString()));
0876: } else {
0877: // Case 3: Wrong # of args
0878:
0879: throw new TclNumArgsException(interp, 2, objv,
0880: "string ?chars?");
0881: }
0882: break;
0883: }
0884:
0885: case STR_TRIMLEFT: {
0886: if (objv.length == 3) {
0887: // Case 1: "string trimleft str" --
0888: // Remove leading and trailing white space
0889:
0890: interp.setResult(Util.TrimLeft(objv[2].toString()));
0891: } else if (objv.length == 4) {
0892: // Case 2: "string trimleft str chars" --
0893: // Remove leading and trailing chars in the chars set
0894:
0895: interp.setResult(Util.TrimLeft(objv[2].toString(),
0896: objv[3].toString()));
0897: } else {
0898: // Case 3: Wrong # of args
0899:
0900: throw new TclNumArgsException(interp, 2, objv,
0901: "string ?chars?");
0902: }
0903: break;
0904: }
0905:
0906: case STR_TRIMRIGHT: {
0907: if (objv.length == 3) {
0908: // Case 1: "string trimright str" --
0909: // Remove leading and trailing white space
0910:
0911: interp.setResult(Util.TrimRight(objv[2].toString()));
0912: } else if (objv.length == 4) {
0913: // Case 2: "string trimright str chars" --
0914: // Remove leading and trailing chars in the chars set
0915:
0916: interp.setResult(Util.TrimRight(objv[2].toString(),
0917: objv[3].toString()));
0918: } else {
0919: // Case 3: Wrong # of args
0920:
0921: throw new TclNumArgsException(interp, 2, objv,
0922: "string ?chars?");
0923: }
0924: break;
0925: }
0926:
0927: case STR_WORDEND: {
0928: if (objv.length != 4) {
0929: throw new TclNumArgsException(interp, 2, objv,
0930: "string index");
0931: }
0932:
0933: String string1 = objv[2].toString();
0934: char strArray[] = string1.toCharArray();
0935: int cur;
0936: int length1 = string1.length();
0937: index = Util.getIntForIndex(interp, objv[3], length1 - 1);
0938:
0939: if (index < 0) {
0940: index = 0;
0941: }
0942: if (index >= length1) {
0943: interp.setResult(length1);
0944: return;
0945: }
0946: for (cur = index; cur < length1; cur++) {
0947: char c = strArray[cur];
0948: if (((1 << Character.getType(c)) & WORD_BITS) == 0) {
0949: break;
0950: }
0951: }
0952: if (cur == index) {
0953: cur = index + 1;
0954: }
0955: interp.setResult(cur);
0956: break;
0957: }
0958:
0959: case STR_WORDSTART: {
0960: if (objv.length != 4) {
0961: throw new TclNumArgsException(interp, 2, objv,
0962: "string index");
0963: }
0964:
0965: String string1 = objv[2].toString();
0966: char strArray[] = string1.toCharArray();
0967: int cur;
0968: int length1 = string1.length();
0969: index = Util.getIntForIndex(interp, objv[3], length1 - 1);
0970:
0971: if (index > length1) {
0972: index = length1 - 1;
0973: }
0974: if (index < 0) {
0975: interp.setResult(0);
0976: return;
0977: }
0978: for (cur = index; cur >= 0; cur--) {
0979: char c = strArray[cur];
0980: if (((1 << Character.getType(c)) & WORD_BITS) == 0) {
0981: break;
0982: }
0983: }
0984: if (cur != index) {
0985: cur += 1;
0986: }
0987: interp.setResult(cur);
0988: break;
0989: }
0990: }
0991: }
0992:
0993: // return the number of Utf8 bytes that would be needed to store s
0994:
0995: final static int Utf8Count(String s) {
0996: int p = 0;
0997: final int len = s.length();
0998: char c;
0999: int sum = 0;
1000:
1001: while (p < len) {
1002: c = s.charAt(p++);
1003:
1004: if ((c > 0) && (c < 0x80)) {
1005: sum += 1;
1006: continue;
1007: }
1008: if (c <= 0x7FF) {
1009: sum += 2;
1010: continue;
1011: }
1012: if (c <= 0xFFFF) {
1013: sum += 3;
1014: continue;
1015: }
1016: }
1017:
1018: return sum;
1019: }
1020:
1021: // return the number of Utf8 bytes for the character c
1022:
1023: final static int Utf8Count(char c) {
1024: if ((c > 0) && (c < 0x80)) {
1025: return 1;
1026: } else if (c <= 0x7FF) {
1027: return 2;
1028: } else {
1029: return 3;
1030: }
1031: }
1032:
1033: } // end StringCmd
|