Source Code Cross Referenced for TclParser.java in  » Scripting » jacl » tcl » lang » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Scripting » jacl » tcl.lang 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /* 
0002:         * tclpro/tclparser/tclParser.c -> TclParser.java
0003:         *
0004:         *	This is a Tcl language parser as a Tcl dynamically loadable
0005:         *	extension.
0006:         *
0007:         * Copyright (c) 1996 by Sun Microsystems, Inc.
0008:         * Copyright (c) 2000 Ajuba Solutions
0009:         *
0010:         * See the file "license.terms" for information on usage and redistribution
0011:         * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
0012:         *
0013:         * RCS: @(#) $Id: TclParser.java,v 1.5 2005/11/22 22:10:02 mdejong Exp $
0014:         */
0015:
0016:        package tcl.lang;
0017:
0018:        public class TclParser implements  Command {
0019:
0020:            static final private String[] options = { "command", "expr",
0021:                    "varname", "list", "getrange", "getstring", "charindex",
0022:                    "charlength", "countnewline" };
0023:
0024:            static final private int PARSE_COMMAND = 0;
0025:            static final private int PARSE_EXPR = 1;
0026:            static final private int PARSE_VARNAME = 2;
0027:            static final private int PARSE_LIST = 3;
0028:            static final private int PARSE_GET_RANGE = 4;
0029:            static final private int PARSE_GET_STR = 5;
0030:            static final private int PARSE_CHAR_INDEX = 6;
0031:            static final private int PARSE_CHAR_LEN = 7;
0032:            static final private int PARSE_COUNT_NWLNE = 8;
0033:
0034:            /*
0035:             *----------------------------------------------------------------------
0036:             *
0037:             * ParseObjCmd -> cmdProc
0038:             *
0039:             *	This function implements the Tcl "parse" command.
0040:             *
0041:             * Results:
0042:             *	A standard Tcl result.
0043:             *
0044:             * Side effects:
0045:             *	None.
0046:             *
0047:             *----------------------------------------------------------------------
0048:             */
0049:
0050:            public void cmdProc(Interp interp, // Current interpreter. 
0051:                    TclObject[] objv) // Arguments to command
0052:                    throws TclException {
0053:                int option, index, length, scriptLength;
0054:
0055:                if (objv.length < 3) {
0056:                    throw new TclNumArgsException(interp, 1, objv,
0057:                            "option arg ?arg ...?");
0058:                }
0059:                option = TclIndex.get(interp, objv[1], options, "option", 0);
0060:
0061:                // If the script argument holds a cached UTF8CharPointer internal rep
0062:                // then grab it and use it. Otherwise, create a new UTF8CharPointer
0063:                // and set it as the internal rep.
0064:
0065:                TclObject tobj = objv[2];
0066:                UTF8CharPointer script;
0067:                InternalRep irep = tobj.getInternalRep();
0068:                if (irep instanceof  UTF8CharPointer) {
0069:                    script = (UTF8CharPointer) irep;
0070:                } else {
0071:                    script = new UTF8CharPointer(tobj.toString());
0072:                    tobj.setInternalRep(script);
0073:                }
0074:                if (script == null) {
0075:                    System.out.println(script); // For debugging only
0076:                }
0077:                scriptLength = script.getByteLength();
0078:
0079:                // Check the number arguments passed to the command and
0080:                // extract information (script, index, length) depending
0081:                // upon the option selected.
0082:
0083:                switch (option) {
0084:                case PARSE_GET_RANGE: {
0085:                    if (objv.length == 3) {
0086:                        index = 0;
0087:                        length = scriptLength;
0088:                    } else if (objv.length == 5) {
0089:                        index = TclInteger.get(interp, objv[3]);
0090:                        length = TclInteger.get(interp, objv[4]);
0091:
0092:                        if (index < 0) {
0093:                            index = 0;
0094:                        } else if (index >= scriptLength) {
0095:                            index = scriptLength - 1;
0096:                        }
0097:                        if (length < 0) {
0098:                            length = 0;
0099:                        } else if (length > (scriptLength - index)) {
0100:                            length = scriptLength - index;
0101:                        }
0102:                    } else {
0103:                        throw new TclNumArgsException(interp, 2, objv,
0104:                                "string ?index length?");
0105:                    }
0106:                    interp.setResult(ParseMakeRange(script, index, length));
0107:                    return;
0108:                }
0109:                case PARSE_COMMAND:
0110:                case PARSE_EXPR:
0111:                case PARSE_VARNAME:
0112:                case PARSE_LIST:
0113:                case PARSE_GET_STR:
0114:                case PARSE_CHAR_INDEX:
0115:                case PARSE_CHAR_LEN: {
0116:                    if (objv.length != 4) {
0117:                        throw new TclNumArgsException(interp, 2, objv,
0118:                                "string range");
0119:                    }
0120:                    ParseGetIndexAndLengthResult result = new ParseGetIndexAndLengthResult();
0121:                    ParseGetIndexAndLength(interp, objv[3], scriptLength,
0122:                            result);
0123:                    index = result.indexPtr;
0124:                    length = result.lengthPtr;
0125:
0126:                    switch (option) {
0127:                    case PARSE_COMMAND:
0128:                        ParseCommand(interp, script, index, length);
0129:                        return;
0130:                    case PARSE_EXPR:
0131:                        ParseExpr(interp, script, index, length);
0132:                        return;
0133:                    case PARSE_VARNAME:
0134:                        ParseVarName(interp, script, index, length);
0135:                        return;
0136:                    case PARSE_LIST:
0137:                        ParseList(interp, script, index, length);
0138:                        return;
0139:                    case PARSE_GET_STR:
0140:                        ParseGetString(interp, script, index, length);
0141:                        return;
0142:                    case PARSE_CHAR_INDEX:
0143:                        ParseCharIndex(interp, script, index, length);
0144:                        return;
0145:                    case PARSE_CHAR_LEN:
0146:                        ParseCharLength(interp, script, index, length);
0147:                        return;
0148:                    case PARSE_GET_RANGE:
0149:                    case PARSE_COUNT_NWLNE:
0150:                        // No Op - This will suppress compiler warnings
0151:                        break;
0152:                    }
0153:                    break;
0154:                }
0155:                case PARSE_COUNT_NWLNE: {
0156:                    TclObject range2;
0157:                    if (objv.length == 5) {
0158:                        range2 = objv[4];
0159:                    } else if (objv.length == 4) {
0160:                        range2 = null;
0161:                    } else {
0162:                        throw new TclNumArgsException(interp, 2, objv,
0163:                                "string range ?range?");
0164:                    }
0165:                    ParseCountNewline(interp, script, scriptLength, objv[3],
0166:                            range2);
0167:                    return;
0168:                }
0169:                }
0170:                throw new TclException(interp, "unmatched option");
0171:            }
0172:
0173:            /*
0174:             *----------------------------------------------------------------------
0175:             *
0176:             * ParseCommand --
0177:             *
0178:             *	This function parses a script into Tcl commands by calling the
0179:             *	Tcl_ParseCommand function.  This routine returns a list of the
0180:             *	following form: <commentRange> <commandRange> <restRange> <parseTree>
0181:             *	The first range refers to any leading comments before the command.
0182:             *	The second range refers to the command itself.  The third range
0183:             *	contains the remainder of the original range that appears after
0184:             *	the command range.  The parseTree is a list representation
0185:             *	of the parse tree where each node is a list in the form:
0186:             *	<type> <range> <subTree>.
0187:             *
0188:             * Results:
0189:             *	A standard Tcl result.
0190:             *
0191:             * Side effects:
0192:             *	None.
0193:             *
0194:             *----------------------------------------------------------------------
0195:             */
0196:
0197:            static void ParseCommand(Interp interp, // Current interpreter.
0198:                    UTF8CharPointer script, // Script to parse.
0199:                    int index, // Index to the starting point of the 
0200:                    // script, in bytes.
0201:                    int length) // Length of script be parsed, in bytes.
0202:                    throws TclException {
0203:                TclObject resultPtr, listPtr, tokenPtr;
0204:                TclParse parse;
0205:                int i;
0206:                int endCharIndex;
0207:                int endByteIndex;
0208:
0209:                // Convert byte index and range into char index and range
0210:                int charIndex = script.getCharIndex(index);
0211:                int charLength = script.getCharRange(index, length);
0212:
0213:                parse = Parser.parseCommand(interp, script.array, charIndex,
0214:                        charLength, null, -1, false);
0215:
0216:                if (parse.result != TCL.OK) {
0217:                    ParseSetErrorCode(interp, script, parse);
0218:                }
0219:
0220:                resultPtr = TclList.newInstance();
0221:                if (parse.commentStart != -1) {
0222:                    TclList.append(interp, resultPtr, ParseMakeByteRange(
0223:                            script, parse.commentStart, parse.commentSize));
0224:                } else {
0225:                    TclList.append(interp, resultPtr, ParseMakeRange(script,
0226:                            script.index, 0));
0227:                }
0228:                TclList.append(interp, resultPtr, ParseMakeByteRange(script,
0229:                        parse.commandStart, parse.commandSize));
0230:                endCharIndex = parse.commandStart + parse.commandSize;
0231:                TclList.append(interp, resultPtr,
0232:                        ParseMakeByteRange(script, endCharIndex,
0233:                                (charLength - (endCharIndex - charIndex))));
0234:
0235:                listPtr = TclList.newInstance();
0236:                ParseMakeTokenListResult result = new ParseMakeTokenListResult();
0237:                i = 0;
0238:                while (i < parse.numTokens) {
0239:                    i = ParseMakeTokenList(script, parse, i, result);
0240:                    tokenPtr = result.newList;
0241:                    TclList.append(null, listPtr, tokenPtr);
0242:                }
0243:                TclList.append(interp, resultPtr, listPtr);
0244:                interp.setResult(resultPtr);
0245:                return;
0246:            }
0247:
0248:            /*
0249:             *----------------------------------------------------------------------
0250:             *
0251:             * ParseExpr --
0252:             *
0253:             *	This function parses a Tcl expression into a tree representation.
0254:             *
0255:             * Results:
0256:             *	A standard Tcl result.
0257:             *
0258:             * Side effects:
0259:             *	None.
0260:             *
0261:             *----------------------------------------------------------------------
0262:             */
0263:
0264:            static void ParseExpr(Interp interp, // Current interpreter.
0265:                    UTF8CharPointer script, // Script to parse.
0266:                    int index, // Index to the starting point of the 
0267:                    // script, in bytes.
0268:                    int length) // Length of script be parsed, in bytes.
0269:                    throws TclException {
0270:                TclParse parse;
0271:
0272:                int charIndex = script.getCharIndex(index);
0273:                int charLength = script.getCharRange(index, length);
0274:
0275:                parse = ParseExpr.parseExpr(interp, script.array, charIndex,
0276:                        charLength);
0277:
0278:                if (parse.result != TCL.OK) {
0279:                    ParseSetErrorCode(interp, script, parse);
0280:                }
0281:
0282:                // There is only one top level token, so just return it.
0283:
0284:                ParseMakeTokenListResult lresult = new ParseMakeTokenListResult();
0285:                ParseMakeTokenList(script, parse, 0, lresult);
0286:                interp.setResult(lresult.newList);
0287:            }
0288:
0289:            /*
0290:             *----------------------------------------------------------------------
0291:             *
0292:             * ParseList --
0293:             *
0294:             *	This function parses a Tcl list into a list of ranges.
0295:             *
0296:             * Results:
0297:             *	A standard Tcl result.
0298:             *
0299:             * Side effects:
0300:             *	None.
0301:             *
0302:             *----------------------------------------------------------------------
0303:             */
0304:
0305:            static void ParseList(Interp interp, // Current interpreter.
0306:                    UTF8CharPointer script, // Script to parse.
0307:                    int index, // Index to the starting point of the 
0308:                    // script, in bytes.
0309:                    int length) // Length of script be parsed, in bytes.
0310:                    throws TclException {
0311:                TclObject resultPtr;
0312:                int size;
0313:                char c;
0314:                String list;
0315:                int elementIndex;
0316:                int listIndex, prevListIndex, lastListIndex;
0317:                FindElemResult fer = new FindElemResult();
0318:                int charIndex, charLength, charListOffset;
0319:                boolean found;
0320:
0321:                charIndex = script.getCharIndex(index);
0322:                charListOffset = (charIndex - script.index);
0323:
0324:                resultPtr = TclList.newInstance();
0325:                list = script.getByteRangeAsString(index, length);
0326:                charLength = list.length();
0327:
0328:                lastListIndex = charLength;
0329:                listIndex = 0;
0330:
0331:                for (;;) {
0332:                    prevListIndex = listIndex;
0333:
0334:                    try {
0335:                        found = Util.findElement(interp, list, listIndex,
0336:                                charLength, fer);
0337:                    } catch (TclException te) {
0338:                        TclObject errorCode = TclList.newInstance();
0339:                        TclList.append(interp, errorCode, TclString
0340:                                .newInstance("PARSE"));
0341:                        TclList.append(interp, errorCode, TclString
0342:                                .newInstance("list"));
0343:                        // Convert to byte range
0344:                        int byteRange = script.getByteRange(script.index,
0345:                                charListOffset + listIndex);
0346:                        TclList.append(interp, errorCode, TclInteger
0347:                                .newInstance(byteRange));
0348:                        TclList.append(interp, errorCode, interp.getResult());
0349:                        interp.setErrorCode(errorCode);
0350:                        throw te;
0351:                    }
0352:                    if (!found) {
0353:                        break;
0354:                    }
0355:                    listIndex = fer.elemEnd;
0356:                    //charLength -= (listIndex - prevListIndex);
0357:                    elementIndex = fer.elemStart;
0358:                    size = fer.size;
0359:
0360:                    // Check to see if this element was in quotes or braces.
0361:                    // If it is, ensure that the range includes the quotes/braces
0362:                    // so the parser can make decisions based on this fact.
0363:
0364:                    if (elementIndex > 0) {
0365:                        c = list.charAt(elementIndex - 1);
0366:                    } else {
0367:                        c = '\0';
0368:                    }
0369:                    if (c == '{' || c == '\"') {
0370:                        elementIndex--;
0371:                        size += 2;
0372:                    }
0373:                    TclList.append(interp, resultPtr, ParseMakeByteRange(
0374:                            script, charListOffset + elementIndex, size));
0375:                }
0376:
0377:                interp.setResult(resultPtr);
0378:            }
0379:
0380:            /*
0381:             *----------------------------------------------------------------------
0382:             *
0383:             * ParseVarName --
0384:             *
0385:             *	This function parses a Tcl variable name into a tree representation.
0386:             *
0387:             * Results:
0388:             *	A standard Tcl result.
0389:             *
0390:             * Side effects:
0391:             *	None.
0392:             *
0393:             *----------------------------------------------------------------------
0394:             */
0395:
0396:            static void ParseVarName(Interp interp, // Current interpreter.
0397:                    UTF8CharPointer script, // Script to parse.
0398:                    int index, // Index to the starting point of the 
0399:                    // script, in bytes.
0400:                    int length) // Length of script be parsed, in bytes.
0401:                    throws TclException {
0402:                TclParse parse;
0403:
0404:                // Convert byte index and range into char index and range
0405:                int charIndex = script.getCharIndex(index);
0406:                int charLength = script.getCharRange(index, length);
0407:
0408:                parse = Parser.parseVarName(interp, script.array, charIndex,
0409:                        charLength, null, false);
0410:                if (parse.result != TCL.OK) {
0411:                    ParseSetErrorCode(interp, script, parse);
0412:                }
0413:
0414:                // There is only one top level token, so just return it.
0415:
0416:                ParseMakeTokenListResult lresult = new ParseMakeTokenListResult();
0417:                ParseMakeTokenList(script, parse, 0, lresult);
0418:                interp.setResult(lresult.newList);
0419:            }
0420:
0421:            /*
0422:             *----------------------------------------------------------------------
0423:             *
0424:             * ParseSetErrorCode --
0425:             *
0426:             *	Set the errorCode variable to the standard parser error form
0427:             *	and raise a TclException. This method is invoked after something
0428:             *	goes wrong in a parse operation.
0429:             *
0430:             * Results:
0431:             *	None.
0432:             *
0433:             * Side effects:
0434:             *	Sets the interp's errorCode and always throws a TclException.
0435:             *
0436:             *----------------------------------------------------------------------
0437:             */
0438:
0439:            static void ParseSetErrorCode(Interp interp, // Current interpreter.
0440:                    UTF8CharPointer script, // Script to parse.
0441:                    TclParse parse) // Parse state.
0442:                    throws TclException {
0443:                TclObject tlist;
0444:                String type;
0445:
0446:                switch (parse.errorType) {
0447:                case Parser.TCL_PARSE_QUOTE_EXTRA:
0448:                    type = "quoteExtra";
0449:                    break;
0450:                case Parser.TCL_PARSE_BRACE_EXTRA:
0451:                    type = "braceExtra";
0452:                    break;
0453:                case Parser.TCL_PARSE_MISSING_BRACE:
0454:                    type = "missingBrace";
0455:                    break;
0456:                case Parser.TCL_PARSE_MISSING_BRACKET:
0457:                    type = "missingBracket";
0458:                    break;
0459:                case Parser.TCL_PARSE_MISSING_PAREN:
0460:                    type = "missingParen";
0461:                    break;
0462:                case Parser.TCL_PARSE_MISSING_QUOTE:
0463:                    type = "missingQuote";
0464:                    break;
0465:                case Parser.TCL_PARSE_MISSING_VAR_BRACE:
0466:                    type = "missingVarBrace";
0467:                    break;
0468:                case Parser.TCL_PARSE_SYNTAX:
0469:                    type = "syntax";
0470:                    break;
0471:                case Parser.TCL_PARSE_BAD_NUMBER:
0472:                    type = "badNumber";
0473:                    break;
0474:                default:
0475:                    throw new TclException(interp,
0476:                            "unexpected error type from Tcl_ParseCommand");
0477:                }
0478:                tlist = TclList.newInstance();
0479:                TclList.append(interp, tlist, TclString.newInstance("PARSE"));
0480:                TclList.append(interp, tlist, TclString.newInstance(type));
0481:                if (parse.termIndex > 0) {
0482:                    // Convert to byte range
0483:                    int byteRange = script.getByteRange(script.index,
0484:                            parse.termIndex);
0485:                    TclList.append(interp, tlist, TclInteger
0486:                            .newInstance(byteRange));
0487:                } else {
0488:                    TclList.append(interp, tlist, TclInteger.newInstance(0));
0489:                }
0490:                TclList.append(interp, tlist, interp.getResult());
0491:                interp.setErrorCode(tlist);
0492:                throw new TclException(interp, interp.getResult().toString());
0493:            }
0494:
0495:            /*
0496:             *----------------------------------------------------------------------
0497:             *
0498:             * ParseMakeTokenList --
0499:             *
0500:             *	Make the list representation of a token.  Each token is represented
0501:             *	as a list where the first element is a token type, the second
0502:             *	element is a range, and the third element is a list of
0503:             *	subtokens.
0504:             *
0505:             * Results:
0506:             *	Returns the next token offset and stores a newly allocated
0507:             *	list object in the location referred to by result.
0508:             *
0509:             * Side effects:
0510:             *	None.
0511:             *
0512:             *----------------------------------------------------------------------
0513:             */
0514:
0515:            static class ParseMakeTokenListResult {
0516:                TclObject newList;
0517:            }
0518:
0519:            static int ParseMakeTokenList(UTF8CharPointer script, // Pointer to start of script being parsed.
0520:                    TclParse parse, // Parse information.
0521:                    int index, // Index of token to append.
0522:                    ParseMakeTokenListResult result)
0523:            // Location where resulting list
0524:                    // object is to be stored.
0525:                    throws TclException {
0526:                TclToken token = parse.tokenList[index];
0527:                TclObject resultList, resultIndexList;
0528:                int start;
0529:                String type;
0530:
0531:                switch (token.type) {
0532:                case Parser.TCL_TOKEN_WORD:
0533:                    type = "word";
0534:                    break;
0535:                case Parser.TCL_TOKEN_SIMPLE_WORD:
0536:                    type = "simple";
0537:                    break;
0538:                case Parser.TCL_TOKEN_TEXT:
0539:                    type = "text";
0540:                    break;
0541:                case Parser.TCL_TOKEN_BS:
0542:                    type = "backslash";
0543:                    break;
0544:                case Parser.TCL_TOKEN_COMMAND:
0545:                    type = "command";
0546:                    break;
0547:                case Parser.TCL_TOKEN_VARIABLE:
0548:                    type = "variable";
0549:                    break;
0550:                case Parser.TCL_TOKEN_SUB_EXPR:
0551:                    type = "subexpr";
0552:                    break;
0553:                case Parser.TCL_TOKEN_OPERATOR:
0554:                    type = "operator";
0555:                    break;
0556:                default:
0557:                    type = "unknown";
0558:                    break;
0559:                }
0560:                resultList = TclList.newInstance();
0561:                TclList.append(null, resultList, TclString.newInstance(type));
0562:                TclList.append(null, resultList, ParseMakeByteRange(script,
0563:                        token.script_index, token.size));
0564:                resultIndexList = TclList.newInstance();
0565:                TclList.append(null, resultList, resultIndexList);
0566:                start = index;
0567:                index++;
0568:                ParseMakeTokenListResult lresult = new ParseMakeTokenListResult();
0569:                while (index <= start + token.numComponents) {
0570:                    index = ParseMakeTokenList(script, parse, index, lresult);
0571:                    TclList.append(null, resultIndexList, lresult.newList);
0572:                }
0573:
0574:                result.newList = resultList;
0575:                return index;
0576:            }
0577:
0578:            /*
0579:             *----------------------------------------------------------------------
0580:             *
0581:             * ParseMakeRange --
0582:             *
0583:             *	Construct a new range object. This method depends on the
0584:             *	script.index being set to the starting index of the
0585:             *	entire script.
0586:             *
0587:             * Results:
0588:             *	Returns a newly allocated Tcl object.
0589:             *
0590:             * Side effects:
0591:             *	None.
0592:             *
0593:             *----------------------------------------------------------------------
0594:             */
0595:
0596:            static TclObject ParseMakeRange(UTF8CharPointer script, // Pointer to the start of whole script.
0597:                    int start, // Index of start of the range, in bytes.
0598:                    int length) // The length of the range, in bytes.
0599:                    throws TclException {
0600:                int scriptByteIndex = script.getByteIndex(script.index);
0601:
0602:                TclObject tlist = TclList.newInstance();
0603:                TclList.append(null, tlist, TclInteger.newInstance(start
0604:                        - scriptByteIndex));
0605:                TclList.append(null, tlist, TclInteger.newInstance(length));
0606:                return tlist;
0607:            }
0608:
0609:            /*
0610:             *----------------------------------------------------------------------
0611:             *
0612:             * ParseMakeByteRange --
0613:             *
0614:             *	Construct a new range object containing a byte range given
0615:             *	a start and length in characters.
0616:             *
0617:             * Results:
0618:             *	Returns a newly allocated Tcl object.
0619:             *
0620:             * Side effects:
0621:             *	None.
0622:             *
0623:             *----------------------------------------------------------------------
0624:             */
0625:
0626:            static TclObject ParseMakeByteRange(UTF8CharPointer script, // Pointer to the start of whole script.
0627:                    int start, // Index of start of the range, in chars.
0628:                    int length) // The length of the range, in chars.
0629:                    throws TclException {
0630:                if (start < 0) {
0631:                    throw new TclRuntimeError("char index can't be < 0, was "
0632:                            + start);
0633:                }
0634:                if (length < 0) {
0635:                    throw new TclRuntimeError("char length can't be < 0, was "
0636:                            + length);
0637:                }
0638:                int byteStart = script.getByteIndex(start);
0639:                int byteLength = script.getByteRange(start, length);
0640:                return ParseMakeRange(script, byteStart, byteLength);
0641:            }
0642:
0643:            /*
0644:             *----------------------------------------------------------------------
0645:             *
0646:             * ParseGetString --
0647:             *
0648:             *	Extract the string from the script within the boundaries of
0649:             *	byte oriented index and length.
0650:             *
0651:             * Results:
0652:             *	A standard Tcl result.
0653:             *
0654:             * Side effects:
0655:             *	The interp's result is set.
0656:             *
0657:             *----------------------------------------------------------------------
0658:             */
0659:
0660:            static void ParseGetString(Interp interp, // Current interpreter.
0661:                    UTF8CharPointer script, // Script to parse.
0662:                    int index, // Index to the starting point of the 
0663:                    // script, in bytes
0664:                    int length) // Length of script in bytes.
0665:                    throws TclException {
0666:                String str = script.getByteRangeAsString(index, length);
0667:                interp.setResult(str);
0668:            }
0669:
0670:            /*
0671:             *----------------------------------------------------------------------
0672:             *
0673:             * ParseCharIndex --
0674:             *
0675:             *	Converts byte oriented index values into character oriented
0676:             *	index values.
0677:             *
0678:             * Results:
0679:             *	A standard Tcl result.
0680:             *
0681:             * Side effects:
0682:             *	The interp's result is set.
0683:             *
0684:             *----------------------------------------------------------------------
0685:             */
0686:
0687:            static void ParseCharIndex(Interp interp, // Current interpreter.
0688:                    UTF8CharPointer script, // Script to parse.
0689:                    int index, // Index to the starting point of the 
0690:                    // script, in bytes.
0691:                    int length) // Length of script be parsed, in bytes.
0692:                    throws TclException {
0693:                // Count number of characters from the start of the
0694:                // script to the given byte index.
0695:
0696:                int charIndex = script.getCharIndex(index);
0697:                interp.setResult(charIndex - script.index);
0698:            }
0699:
0700:            /*
0701:             *----------------------------------------------------------------------
0702:             *
0703:             * ParseCharLength --
0704:             *
0705:             *	Converts the given byte length into a character count.
0706:             *
0707:             * Results:
0708:             *	A standard Tcl result.
0709:             *
0710:             * Side effects:
0711:             *	The interp's result is set.
0712:             *
0713:             *----------------------------------------------------------------------
0714:             */
0715:
0716:            static void ParseCharLength(Interp interp, // Current interpreter.
0717:                    UTF8CharPointer script, // Script to parse.
0718:                    int index, // Index to the starting point of the 
0719:                    // script, in bytes.
0720:                    int length) // Length of script be parsed, in bytes.
0721:                    throws TclException {
0722:                // Count number of characters from the byte index
0723:                // to the byte length.
0724:
0725:                int charLength = script.getCharRange(index, length);
0726:                interp.setResult(charLength);
0727:            }
0728:
0729:            /*
0730:             *----------------------------------------------------------------------
0731:             *
0732:             * ParseCountNewline --
0733:             *
0734:             *	Count the number of newlines between a range of bytes
0735:             *	in a script.  If two ranges are passed to this function, 
0736:             *	calculate the number of newlines from the beginning index of
0737:             *	the first range up to, but not including, the beginning of 
0738:             *	the second range.  If one range is passed in, count the 
0739:             *	number of newlines from the beginning of the first range 
0740:             *	through the last character in the range.
0741:             *
0742:             *	It is assumed that the indices and lengths are within the
0743:             *	boundaries of the script.  No error checking is done to
0744:             *	verify this.  Use the ParseGetIndexAndRange to validate
0745:             *	the data.
0746:             *
0747:             * Results:
0748:             *	A standard Tcl result.
0749:             *
0750:             * Side effects:
0751:             *	The interp's result is set to the number of newlines counted.
0752:             *
0753:             *----------------------------------------------------------------------
0754:             */
0755:
0756:            static void ParseCountNewline(Interp interp, // Current interpreter.
0757:                    UTF8CharPointer script, // Script to parse.
0758:                    int scriptLength, // Lengths of script in bytes.
0759:                    TclObject rangePtr1, // Begin counting newlines with this range.
0760:                    TclObject rangePtr2) // Possibly null, otherwise used to terminate
0761:                    // newline counting
0762:                    throws TclException {
0763:                int subStrIndex, endStrIndex;
0764:                int offset, index1, index2 = 0;
0765:                int length, length1, length2;
0766:                int listLen1, listLen2;
0767:                int numNewline;
0768:
0769:                listLen1 = TclList.getLength(interp, rangePtr1);
0770:                ParseGetIndexAndLengthResult result = new ParseGetIndexAndLengthResult();
0771:                ParseGetIndexAndLength(interp, rangePtr1, scriptLength, result);
0772:                index1 = result.indexPtr;
0773:                length1 = result.lengthPtr;
0774:
0775:                if (rangePtr2 != null) {
0776:                    listLen2 = TclList.getLength(interp, rangePtr2);
0777:                    ParseGetIndexAndLength(interp, rangePtr2, scriptLength,
0778:                            result);
0779:                    index2 = result.indexPtr;
0780:                    length2 = result.lengthPtr;
0781:                } else {
0782:                    listLen2 = 0;
0783:                }
0784:
0785:                if ((listLen1 == 0) && (listLen2 == 2)) {
0786:                    // Counting from the beginning of the file to 
0787:                    // the beginning of the second range.
0788:                    //
0789:                    // example: parse count script {} r2
0790:
0791:                    offset = 0;
0792:                    length = index2;
0793:                } else if ((listLen1 == 2) && (listLen2 == 2)) {
0794:                    // Counting from the beginning of the first  
0795:                    // range to the beginning of the second range.
0796:                    //
0797:                    // example: parse count script r1 r2
0798:
0799:                    offset = index1;
0800:                    length = (index2 - offset);
0801:                } else {
0802:                    // Counting from the beginning of the first  
0803:                    // range to the end of the first range.  If
0804:                    // the arg passed was an empty string it 
0805:                    // will count the whole script.
0806:                    //
0807:                    // example: parse count script {}
0808:                    //          parse count script r1
0809:
0810:                    offset = index1;
0811:                    length = length1;
0812:                }
0813:
0814:                subStrIndex = offset;
0815:                endStrIndex = subStrIndex + length;
0816:                numNewline = 0;
0817:
0818:                // Get byte range as a String and count the number of
0819:                // newlines found in that range.
0820:
0821:                String range = script.getByteRangeAsString(subStrIndex, length);
0822:                final int range_length = range.length();
0823:                for (int i = 0; i < range_length; i++) {
0824:                    if (range.charAt(i) == '\n') {
0825:                        numNewline++;
0826:                    }
0827:                }
0828:
0829:                interp.setResult(numNewline);
0830:            }
0831:
0832:            /*
0833:             *----------------------------------------------------------------------
0834:             *
0835:             * ParseGetIndexAndLength --
0836:             *
0837:             *	Extract the index and length from a Tcl Object.  If the 
0838:             *	Tcl Object does not contain data, return the beginning
0839:             *	of the script as the index and the length of the script
0840:             *	for the length.  If the data in the script is out of the
0841:             *	scripts range (e.g. < 0 or > scriptLength,) and scriptLen 
0842:             *      is >= 0, set the value to the closest point. Note that
0843:             *      indexes and ranges are in terms of bytes.
0844:             *
0845:             * Results:
0846:             *	A standard Tcl result.
0847:             *
0848:             * Side effects:
0849:             *	The values are written to the result argument.
0850:             *	If scriptLen is >= 0, the values will be normalized based
0851:             *	on the length of the script.
0852:             *
0853:             *----------------------------------------------------------------------
0854:             */
0855:
0856:            static class ParseGetIndexAndLengthResult {
0857:                int indexPtr; // Index to the starting point of the
0858:                // script. 
0859:                int lengthPtr; // Byte length of script be parsed.
0860:            }
0861:
0862:            static void ParseGetIndexAndLength(Interp interp, // Current interpreter.
0863:                    TclObject rangePtr, int scriptLen, // Length of script in bytes. If >= 0, then try 
0864:                    // to normalize index and length based
0865:                    // on the length of the script.
0866:                    ParseGetIndexAndLengthResult result) throws TclException {
0867:                TclObject itemPtr;
0868:                int listLen;
0869:
0870:                listLen = TclList.getLength(interp, rangePtr);
0871:                if ((listLen != 0) && (listLen != 2)) {
0872:                    throw new TclException(interp,
0873:                            "invalid range input: incorrect list size");
0874:                }
0875:                if ((listLen == 0) && (scriptLen < 0)) {
0876:                    throw new TclException(interp,
0877:                            "empty range: no index or length values");
0878:                }
0879:
0880:                // If the range argument is null, then set 'index' to zero
0881:                // and 'length' to the string length of the script.  Otherwise
0882:                // extract 'index' and 'length' from the list.  If index or length
0883:                // is < 0 then set it to 0, if index or length is > then the scripts
0884:                // length, set it to the end of the script.
0885:
0886:                if (listLen == 0) {
0887:                    result.indexPtr = 0;
0888:                    result.lengthPtr = scriptLen;
0889:                } else {
0890:                    int len;
0891:                    String bytes;
0892:                    itemPtr = TclList.index(interp, rangePtr, 0);
0893:                    result.indexPtr = TclInteger.get(interp, itemPtr);
0894:                    itemPtr = TclList.index(interp, rangePtr, 1);
0895:                    bytes = itemPtr.toString();
0896:                    len = bytes.length();
0897:
0898:                    if (bytes.equals("end")) {
0899:                        result.lengthPtr = scriptLen;
0900:                    } else {
0901:                        result.lengthPtr = TclInteger.get(interp, itemPtr);
0902:                    }
0903:                    if (scriptLen >= 0) {
0904:                        if (result.indexPtr < 0) {
0905:                            result.indexPtr = 0;
0906:                        }
0907:                        if (result.lengthPtr < 0) {
0908:                            result.lengthPtr = 0;
0909:                        }
0910:                        if (result.indexPtr >= scriptLen) {
0911:                            result.indexPtr = scriptLen;
0912:                        }
0913:                        if (result.indexPtr + result.lengthPtr >= scriptLen) {
0914:                            result.lengthPtr = scriptLen - result.indexPtr;
0915:                        }
0916:                    }
0917:                }
0918:                return;
0919:            }
0920:
0921:        } // end class TclParser
0922:
0923:        // This class is used to map UTF8 oriented byte indexes used in
0924:        // the Tcl API for the parser extension into character oriented
0925:        // index used within Jacl.
0926:
0927:        // String "Foo\u00c7bar"
0928:        // Chars:  0123     456
0929:
0930:        // Bytes:       charToByteIndex         byteToCharIndex
0931:        // [0] = 'F'    [0] = 0                 [0] = 0
0932:        // [1] = 'o'    [1] = 1                 [1] = 1
0933:        // [2] = 'o'    [2] = 2                 [2] = 2
0934:        // [3] = '?'    [3] = 3                 [3] = 3
0935:        // [4] = '?'                            [4] = 3
0936:        // [5] = 'b'    [4] = 5                 [5] = 4
0937:        // [6] = 'a'    [5] = 6                 [6] = 5
0938:        // [7] = 'r'    [6] = 7                 [7] = 6
0939:
0940:        class UTF8CharPointer extends CharPointer implements  InternalRep {
0941:            int[] charToByteIndex; // Map char index to byte index
0942:            int[] byteToCharIndex; // Map byte index to char index
0943:            byte[] bytes;
0944:            String orig;
0945:
0946:            UTF8CharPointer(String s) {
0947:                super (s);
0948:                orig = s;
0949:                getByteInfo();
0950:            }
0951:
0952:            void getByteInfo() {
0953:                int charIndex, byteIndex, bytesThisChar, bytesTotal;
0954:
0955:                try {
0956:                    // First, loop over the characters to see if each of the characters
0957:                    // can be represented as a single UTF8 byte. In this special
0958:                    // case there is no need to worry about mapping bytes to charaters
0959:                    // or vice versa.
0960:
0961:                    char c;
0962:                    boolean singleBytes = true;
0963:
0964:                    for (int i = 0; i < array.length; i++) {
0965:                        c = array[i];
0966:                        if (c == '\0') {
0967:                            // Ignore encoding issues related to null byte in Java vs UTF8
0968:                            bytesThisChar = 1;
0969:                        } else {
0970:                            bytesThisChar = StringCmd.Utf8Count(c);
0971:                        }
0972:                        if (bytesThisChar != 1) {
0973:                            singleBytes = false;
0974:                            break;
0975:                        }
0976:                    }
0977:
0978:                    // When each character maps to a single byte, bytes is null
0979:
0980:                    if (singleBytes) {
0981:                        bytes = null;
0982:                        return;
0983:                    }
0984:
0985:                    // When multiple byte UTF8 characters are found, convert to
0986:                    // a byte array and save mapping info.
0987:
0988:                    String chars = new String(array); // Get string including trailing null
0989:                    bytes = chars.getBytes("UTF8");
0990:
0991:                    if (chars == null) { // For debugging only
0992:                        System.out.println("chars is \"" + chars + "\" len = "
0993:                                + chars.length());
0994:                        String bstr = new String(bytes, 0, bytes.length, "UTF8");
0995:                        System.out.println("bytes is \"" + bstr + "\" len = "
0996:                                + bytes.length);
0997:                    }
0998:
0999:                    // Count UTF8 bytes for each character, map char to byte index
1000:
1001:                    charToByteIndex = new int[array.length];
1002:
1003:                    for (charIndex = 0, byteIndex = 0; charIndex < charToByteIndex.length; charIndex++) {
1004:                        charToByteIndex[charIndex] = byteIndex;
1005:
1006:                        c = array[charIndex];
1007:                        if (c == '\0') {
1008:                            // Ignore encoding issues related to null byte in Java vs UTF8
1009:                            bytesThisChar = 1;
1010:                        } else {
1011:                            bytesThisChar = StringCmd.Utf8Count(c);
1012:                        }
1013:                        byteIndex += bytesThisChar;
1014:                    }
1015:
1016:                    // Double check that the number of expected bytes
1017:                    // was generated.
1018:                    bytesTotal = byteIndex;
1019:
1020:                    if (bytes.length != bytesTotal) {
1021:                        throw new TclRuntimeError("generated " + bytes.length
1022:                                + " but expected to generate " + bytesTotal
1023:                                + " bytes");
1024:                    }
1025:
1026:                    // Count Utf8 bytes for each character, map byte to char index
1027:
1028:                    byteToCharIndex = new int[bytes.length];
1029:                    for (charIndex = 0, byteIndex = 0, bytesThisChar = 0; byteIndex < byteToCharIndex.length; byteIndex++, bytesThisChar--) {
1030:                        if (byteIndex > 0 && bytesThisChar == 0) {
1031:                            charIndex++;
1032:                        }
1033:                        byteToCharIndex[byteIndex] = charIndex;
1034:
1035:                        c = array[charIndex];
1036:                        if (bytesThisChar == 0) {
1037:                            if (c == '\0') {
1038:                                // Ignore encoding issues related to null byte in Java vs UTF8
1039:                                bytesThisChar = 1;
1040:                            } else {
1041:                                bytesThisChar = StringCmd.Utf8Count(c);
1042:                            }
1043:                        }
1044:                    }
1045:                } catch (java.io.UnsupportedEncodingException ex) {
1046:                    throw new TclRuntimeError("UTF8 encoding not supported");
1047:                }
1048:            }
1049:
1050:            // Return bytes in the given byte range as a String
1051:
1052:            String getByteRangeAsString(int byteIndex, int byteLength) {
1053:                if (bytes == null) {
1054:                    // One byte for each character
1055:                    return orig.substring(byteIndex, byteIndex + byteLength);
1056:                }
1057:
1058:                try {
1059:                    return new String(bytes, byteIndex, byteLength, "UTF8");
1060:                } catch (java.io.UnsupportedEncodingException ex) {
1061:                    throw new TclRuntimeError("UTF8 encoding not supported");
1062:                }
1063:            }
1064:
1065:            // Convert char index into a byte index.
1066:
1067:            int getByteIndex(int charIndex) {
1068:                if (bytes == null) {
1069:                    // One byte for each character
1070:                    return charIndex;
1071:                }
1072:
1073:                return charToByteIndex[charIndex];
1074:            }
1075:
1076:            // Given a char index and range, return the number of
1077:            // bytes in the range.
1078:
1079:            int getByteRange(int charIndex, int charRange) {
1080:                if (bytes == null) {
1081:                    // One byte for each character
1082:                    return charRange;
1083:                }
1084:
1085:                return charToByteIndex[charIndex + charRange]
1086:                        - charToByteIndex[charIndex];
1087:            }
1088:
1089:            // Get number of bytes for the given char index
1090:
1091:            int getBytesAtIndex(int charIndex) {
1092:                if (bytes == null) {
1093:                    // One byte for each character
1094:                    return 1;
1095:                }
1096:
1097:                return charToByteIndex[charIndex + 1]
1098:                        - charToByteIndex[charIndex];
1099:            }
1100:
1101:            // Return length of script in bytes
1102:
1103:            int getByteLength() {
1104:                if (bytes == null) {
1105:                    // One byte for each character
1106:                    return orig.length();
1107:                }
1108:
1109:                return bytes.length - 1;
1110:            }
1111:
1112:            // Given a byte index, return the char index.
1113:
1114:            int getCharIndex(int byteIndex) {
1115:                if (bytes == null) {
1116:                    // One byte for each character
1117:                    return byteIndex;
1118:                }
1119:
1120:                return byteToCharIndex[byteIndex];
1121:            }
1122:
1123:            // Given a byte index and range, return the number of
1124:            // chars in the range.
1125:
1126:            int getCharRange(int byteIndex, int byteRange) {
1127:                if (bytes == null) {
1128:                    // One byte for each character
1129:                    return byteRange;
1130:                }
1131:
1132:                return byteToCharIndex[byteIndex + byteRange]
1133:                        - byteToCharIndex[byteIndex];
1134:            }
1135:
1136:            // This API is used for debugging, it would never be invoked as part
1137:            // of the InternalRep interface since a TclObject would always have
1138:            // a string rep when the  UTF8CharPointer is created and it should
1139:            // never be invalidated.
1140:
1141:            public String toString() {
1142:                if (bytes == null) {
1143:                    // One byte for each character
1144:                    return "1 byte for each character with length "
1145:                            + orig.length();
1146:                }
1147:
1148:                StringBuffer sb = new StringBuffer();
1149:
1150:                int max_char = array.length - 1;
1151:                int max_byte = bytes.length - 1;
1152:                int max = max_char;
1153:                if (max_byte > max) {
1154:                    max = max_byte;
1155:                }
1156:                sb.append("index char/byte array: (sizes = " + max_char + " "
1157:                        + max_byte + ")\n");
1158:
1159:                for (int i = 0; i < max; i++) {
1160:                    String char_ind = "   ", byte_ind = "   ";
1161:                    if (i < max_char) {
1162:                        char_ind = "'" + array[i] + "'";
1163:                    }
1164:                    if (i < max_byte) {
1165:                        byte_ind = "'" + ((char) bytes[i]) + "'";
1166:                    }
1167:
1168:                    sb.append("[" + i + "] = " + char_ind + " " + byte_ind
1169:                            + "\n");
1170:                }
1171:                sb.append("\n");
1172:
1173:                sb.append("charToByteIndex array:\n");
1174:                for (int i = 0; i < charToByteIndex.length - 1; i++) {
1175:                    sb.append("[" + i + "] = " + charToByteIndex[i] + "\n");
1176:                }
1177:                sb.append("\n");
1178:
1179:                sb.append("byteToCharIndex array:\n");
1180:                for (int i = 0; i < byteToCharIndex.length - 1; i++) {
1181:                    sb.append("[" + i + "] = " + byteToCharIndex[i] + "\n");
1182:                }
1183:                sb.append("\n");
1184:
1185:                return sb.toString();
1186:            }
1187:
1188:            // InternalRep interfaces
1189:
1190:            // Called to free any storage for the type's internal rep.
1191:
1192:            public void dispose() {
1193:            }
1194:
1195:            // duplicate
1196:
1197:            public InternalRep duplicate() {
1198:                // A UTF8CharPointer is read-only, so just dup the ref
1199:                return this;
1200:            }
1201:
1202:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.