Source Code Cross Referenced for StreamTokenizer.java in » Apache-Harmony-Java-SE » java-package » java » io » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Apache Harmony Java SE » java package » java.io
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         *  Licensed to the Apache Software Foundation (ASF) under one or more
003:         *  contributor license agreements.  See the NOTICE file distributed with
004:         *  this work for additional information regarding copyright ownership.
005:         *  The ASF licenses this file to You under the Apache License, Version 2.0
006:         *  (the "License"); you may not use this file except in compliance with
007:         *  the License.  You may obtain a copy of the License at
008:         *
009:         *     http://www.apache.org/licenses/LICENSE-2.0
010:         *
011:         *  Unless required by applicable law or agreed to in writing, software
012:         *  distributed under the License is distributed on an "AS IS" BASIS,
013:         *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014:         *  See the License for the specific language governing permissions and
015:         *  limitations under the License.
016:         */
017:
018:        package java.io;
019:
/**
 * Splits a character stream into tokens, returning one token per call to
 * {@link #nextToken()}. The token kinds are numbers, words, quoted strings,
 * end-of-line markers, end-of-stream, and single ordinary characters.
 * Comments introduced by a comment character, and optionally C-style and
 * C++-style comments, are skipped.
 * <p>
 * Only characters in the range 0 to 255 can be given a syntax category;
 * every character above 255 is treated as a word character.
 */
public class StreamTokenizer {
    /**
     * The value of the current token when <code>ttype</code> is
     * <code>TT_NUMBER</code>.
     */
    public double nval;

    /**
     * The text of the current token when <code>ttype</code> is
     * <code>TT_WORD</code> or a quote character. It is reset to
     * <code>null</code> each time <code>nextToken</code> scans a new token.
     */
    public String sval;

    /**
     * The constant representing end of stream.
     */
    public static final int TT_EOF = -1;

    /**
     * The constant representing end of line.
     */
    public static final int TT_EOL = '\n';

    /**
     * The constant representing a number token.
     */
    public static final int TT_NUMBER = -2;

    /**
     * The constant representing a word token.
     */
    public static final int TT_WORD = -3;

    /**
     * Internal representation of unknown state (no token read yet).
     */
    private static final int TT_UNKNOWN = -4;

    /**
     * After calling <code>nextToken</code>, this field contains the type of
     * the token that has been read. When a single ordinary character is read,
     * its integer value is used. For a quoted string, the value is the quote
     * character. Otherwise it is one of the following:
     * <UL>
     * <LI> <code>TT_WORD</code> - the token is a word.</LI>
     * <LI> <code>TT_NUMBER</code> - the token is a number.</LI>
     * <LI> <code>TT_EOL</code> - the end of line has been reached. Only
     * returned when <code>eolIsSignificant</code> is <code>true</code>.</LI>
     * <LI> <code>TT_EOF</code> - the end of the stream has been reached.</LI>
     * </UL>
     */
    public int ttype = TT_UNKNOWN;

    /**
     * Syntax category bits for characters 0..255; 0 means an ordinary
     * character.
     */
    private final byte[] tokenTypes = new byte[256];

    private static final byte TOKEN_COMMENT = 1;

    private static final byte TOKEN_QUOTE = 2;

    private static final byte TOKEN_WHITE = 4;

    private static final byte TOKEN_WORD = 8;

    private static final byte TOKEN_DIGIT = 16;

    /** Value of <code>peekChar</code> meaning "no look-ahead; read the stream". */
    private static final int NEED_READ = -2;

    /**
     * Internal scan result meaning "a comment was consumed, scan again". It
     * cannot collide with a token type: those are characters or the small
     * negative <code>TT_*</code> constants.
     */
    private static final int COMMENT_SKIPPED = Integer.MIN_VALUE;

    private int lineNumber = 1;

    private boolean forceLowercase;

    private boolean isEOLSignificant;

    private boolean slashStarComments;

    private boolean slashSlashComments;

    private boolean pushBackToken;

    /** True when the previous token was a TT_EOL produced by a '\r'. */
    private boolean lastCr;

    /* Exactly one of these holds the input. */
    private InputStream inStream;

    private Reader inReader;

    /** One character of look-ahead, or <code>NEED_READ</code>. */
    private int peekChar = NEED_READ;

    /**
     * Private constructor that installs the default syntax table.
     */
    private StreamTokenizer() {
        // 'A'-'Z', 'a'-'z' and 0xA0-0xFF are alphabetic.
        wordChars('A', 'Z');
        wordChars('a', 'z');
        wordChars(160, 255);
        // 0x00 through 0x20 are white space.
        whitespaceChars(0, 32);
        // '/' starts a comment; single and double quotes delimit strings.
        commentChar('/');
        quoteChar('"');
        quoteChar('\'');
        // Numbers are parsed.
        parseNumbers();
        // EOL-as-whitespace and "no C/C++ comments" are the field defaults,
        // so nothing needs to be set for them here.
    }

    /**
     * Construct a new StreamTokenizer on the InputStream <code>is</code>.
     * Uses of this constructor should be replaced with the constructor which
     * takes a Reader.
     *
     * @param is
     *            The InputStream to parse tokens on.
     * @throws NullPointerException
     *             if <code>is</code> is <code>null</code>.
     *
     * @deprecated Use StreamTokenizer(Reader)
     */
    @Deprecated
    public StreamTokenizer(InputStream is) {
        this();
        if (is == null) {
            throw new NullPointerException();
        }
        inStream = is;
    }

    /**
     * Construct a new StreamTokenizer on the Reader <code>r</code> with the
     * default syntax:
     * <UL>
     * <LI>All byte values 'A' through 'Z', 'a' through 'z', and '&#92;u00A0'
     * through '&#92;u00FF' are considered to be alphabetic.</LI>
     * <LI>All byte values '&#92;u0000' through '&#92;u0020' are considered to
     * be white space. '/' is a comment character.</LI>
     * <LI>Single quote '\'' and double quote '"' are string quote characters.</LI>
     * <LI>Numbers are parsed.</LI>
     * <LI>Ends of lines are considered to be white space rather than separate
     * tokens.</LI>
     * <LI>C-style and C++-style comments are not recognized.</LI>
     * </UL>
     *
     * @param r
     *            The Reader to parse tokens on.
     * @throws NullPointerException
     *             if <code>r</code> is <code>null</code>.
     */
    public StreamTokenizer(Reader r) {
        this();
        if (r == null) {
            throw new NullPointerException();
        }
        inReader = r;
    }

    /**
     * Set the character <code>ch</code> to be regarded as a comment
     * character. Values outside 0..255 are ignored.
     *
     * @param ch
     *            The character to be considered a comment character.
     */
    public void commentChar(int ch) {
        if (0 <= ch && ch < tokenTypes.length) {
            tokenTypes[ch] = TOKEN_COMMENT;
        }
    }

    /**
     * Set a boolean indicating whether or not end of line is significant and
     * should be returned as <code>TT_EOL</code> in <code>ttype</code>.
     *
     * @param flag
     *            <code>true</code> if EOL is significant, <code>false</code>
     *            otherwise.
     */
    public void eolIsSignificant(boolean flag) {
        isEOLSignificant = flag;
    }

    /**
     * Answer the current line number.
     *
     * @return the current line number.
     */
    public int lineno() {
        return lineNumber;
    }

    /**
     * Set a boolean indicating whether or not word tokens should be
     * lowercased when stored in <code>sval</code>.
     *
     * @param flag
     *            <code>true</code> if <code>sval</code> should be forced
     *            lowercase, <code>false</code> otherwise.
     */
    public void lowerCaseMode(boolean flag) {
        forceLowercase = flag;
    }

    /**
     * Read the next token and answer its type, which is also stored in
     * <code>ttype</code>. If <code>pushBack()</code> was called since the
     * last token was read, that token is answered again without reading.
     *
     * @return The type of the next token.
     *
     * @throws IOException
     *             If an IO error occurs while getting the token
     */
    public int nextToken() throws IOException {
        if (pushBackToken) {
            pushBackToken = false;
            if (ttype != TT_UNKNOWN) {
                return ttype;
            }
        }
        sval = null; // Always reset sval to null
        int token;
        // Loop (rather than recurse) past comments so that any number of
        // consecutive comments uses constant stack space.
        do {
            token = scanToken();
        } while (token == COMMENT_SKIPPED);
        return (ttype = token);
    }

    /**
     * Scans one token, or consumes one comment.
     *
     * @return the token type, or <code>COMMENT_SKIPPED</code> when a comment
     *         was consumed and scanning must be repeated.
     */
    private int scanToken() throws IOException {
        int currentChar = peekChar == NEED_READ ? read() : peekChar;

        // A '\n' immediately after a '\r' that was already reported as
        // TT_EOL belongs to the same line terminator. The flag is cleared
        // unconditionally so it cannot swallow a later, unrelated '\n'.
        boolean afterCr = lastCr;
        lastCr = false;
        if (afterCr && currentChar == '\n') {
            currentChar = read();
        }
        if (currentChar == -1) {
            return TT_EOF;
        }

        byte currentType = typeOf(currentChar);
        while ((currentType & TOKEN_WHITE) != 0) {
            // Skip white space until a significant EOL or a real token.
            if (currentChar == '\r') {
                lineNumber++;
                if (isEOLSignificant) {
                    lastCr = true;
                    peekChar = NEED_READ;
                    return TT_EOL;
                }
                currentChar = read();
                if (currentChar == '\n') {
                    currentChar = read();
                }
            } else if (currentChar == '\n') {
                lineNumber++;
                if (isEOLSignificant) {
                    peekChar = NEED_READ;
                    return TT_EOL;
                }
                currentChar = read();
            } else {
                currentChar = read();
            }
            if (currentChar == -1) {
                // Remember EOF; otherwise a stale white space look-ahead
                // would be re-processed (and lines re-counted) on every
                // later call.
                peekChar = -1;
                return TT_EOF;
            }
            currentType = typeOf(currentChar);
        }

        // Digits are checked before words since digits can be contained
        // within words.
        if ((currentType & TOKEN_DIGIT) != 0) {
            return scanNumber(currentChar);
        }
        if ((currentType & TOKEN_WORD) != 0) {
            return scanWord(currentChar);
        }
        if (currentType == TOKEN_QUOTE) {
            return scanQuotedString(currentChar);
        }
        // Do comments, both "//" and slash-star
        if (currentChar == '/' && (slashSlashComments || slashStarComments)) {
            int next = read();
            if (next == '*' && slashStarComments) {
                return skipBlockComment();
            }
            if (next == '/' && slashSlashComments) {
                skipToEndOfLine(read());
                return COMMENT_SKIPPED;
            }
            if (currentType != TOKEN_COMMENT) {
                // Was just a slash by itself
                peekChar = next;
                return '/';
            }
            // '/' is a plain comment character and 'next' is already the
            // first character of the comment body; it may itself be the
            // line terminator, so it must be examined rather than dropped.
            skipToEndOfLine(next);
            return COMMENT_SKIPPED;
        }
        if (currentType == TOKEN_COMMENT) {
            skipToEndOfLine(read());
            return COMMENT_SKIPPED;
        }

        // Ordinary character: it is its own token type.
        peekChar = read();
        return currentChar;
    }

    /** Answers the syntax bits for <code>c</code>; above 255 is always a word character. */
    private byte typeOf(int c) {
        return c > 255 ? TOKEN_WORD : tokenTypes[c];
    }

    /**
     * Scans a number starting with <code>first</code> (a digit, '.' or '-').
     * Accepts digits and at most one '.'; a lone '-' is answered as the
     * ordinary character '-'.
     */
    private int scanNumber(int first) throws IOException {
        StringBuilder digits = new StringBuilder(20);
        boolean haveDecimal = false;
        int c = first;
        do {
            if (c == '.') {
                haveDecimal = true;
            }
            digits.append((char) c);
            c = read();
        } while ((c >= '0' && c <= '9') || (!haveDecimal && c == '.'));
        peekChar = c;
        if (first == '-' && digits.length() == 1) {
            // Didn't get any other digits other than '-'
            return '-';
        }
        try {
            nval = Double.parseDouble(digits.toString());
        } catch (NumberFormatException ignored) {
            // Text such as "." or "-." has no digits; report it as zero.
            nval = 0;
        }
        return TT_NUMBER;
    }

    /**
     * Scans a word starting with <code>first</code>; it continues over word
     * and digit characters and every character above 255.
     */
    private int scanWord(int first) throws IOException {
        StringBuilder word = new StringBuilder(20);
        int c = first;
        do {
            word.append((char) c);
            c = read();
        } while (c != -1
                && (c >= 256 || (tokenTypes[c] & (TOKEN_WORD | TOKEN_DIGIT)) != 0));
        peekChar = c;
        sval = forceLowercase ? word.toString().toLowerCase() : word.toString();
        return TT_WORD;
    }

    /**
     * Scans a quoted string whose opening quote <code>quote</code> has just
     * been read. The string ends at the matching quote, at a line terminator
     * or at end of stream. Backslash escapes (\a \b \f \n \r \t \v and up to
     * three octal digits) are translated; any other escaped character stands
     * for itself.
     *
     * @return the quote character, which is the token type.
     */
    private int scanQuotedString(int quote) throws IOException {
        StringBuilder text = new StringBuilder();
        int c = read();
        while (c >= 0 && c != quote && c != '\r' && c != '\n') {
            // False when 'c' already holds an unprocessed look-ahead
            // character that must go through the loop test again.
            boolean consumed = true;
            if (c == '\\') {
                int c1 = read();
                if (c1 >= '0' && c1 <= '7') {
                    // Quoted octal, e.g. \377
                    int value = c1 - '0';
                    c1 = read();
                    if (c1 > '7' || c1 < '0') {
                        consumed = false;
                    } else {
                        value = value * 8 + (c1 - '0');
                        c1 = read();
                        // A third digit is taken only if the result still
                        // fits in a byte.
                        if (value > 037 || c1 > '7' || c1 < '0') {
                            consumed = false;
                        } else {
                            value = value * 8 + (c1 - '0');
                        }
                    }
                    if (!consumed) {
                        // Read one character too many: emit the octal value
                        // and re-examine the extra character.
                        text.append((char) value);
                        c = c1;
                    } else {
                        c = value;
                    }
                } else if (c1 < 0) {
                    // End of stream right after the backslash: stop here
                    // instead of appending (char) -1 to the string.
                    c = c1;
                    consumed = false;
                } else {
                    switch (c1) {
                    case 'a':
                        c = 0x7;
                        break;
                    case 'b':
                        c = 0x8;
                        break;
                    case 'f':
                        c = 0xc;
                        break;
                    case 'n':
                        c = 0xA;
                        break;
                    case 'r':
                        c = 0xD;
                        break;
                    case 't':
                        c = 0x9;
                        break;
                    case 'v':
                        c = 0xB;
                        break;
                    default:
                        c = c1;
                    }
                }
            }
            if (consumed) {
                text.append((char) c);
                c = read();
            }
        }
        if (c == quote) {
            c = read();
        }
        peekChar = c;
        sval = text.toString();
        return quote;
    }

    /**
     * Skips the body of a slash-star comment whose opener has been read,
     * counting the lines it spans.
     *
     * @return <code>COMMENT_SKIPPED</code>, or <code>TT_EOF</code> if the
     *         stream ends before the comment is closed.
     */
    private int skipBlockComment() throws IOException {
        int c = read();
        while (c != -1) {
            int ahead = read();
            if (c == '*' && ahead == '/') {
                peekChar = read();
                return COMMENT_SKIPPED;
            }
            if (c == '\r') {
                lineNumber++;
                if (ahead == '\n') {
                    // "\r\n" is a single line terminator
                    ahead = read();
                }
            } else if (c == '\n') {
                lineNumber++;
            }
            c = ahead;
        }
        peekChar = -1;
        return TT_EOF;
    }

    /**
     * Starting with <code>c</code>, discards characters up to, but not
     * including, the next line terminator or end of stream, and leaves that
     * terminating character as the look-ahead.
     */
    private void skipToEndOfLine(int c) throws IOException {
        while (c >= 0 && c != '\r' && c != '\n') {
            c = read();
        }
        peekChar = c;
    }

    /**
     * Set the character <code>ch</code> to be regarded as an ordinary
     * character. Values outside 0..255 are ignored.
     *
     * @param ch
     *            The character to be considered an ordinary character.
     */
    public void ordinaryChar(int ch) {
        if (0 <= ch && ch < tokenTypes.length) {
            tokenTypes[ch] = 0;
        }
    }

    /**
     * Set the characters ranging from <code>low</code> to <code>hi</code>
     * (inclusive) to be regarded as ordinary characters. The range is
     * clamped to 0..255.
     *
     * @param low
     *            The starting range for ordinary characters.
     * @param hi
     *            The ending range for ordinary characters.
     */
    public void ordinaryChars(int low, int hi) {
        if (low < 0) {
            low = 0;
        }
        if (hi >= tokenTypes.length) {
            hi = tokenTypes.length - 1;
        }
        for (int i = low; i <= hi; i++) {
            tokenTypes[i] = 0;
        }
    }

    /**
     * Indicate that numbers should be parsed: the digits, '.' and '-' become
     * numeric characters in addition to any category they already have.
     */
    public void parseNumbers() {
        for (int i = '0'; i <= '9'; i++) {
            tokenTypes[i] |= TOKEN_DIGIT;
        }
        tokenTypes['.'] |= TOKEN_DIGIT;
        tokenTypes['-'] |= TOKEN_DIGIT;
    }

    /**
     * Indicate that the current token should be pushed back and returned the
     * next time <code>nextToken()</code> is called.
     */
    public void pushBack() {
        pushBackToken = true;
    }

    /**
     * Set the character <code>ch</code> to be regarded as a quote character.
     * Values outside 0..255 are ignored.
     *
     * @param ch
     *            The character to be considered a quote character.
     */
    public void quoteChar(int ch) {
        if (0 <= ch && ch < tokenTypes.length) {
            tokenTypes[ch] = TOKEN_QUOTE;
        }
    }

    /** Reads one character (or byte) from whichever source was supplied. */
    private int read() throws IOException {
        if (inStream == null) {
            return inReader.read();
        }
        return inStream.read();
    }

    /**
     * Reset all characters so that they are ordinary.
     */
    public void resetSyntax() {
        for (int i = 0; i < tokenTypes.length; i++) {
            tokenTypes[i] = 0;
        }
    }

    /**
     * Set a boolean indicating whether or not slash slash comments should be
     * recognized. The comment ends at a new line.
     *
     * @param flag
     *            <code>true</code> if <code>//</code> should be recognized
     *            as the start of a comment, <code>false</code> otherwise.
     */
    public void slashSlashComments(boolean flag) {
        slashSlashComments = flag;
    }

    /**
     * Set a boolean indicating whether or not slash star comments should be
     * recognized. Slash-star comments cannot be nested and end when a
     * star-slash combination is found.
     *
     * @param flag
     *            <code>true</code> if <code>/*</code> should be recognized
     *            as the start of a comment, <code>false</code> otherwise.
     */
    public void slashStarComments(boolean flag) {
        slashStarComments = flag;
    }

    /**
     * Answer the state of this tokenizer in a readable format.
     *
     * @return The current token and line number of this tokenizer.
     */
    @Override
    public String toString() {
        // Values determined through experimentation
        StringBuilder result = new StringBuilder();
        result.append("Token["); //$NON-NLS-1$
        switch (ttype) {
        case TT_EOF:
            result.append("EOF"); //$NON-NLS-1$
            break;
        case TT_EOL:
            result.append("EOL"); //$NON-NLS-1$
            break;
        case TT_NUMBER:
            result.append("n="); //$NON-NLS-1$
            result.append(nval);
            break;
        case TT_WORD:
            result.append(sval);
            break;
        default:
            // ttype is a public field, so it may hold a value outside the
            // syntax table; only index the table when it is in range.
            boolean quoted = ttype >= 0 && ttype < tokenTypes.length
                    && tokenTypes[ttype] == TOKEN_QUOTE;
            if (ttype == TT_UNKNOWN || quoted) {
                result.append(sval);
            } else {
                result.append('\'');
                result.append((char) ttype);
                result.append('\'');
            }
        }
        result.append("], line "); //$NON-NLS-1$
        result.append(lineNumber);
        return result.toString();
    }

    /**
     * Set the characters ranging from <code>low</code> to <code>hi</code>
     * (inclusive) to be regarded as whitespace characters. The range is
     * clamped to 0..255.
     *
     * @param low
     *            The starting range for whitespace characters.
     * @param hi
     *            The ending range for whitespace characters.
     */
    public void whitespaceChars(int low, int hi) {
        if (low < 0) {
            low = 0;
        }
        if (hi >= tokenTypes.length) {
            hi = tokenTypes.length - 1;
        }
        for (int i = low; i <= hi; i++) {
            tokenTypes[i] = TOKEN_WHITE;
        }
    }

    /**
     * Set the characters ranging from <code>low</code> to <code>hi</code>
     * (inclusive) to be regarded as word characters, in addition to any
     * category they already have. The range is clamped to 0..255.
     *
     * @param low
     *            The starting range for word characters.
     * @param hi
     *            The ending range for word characters.
     */
    public void wordChars(int low, int hi) {
        if (low < 0) {
            low = 0;
        }
        if (hi >= tokenTypes.length) {
            hi = tokenTypes.length - 1;
        }
        for (int i = low; i <= hi; i++) {
            tokenTypes[i] |= TOKEN_WORD;
        }
    }
}
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.