001: /*
002: * Sun Public License Notice
003: *
004: * The contents of this file are subject to the Sun Public License
005: * Version 1.0 (the "License"). You may not use this file except in
006: * compliance with the License. A copy of the License is available at
007: * http://www.sun.com/
008: *
009: * The Original Code is NetBeans. The Initial Developer of the Original
010: * Code is Sun Microsystems, Inc. Portions Copyright 1997-2000 Sun
011: * Microsystems, Inc. All Rights Reserved.
012: */
013:
014: package org.netbeans.editor.ext.plain;
015:
016: import org.netbeans.editor.Syntax;
017: import org.netbeans.editor.TokenID;
018:
019: /**
020: * Lexical analyzer for the plain text.
021: *
022: * @author Miloslav Metelka
023: * @version 1.00
024: */
025:
026: public class PlainSyntax extends Syntax {
027:
028: /*
029: * Internal states used internally by analyzer. There can be any number of
030: * them declared by the analyzer. They are usually numbered starting from
031: * zero but they don't have to. The only reserved value is -1 which is
032: * reserved for the INIT state - the initial internal state of the analyzer.
033: */
034: private static final int ISI_TEXT = 0;
035:
036: public PlainSyntax() {
037: tokenContextPath = PlainTokenContext.contextPath;
038: }
039:
040: /**
041: * This is core function of analyzer and it returns one of following
042: * numbers: a) token number of next token from scanned text b) EOL when end
043: * of line was found in scanned buffer c) EOT when there is no more chars
044: * available in scanned buffer.
045: *
046: * The function scans the active character and does one or more of the
047: * following actions: 1. change internal analyzer state (state = new-state)
048: * 2. return token ID (return token-ID) 3. adjust current position to signal
049: * different end of token; the character that offset points to is not
050: * included in the token
051: */
052: protected TokenID parseToken() {
053: // The main loop that reads characters one by one follows
054: while (offset < stopOffset) {
055: char ch = buffer[offset]; // get the current character
056:
057: switch (state) { // switch by the current internal state
058: case INIT:
059: switch (ch) {
060: case '\n':
061: offset++;
062: return PlainTokenContext.EOL;
063: default:
064: state = ISI_TEXT;
065: break;
066: }
067: break;
068:
069: case ISI_TEXT:
070: switch (ch) {
071: case '\n':
072: state = INIT;
073: return PlainTokenContext.TEXT;
074: }
075: break;
076:
077: } // end of switch(state)
078:
079: offset++; // move to the next char
080: }
081:
082: /*
083: * At this state there's no more text in the scanned buffer. The caller
084: * will decide either to stop scanning at all or to relocate scanning
085: * and provide next buffer with characters. The lastBuffer variable
086: * indicates whether the scanning will stop (true) or the caller will
087: * provide another buffer to continue on (false) and call relocate() to
088: * continue on the given buffer. If this is the last buffer, the
089: * analyzer must ensure that for all internal states there will be some
090: * token ID returned. The easiest way how to ensure that all the
091: * internal states will be covered is to copy all the internal state
092: * constants and put them after the switch() and provide the code that
093: * will return appropriate token ID.
094: *
095: * When there are no more characters available in the buffer and the
096: * buffer is not the last one the analyzer can still decide to return
097: * the token ID even if it doesn't know whether the token is complete or
098: * not. This is possible in this simple implementation for example
099: * because it doesn't matter whether it returns the text all together or
100: * broken into several pieces. The advantage of such aproach is that the
101: * preScan value is minimized which avoids the additional increasing of
102: * the buffer by preScan characters, but on the other hand it can become
103: * problematic if the token should be forwarded for some further
104: * processing. For example it could seem handy to return incomplete
105: * token for java block comments but it could become difficult if we
106: * would want to analyzer these comment tokens additionally by the HTML
107: * analyzer for example.
108: */
109:
110: // Normally the following block would be done only for lastBuffer ==
111: // true
112: // but in this case it can always be done
113: switch (state) {
114: case ISI_TEXT:
115: state = INIT;
116: return PlainTokenContext.TEXT;
117: }
118:
119: // need to continue on another buffer
120: return null;
121: }
122:
123: }
|