001: package tide.editor.styler;
002:
003: import snow.texteditor.*;
004: import java.util.*;
005: import javax.swing.text.*;
006:
/** Recognizes the comments, words and literals in a Java source code document.
Deliberately simple: it is only used to highlight the code quickly, NOT as a real parser.

A special mode allows refreshing only part of the document.
Be careful not to start it within a comment or string. (NOT IMPLEMENTED)

This is very fast.
*/
public class SimpleCodeParser {

    /** Kind of item returned by {@link #getNextItem()}. */
    public static enum ItemType {
        Word, Comment, Litteral
    }

    // Added to every reported item position. Used when the parsed text was taken
    // from somewhere other than the start of the original document.
    private final int shift;
    final String documentContent;
    // Index in documentContent at which the next call to getNextItem() resumes scanning.
    private int actualStartPosition;
    // Exclusive upper bound of the scanned region.
    private final int endPosition;

    /**
     * Parses the whole document.
     *
     * @param doc the text to scan
     */
    public SimpleCodeParser(String doc) {
        this(doc, 0, doc.length(), 0);
    }

    /**
     * Parses the region {@code [from, to)} of {@code doc}.
     * To be called OUTSIDE of a comment: the scanner has no way to know that
     * {@code from} lies inside a comment or a string.
     *
     * @param doc   the text to scan
     * @param from  index of the first character to scan
     * @param to    index just after the last character to scan
     * @param shift value added to all produced item positions; must be the start
     *              position of {@code doc} in the original document when parsing
     *              only a part of it
     */
    public SimpleCodeParser(String doc, int from, int to, int shift) {
        this.documentContent = doc;
        this.actualStartPosition = from;
        this.endPosition = to;
        this.shift = shift;
    }

    /**
     * Scans forward for the next word, comment or literal.
     * Scanning stops (and {@code null} is returned) if a NUL character is met
     * while looking for the start of an item.
     *
     * @return the next item, or {@code null} if there are no more items
     */
    public Item getNextItem() {
        int start = -1;
        ItemType type = null;
        boolean singleLineComment = false;

        // Phase 1: skip everything up to the first character that opens an item.
        for (int i = actualStartPosition; i < endPosition; i++) {
            char ci = documentContent.charAt(i);
            if (ci == 0) {
                System.out
                        .println("SimpleCodeParser stopped: char 0 at "
                                + i);
                return null;
            }

            if (ci == '/') {
                char next = getNextCharOrZero(i);
                if (next == '/' || next == '*') {
                    type = ItemType.Comment;
                    singleLineComment = (next == '/');
                    start = i;
                    break;
                }
                // a lone '/' is an operator: keep scanning
            } else if (ci == '\'' || ci == '\"') {
                type = ItemType.Litteral;
                start = i;
                break;
            } else if (Character.isJavaIdentifierStart(ci)) {
                type = ItemType.Word;
                start = i;
                break;
            }
        }

        if (start == -1) {
            // Nothing left in the region: park the cursor at the end so that
            // further calls return immediately instead of rescanning.
            actualStartPosition = endPosition;
            return null;
        }

        // Phase 2: find where the item ends (exclusive index).
        int end;
        if (type == ItemType.Comment) {
            end = findCommentEnd(start, singleLineComment);
        } else if (type == ItemType.Litteral) {
            end = findLiteralEnd(start);
        } else {
            end = findWordEnd(start);
        }

        actualStartPosition = end;
        return new Item(documentContent.substring(start, end), start + shift, type);
    }

    /**
     * Finds the exclusive end index of the comment starting at {@code start}.
     * A single-line comment stops before the first CR or LF; a block comment
     * stops after its closing star-slash. An unterminated comment runs to the
     * end of the scanned region.
     */
    private int findCommentEnd(int start, boolean singleLine) {
        for (int i = start + 1; i < endPosition; i++) {
            char ci = documentContent.charAt(i);
            if (singleLine) {
                if (ci == '\r' || ci == '\n') {
                    return i; // the line break is not part of the comment
                }
            } else if (ci == '/'
                    && i >= start + 3 // the '*' of the opening delimiter cannot also close the comment
                    && documentContent.charAt(i - 1) == '*') {
                return i + 1;
            }
        }
        return endPosition;
    }

    /**
     * Finds the exclusive end index of the literal opened by the quote at
     * {@code start}: just after the next unescaped occurrence of the same quote
     * character, or the end of the scanned region if there is none.
     */
    private int findLiteralEnd(int start) {
        char delimiter = documentContent.charAt(start);
        for (int i = start + 1; i < endPosition; i++) {
            if (documentContent.charAt(i) == delimiter && !isEscaped(i, start)) {
                return i + 1;
            }
        }
        return endPosition;
    }

    /** Finds the exclusive end index of the identifier starting at {@code start}. */
    private int findWordEnd(int start) {
        int i = start + 1;
        while (i < endPosition
                && Character.isJavaIdentifierPart(documentContent.charAt(i))) {
            i++;
        }
        return i;
    }

    /**
     * Tells whether the character at {@code pos} is escaped, i.e. preceded by an
     * odd number of consecutive backslashes. Backslash + delimiter is not a
     * delimiter, but two backslashes + delimiter is, because the two backslashes
     * represent a single backslash character. Only characters after
     * {@code literalStart} are considered.
     */
    private boolean isEscaped(int pos, int literalStart) {
        int backslashes = 0;
        for (int i = pos - 1; i > literalStart && documentContent.charAt(i) == '\\'; i--) {
            backslashes++;
        }
        return backslashes % 2 != 0;
    }

    /**
     * Returns the character following {@code pos} in the document, or the NUL
     * character when {@code pos} is the last index of the document.
     */
    private char getNextCharOrZero(int pos) {
        if (pos + 1 >= this.documentContent.length()) {
            return (char) 0;
        }
        return documentContent.charAt(pos + 1);
    }

    /** Inner static class representing an item (word, comment, literal). */
    public static class Item {
        /** Start position of the item, including the parser's shift. */
        final public int positionInDocument;
        /** Exact text of the item, delimiters included. */
        final public String word;
        final public ItemType type;

        public Item(String w, int pos, ItemType type) {
            this.word = w;
            this.positionInDocument = pos;
            this.type = type;
        }

        /** @return the position just after the last character of the item */
        public int getPositionEnd() {
            return positionInDocument + word.length();
        }

        @Override
        public String toString() {
            return word + " (" + positionInDocument + ", " + type + ")";
        }
    }

    /** Small manual demo: prints every item found in a sample string. */
    public static void main(String[] args) {
        String doc = " Hallo hello \r\ne // aaa\r\nimporot\"hallo\"/** aaa */";

        SimpleCodeParser dwt = new SimpleCodeParser(doc);
        Item it = null;
        while ((it = dwt.getNextItem()) != null) {
            System.out.println("" + it);
        }

    }

}
|