001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: // @todo The current version does not correctly handle comment tokens that
043: // appear inside "" or '' (remember that such a section may span multiple lines!)
044: package org.netbeans.modules.tasklist.todo;
045:
046: import java.io.IOException;
047: import java.util.regex.Matcher;
048: import java.util.regex.Pattern;
049:
050: /**
051: * The Source Code Comment Parser allows you to read the comments in a source
052: * code line by line.
053: *
054: * @author Trond Norbye
055: * @author S. Aubrecht
056: */
final class SourceCodeCommentParser {

    /** The parser used by this SourceCodeCommentParser. */
    private final SourceParser parser;

    /**
     * Default instance, treats the whole text as one single comment.
     */
    public SourceCodeCommentParser() {
        parser = new SourceParser();
    }

    /**
     * Create a new instance of SourceCodeCommentParser that supports
     * single-line comments and multiline (block) comments.
     *
     * @param lineComment the start tag for a single-line comment, or
     *                    {@code null} if there is none
     * @param blockStart the start tag of a block comment, or {@code null} if
     *                   there is none
     * @param blockEnd the end tag of a block comment; if {@code null} while
     *                 {@code blockStart} is given, a block comment runs to the
     *                 end of the text
     */
    public SourceCodeCommentParser(String lineComment,
            String blockStart, String blockEnd) {
        parser = new CommentParser(lineComment, blockStart, blockEnd);
    }

    /**
     * Set the text to parse. Parsing restarts at the beginning of the text.
     *
     * @param text the text to parse
     */
    public void setText(String text) {
        parser.setText(text);
    }

    /**
     * Get the range of the next comment region.
     *
     * @param reg where to store the result
     * @return false when there are no more regions, true otherwise
     * @throws IOException declared for callers; the in-memory parsers in this
     *         class do not throw it themselves
     */
    public boolean nextRegion(CommentRegion reg) throws IOException {
        return parser.nextRegion(reg);
    }

    /**
     * The most generic parser: it treats everything as a comment and reports
     * the whole text as one region, so it "works" for all unknown file types.
     * Subclasses refine {@link #nextRegion(CommentRegion)}.
     */
    private static class SourceParser {

        /** The text that is being parsed. */
        protected String text;

        /** Current position in the text. */
        protected int curr;

        /** A matcher that may be utilized by a subclass. */
        protected Matcher matcher;

        /** The pattern to search for in the text (set by a subclass). */
        protected Pattern pattern;

        /**
         * Create a new instance of the SourceParser.
         */
        public SourceParser() {
            text = null;
            curr = 0;
            matcher = null;
        }

        /**
         * Get the indexes of the next comment region. This implementation
         * reports everything from the current position to the end of the text.
         *
         * @param reg where to store the result
         * @return false when no text is set or the text is exhausted, true
         *         otherwise
         * @throws java.io.IOException never thrown by this implementation
         */
        public boolean nextRegion(CommentRegion reg) throws IOException {
            if (text == null) {
                return false;
            }

            reg.start = curr;
            reg.stop = text.length();

            if (reg.start == reg.stop) {
                return false;
            }

            curr = reg.stop;
            return true;
        }

        /**
         * Set the text to parse and rewind to its beginning.
         *
         * @param text the text to parse; {@code null} clears the parser
         */
        public void setText(String text) {
            this.text = text;
            // A new text must always be scanned from its start; a stale offset
            // from the previous text could even lie beyond the new length.
            curr = 0;
            matcher = (pattern != null && text != null)
                    ? pattern.matcher(text)
                    : null;
        }
    }

    /**
     * The comment parser extends the source parser with knowledge about
     * single-line comments and block comments, and reports one region per
     * comment found in the text.
     */
    private static class CommentParser extends SourceParser {

        /** The string that indicates the start of a single line comment. */
        protected final String lineComment;

        /** The string that indicates the start of a multiline comment. */
        protected final String blockStart;

        /** The string that indicates the end of a multiline comment. */
        protected final String blockEnd;

        /**
         * Create a new instance of the comment parser that only supports
         * single-line comments.
         *
         * @param lineComment the token to start a line comment
         */
        public CommentParser(String lineComment) {
            this(lineComment, null, null);
        }

        /**
         * Create a new instance of the comment parser.
         *
         * @param lineComment the token for a single line comment, may be null
         * @param blockStart the start token for a multiline comment block,
         *                   may be null
         * @param blockEnd the end token for a multiline comment block
         */
        public CommentParser(String lineComment, String blockStart,
                String blockEnd) {
            super();
            this.lineComment = lineComment;
            this.blockStart = blockStart;
            this.blockEnd = blockEnd;

            pattern = buildPattern(lineComment, blockStart);
            matcher = null;
        }

        /**
         * Build the pattern that finds the start of the next comment.
         *
         * @param lineComment the line comment token, may be null
         * @param blockStart the block start token, may be null
         * @return the pattern, or null when no token is defined at all
         */
        private static Pattern buildPattern(String lineComment,
                String blockStart) {
            if (lineComment == null && blockStart == null) {
                return null; // nothing to scan for
            }
            if (lineComment == null) {
                return Pattern.compile(Pattern.quote(blockStart));
            }
            if (blockStart == null) {
                return Pattern.compile(Pattern.quote(lineComment));
            }

            String line = Pattern.quote(lineComment);
            String block = Pattern.quote(blockStart);

            // Alternation picks the first alternative that matches, so when
            // the block token begins with the line token (e.g. "--[[" and
            // "--") the block token has to be tried first.
            return Pattern.compile(blockStart.startsWith(lineComment)
                    ? block + '|' + line
                    : line + '|' + block);
        }

        /**
         * Get the next comment region from the text.
         *
         * @param reg where to store the result; for a line comment the region
         *            ends before the line break, for a block comment it
         *            includes the end token, and an unterminated comment ends
         *            at the end of the text
         * @return false when there are no more comments, true otherwise
         * @throws java.io.IOException never thrown by this implementation
         */
        @Override
        public boolean nextRegion(CommentRegion reg) throws IOException {
            // matcher is only non-null while a text is set. Matcher.find(int)
            // rejects offsets beyond the end of the input, so guard for that.
            if (matcher == null || curr > text.length()
                    || !matcher.find(curr)) {
                return false;
            }

            final int tokenStart = matcher.start();
            final int tokenEnd = matcher.end();
            final String token = text.substring(tokenStart, tokenEnd);
            final int next;

            reg.start = tokenStart;

            if (lineComment != null && lineComment.equals(token)) {
                int newline = text.indexOf('\n', tokenEnd);
                if (newline != -1) {
                    reg.stop = newline;
                    next = newline + 1; // continue after the line break
                } else {
                    reg.stop = text.length();
                    next = reg.stop;
                }
            } else {
                // Search behind the start token: the end token must not
                // overlap it ("/*/") and may even be identical to it.
                int end = (blockEnd != null)
                        ? text.indexOf(blockEnd, tokenEnd)
                        : -1;
                reg.stop = (end != -1)
                        ? end + blockEnd.length()
                        : text.length();
                // Continue right behind the comment so that a directly
                // adjacent comment is found as well.
                next = reg.stop;
            }

            // Always move forward, even for degenerate (empty) tokens.
            curr = Math.max(next, tokenStart + 1);
            return true;
        }
    }

    /** A little handy struct to pass up to the parent. */
    static class CommentRegion {
        /** The position in the text where the comment starts. */
        public int start;
        /** The position in the text just behind the end of the comment. */
        public int stop;

        /** Create a new instance. */
        public CommentRegion() {
            start = stop = 0;
        }
    }
}
|