001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.lib.lexer;
043:
044: /**
045: * Character preprocessor allows to translate a sequence
046: * of characters to a single character so it may be used
047: * for example for Unicode sequences translation.
048: * <br/>
049: * If there are any preprocessed characters for a particular token
050: * then a special token instance get created that provides
051: * the preprocessed chars.
052: *
053: * @author Miloslav Metelka
054: * @version 1.00
055: */
056:
057: public abstract class CharPreprocessor {
058:
059: /**
060: * Create instance of character preprocessor for Unicode escape sequences.
061: */
062: public static CharPreprocessor createUnicodeEscapesPreprocessor() {
063: return new UnicodeEscapesPreprocessor();
064: }
065:
066: private CharPreprocessorOperation operation;
067:
068: /**
069: * Preprocess at least one character of the input.
070: * <br/>
071: * Preprocessor must always preprocess at least one input character
072: * per invocation but only a minimum necessary number of characters
073: * should be preprocessed by each invocation of this method.
074: *
075: * <p>
076: * Example:
077: * <pre>
078: * public void preprocessChar() {
079: * switch (ch = inputRead()) {
080: * case '\\': // possible start of sequence
081: * switch (ch = inputRead()) {
082: * case 'u': // start of escape sequence
083: * ... // read the whole sequence
084: * outputPreprocessed(prepCh, extraInputLength);
085: * break;
086: * default:
087: * outputOriginal('\\');
088: * outputOriginal(ch);
089: * break;
090: * }
091: * break;
092: * default:
093: * outputOriginal(ch);
094: * }
095: * }
096: * </pre>
097: * </p>
098: *
099: * <p>
100: * The processor is only designed to do several-chars-to-one translation.
101: * <br/>
102: * It is not designed to return more than one character for a single input char.
103: * <br/>
104: * Also if the character is really preprocessed it must be composed
105: * from at least two input characters (see extraInputLength parameter
106: * of {@link #outputPreprocessed(char, int)}.
107: * </p>
108: *
109: * <p>
110: * The preprocessor must be able to process all the characters
111: * given to it on input.
112: * However it should not preprocess EOF in any way
113: * - the EOF is just information that there is an end of the input
114: * and any possibly unfinished escape sequence needs to be translated
115: * in a reasonable way.
116: * <br/>
117: * Once all the characters prior EOF were preprocessed the EOF
118: * should be returned by {@link #outputOriginal(int)}.
119: * </p>
120: *
121: */
122: protected abstract void preprocessChar();
123:
124: /**
125: * Check whether the given character may be part of the sequences preprocessed
126: * by this preprocessor.
127: * <br/>
128: * The infrastructure may use this method to test whether it can start
129: * relexing starting at a particular position.
130: */
131: protected abstract boolean isSensitiveChar(char ch);
132:
133: /**
134: * Return maximum number of extra characters (not being part of the recognized
135: * sequence) that this preprocessor
136: * may look ahead in order to recognize the preprocessed character sequence.
137: * <br/>
138: * For example for unicode escape sequences the returned number is 1
139: * (see UnicodeEscapesPreprocessor implementation for details).
140: */
141: protected abstract int maxLookahead();
142:
143: /**
144: * Read a single character for preprocessing from the underlying input.
145: *
146: * @return valid character or {@link LexerInput#EOF} if there are no more
147: * characters available on the input.
148: */
149: protected final int inputRead() {
150: return operation.inputRead();
151: }
152:
153: /**
154: * Backup a given number of input characters.
155: *
156: * @param count >=0 number of chars to backup.
157: */
158: protected final void inputBackup(int count) {
159: operation.inputBackup(count);
160: }
161:
162: /**
163: * Output the character as it was read from the input.
164: * <br/>
165: * By using this method the infrastructure knows that the character
166: * is the same like the original character read by {@link #inputRead()}.
167: */
168: protected final void outputOriginal(int ch) {
169: operation.outputOriginal(ch);
170: }
171:
172: /**
173: * Output preprocessed character. There is usually more than one input character
174: * forming a single preprocessed character.
175: *
176: * @param ch preprocessed character.
177: * @param extraInputLength >0 number of extra input characters
178: * (besides a single character) that form the preprocessed character.
179: * <br/>
180: * For example for unicode escape sequence "\u0020" it's 6-1=5.
181: * <br/>
182: * The number is expected to be greater than zero
183: * (otherwise the present implementation would not work correctly).
184: * which should be fine for the known implementations
185: * (if not please request an API change).
186: */
187: protected final void outputPreprocessed(char ch,
188: int extraInputLength) {
189: assert (extraInputLength > 0) : "extraInputLength > 0 expected.";
190: operation.outputPreprocessed(ch, extraInputLength);
191: }
192:
193: /**
194: * Notify the error that occurred during preprocessing of the current character.
195: *
196: * @param errorMessage non-null error description.
197: */
198: protected final void notifyError(String errorMessage) {
199: operation.notifyError(errorMessage);
200: }
201:
202: void init(CharPreprocessorOperation operation) {
203: this.operation = operation;
204: }
205:
206: }
|